1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s
; Stack-folding test for cvtpd2pi: the "nop" inline asm declares an "=x" output
; and clobbers xmm1-xmm15 plus flags; the expected output spills %xmm0 and has
; cvtpd2pi read that 16-byte stack slot as a folded reload.
4 define x86_mmx @stack_fold_cvtpd2pi(<2 x double> %a0) {
5 ; CHECK-LABEL: stack_fold_cvtpd2pi:
7 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
11 ; CHECK-NEXT: cvtpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
12 ; CHECK-NEXT: movq2dq %mm0, %xmm0
14 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
15 %2 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone
18 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
; Stack-folding test for cvtpi2pd: the "nop" inline asm declares an "=y" output
; and clobbers mm1-mm7; the expected output spills %mm0 and has cvtpi2pd read
; that 8-byte stack slot as a folded reload.
20 define <2 x double> @stack_fold_cvtpi2pd(x86_mmx %a0) {
21 ; CHECK-LABEL: stack_fold_cvtpi2pd:
23 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
27 ; CHECK-NEXT: cvtpi2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
29 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
30 %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) nounwind readnone
33 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
; Stack-folding test for cvtpi2ps: the "nop" inline asm declares an "=y" output
; and clobbers mm1-mm7; the expected output spills %mm0 and has cvtpi2ps read
; that 8-byte stack slot as a folded reload into %xmm0.
35 define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, x86_mmx %a1) {
36 ; CHECK-LABEL: stack_fold_cvtpi2ps:
38 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
42 ; CHECK-NEXT: cvtpi2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
44 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
45 %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %a1) nounwind readnone
48 declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
; Stack-folding test for cvtps2pi: the "nop" inline asm declares an "=x" output
; and clobbers xmm1-xmm15 plus flags; the expected output spills %xmm0 and has
; cvtps2pi read that 16-byte stack slot as a folded reload.
50 define x86_mmx @stack_fold_cvtps2pi(<4 x float> %a0) {
51 ; CHECK-LABEL: stack_fold_cvtps2pi:
53 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
57 ; CHECK-NEXT: cvtps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
58 ; CHECK-NEXT: movq2dq %mm0, %xmm0
60 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
61 %2 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone
64 declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone
; Stack-folding test for cvttpd2pi: the "nop" inline asm declares an "=x"
; output and clobbers xmm1-xmm15 plus flags; the expected output spills %xmm0
; and has cvttpd2pi read that 16-byte stack slot as a folded reload.
66 define x86_mmx @stack_fold_cvttpd2pi(<2 x double> %a0) {
67 ; CHECK-LABEL: stack_fold_cvttpd2pi:
69 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
73 ; CHECK-NEXT: cvttpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
74 ; CHECK-NEXT: movq2dq %mm0, %xmm0
76 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
77 %2 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone
80 declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
; Stack-folding test for cvttps2pi: the "nop" inline asm declares an "=x"
; output and clobbers xmm1-xmm15 plus flags; the expected output spills %xmm0
; and has cvttps2pi read that 16-byte stack slot as a folded reload.
82 define x86_mmx @stack_fold_cvttps2pi(<4 x float> %a0) {
83 ; CHECK-LABEL: stack_fold_cvttps2pi:
85 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
89 ; CHECK-NEXT: cvttps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
90 ; CHECK-NEXT: movq2dq %mm0, %xmm0
92 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
93 %2 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone
96 declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
98 ; TODO stack_fold_movd_load
99 ; TODO stack_fold_movd_store
100 ; TODO stack_fold_movq_load
101 ; TODO stack_fold_movq_store
; Stack-folding test for pabsb: the "nop" inline asm declares an "=y" output
; and clobbers mm1-mm7; the expected output spills %mm0 and has pabsb read
; that 8-byte stack slot as a folded reload right after the asm block.
103 define x86_mmx @stack_fold_pabsb(x86_mmx %a0) {
104 ; CHECK-LABEL: stack_fold_pabsb:
106 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
109 ; CHECK-NEXT: #NO_APP
110 ; CHECK-NEXT: pabsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
111 ; CHECK-NEXT: movq2dq %mm0, %xmm0
113 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
114 %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %a0) nounwind readnone
117 declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
; Stack-folding test for pabsd: the "nop" inline asm declares an "=y" output
; and clobbers mm1-mm7; the expected output spills %mm0 and has pabsd read
; that 8-byte stack slot as a folded reload right after the asm block.
119 define x86_mmx @stack_fold_pabsd(x86_mmx %a0) {
120 ; CHECK-LABEL: stack_fold_pabsd:
122 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
125 ; CHECK-NEXT: #NO_APP
126 ; CHECK-NEXT: pabsd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
127 ; CHECK-NEXT: movq2dq %mm0, %xmm0
129 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
130 %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %a0) nounwind readnone
133 declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
; Stack-folding test for pabsw: the "nop" inline asm declares an "=y" output
; and clobbers mm1-mm7; the expected output spills %mm0 and has pabsw read
; that 8-byte stack slot as a folded reload right after the asm block.
135 define x86_mmx @stack_fold_pabsw(x86_mmx %a0) {
136 ; CHECK-LABEL: stack_fold_pabsw:
138 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
141 ; CHECK-NEXT: #NO_APP
142 ; CHECK-NEXT: pabsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
143 ; CHECK-NEXT: movq2dq %mm0, %xmm0
145 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
146 %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %a0) nounwind readnone
149 declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
; Stack-folding test for packssdw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has packssdw read
; that 8-byte stack slot as a folded reload right after the asm block.
151 define x86_mmx @stack_fold_packssdw(x86_mmx %a, x86_mmx %b) {
152 ; CHECK-LABEL: stack_fold_packssdw:
154 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
157 ; CHECK-NEXT: #NO_APP
158 ; CHECK-NEXT: packssdw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
159 ; CHECK-NEXT: movq2dq %mm0, %xmm0
161 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
162 %2 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a, x86_mmx %b) nounwind readnone
165 declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for packsswb: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has packsswb read
; that 8-byte stack slot as a folded reload right after the asm block.
167 define x86_mmx @stack_fold_packsswb(x86_mmx %a, x86_mmx %b) {
168 ; CHECK-LABEL: stack_fold_packsswb:
170 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
173 ; CHECK-NEXT: #NO_APP
174 ; CHECK-NEXT: packsswb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
175 ; CHECK-NEXT: movq2dq %mm0, %xmm0
177 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
178 %2 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a, x86_mmx %b) nounwind readnone
181 declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for packuswb: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has packuswb read
; that 8-byte stack slot as a folded reload right after the asm block.
183 define x86_mmx @stack_fold_packuswb(x86_mmx %a, x86_mmx %b) {
184 ; CHECK-LABEL: stack_fold_packuswb:
186 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
189 ; CHECK-NEXT: #NO_APP
190 ; CHECK-NEXT: packuswb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
191 ; CHECK-NEXT: movq2dq %mm0, %xmm0
193 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
194 %2 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a, x86_mmx %b) nounwind readnone
197 declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for paddb: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has paddb read
; that 8-byte stack slot as a folded reload right after the asm block.
199 define x86_mmx @stack_fold_paddb(x86_mmx %a, x86_mmx %b) {
200 ; CHECK-LABEL: stack_fold_paddb:
202 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
205 ; CHECK-NEXT: #NO_APP
206 ; CHECK-NEXT: paddb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
207 ; CHECK-NEXT: movq2dq %mm0, %xmm0
209 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
210 %2 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a, x86_mmx %b) nounwind readnone
213 declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for paddd: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has paddd read
; that 8-byte stack slot as a folded reload right after the asm block.
215 define x86_mmx @stack_fold_paddd(x86_mmx %a, x86_mmx %b) {
216 ; CHECK-LABEL: stack_fold_paddd:
218 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
221 ; CHECK-NEXT: #NO_APP
222 ; CHECK-NEXT: paddd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
223 ; CHECK-NEXT: movq2dq %mm0, %xmm0
225 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
226 %2 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
229 declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for paddq: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has paddq read
; that 8-byte stack slot as a folded reload right after the asm block.
231 define x86_mmx @stack_fold_paddq(x86_mmx %a, x86_mmx %b) {
232 ; CHECK-LABEL: stack_fold_paddq:
234 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
237 ; CHECK-NEXT: #NO_APP
238 ; CHECK-NEXT: paddq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
239 ; CHECK-NEXT: movq2dq %mm0, %xmm0
241 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
242 %2 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a, x86_mmx %b) nounwind readnone
245 declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for paddsb: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has paddsb read
; that 8-byte stack slot as a folded reload right after the asm block.
247 define x86_mmx @stack_fold_paddsb(x86_mmx %a, x86_mmx %b) {
248 ; CHECK-LABEL: stack_fold_paddsb:
250 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
253 ; CHECK-NEXT: #NO_APP
254 ; CHECK-NEXT: paddsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
255 ; CHECK-NEXT: movq2dq %mm0, %xmm0
257 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
258 %2 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a, x86_mmx %b) nounwind readnone
261 declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for paddsw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has paddsw read
; that 8-byte stack slot as a folded reload right after the asm block.
263 define x86_mmx @stack_fold_paddsw(x86_mmx %a, x86_mmx %b) {
264 ; CHECK-LABEL: stack_fold_paddsw:
266 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
269 ; CHECK-NEXT: #NO_APP
270 ; CHECK-NEXT: paddsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
271 ; CHECK-NEXT: movq2dq %mm0, %xmm0
273 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
274 %2 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a, x86_mmx %b) nounwind readnone
277 declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for paddusb: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has paddusb read
; that 8-byte stack slot as a folded reload right after the asm block.
279 define x86_mmx @stack_fold_paddusb(x86_mmx %a, x86_mmx %b) {
280 ; CHECK-LABEL: stack_fold_paddusb:
282 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
285 ; CHECK-NEXT: #NO_APP
286 ; CHECK-NEXT: paddusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
287 ; CHECK-NEXT: movq2dq %mm0, %xmm0
289 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
290 %2 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
293 declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for paddusw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has paddusw read
; that 8-byte stack slot as a folded reload right after the asm block.
295 define x86_mmx @stack_fold_paddusw(x86_mmx %a, x86_mmx %b) {
296 ; CHECK-LABEL: stack_fold_paddusw:
298 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
301 ; CHECK-NEXT: #NO_APP
302 ; CHECK-NEXT: paddusw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
303 ; CHECK-NEXT: movq2dq %mm0, %xmm0
305 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
306 %2 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
309 declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for paddw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has paddw read
; that 8-byte stack slot as a folded reload right after the asm block.
311 define x86_mmx @stack_fold_paddw(x86_mmx %a, x86_mmx %b) {
312 ; CHECK-LABEL: stack_fold_paddw:
314 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
317 ; CHECK-NEXT: #NO_APP
318 ; CHECK-NEXT: paddw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
319 ; CHECK-NEXT: movq2dq %mm0, %xmm0
321 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
322 %2 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
325 declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for palignr: the intrinsic is called with immediate i8 1,
; the "nop" inline asm declares an "=y" output and clobbers mm2-mm7, and the
; expected output spills %mm1 and has "palignr $1" read that 8-byte stack slot
; as a folded reload right after the asm block.
327 define x86_mmx @stack_fold_palignr(x86_mmx %a, x86_mmx %b) {
328 ; CHECK-LABEL: stack_fold_palignr:
330 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
333 ; CHECK-NEXT: #NO_APP
334 ; CHECK-NEXT: palignr $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
335 ; CHECK-NEXT: movq2dq %mm0, %xmm0
337 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
338 %2 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a, x86_mmx %b, i8 1) nounwind readnone
341 declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
; Stack-folding test for pand: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pand read
; that 8-byte stack slot as a folded reload right after the asm block.
343 define x86_mmx @stack_fold_pand(x86_mmx %a, x86_mmx %b) {
344 ; CHECK-LABEL: stack_fold_pand:
346 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
349 ; CHECK-NEXT: #NO_APP
350 ; CHECK-NEXT: pand {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
351 ; CHECK-NEXT: movq2dq %mm0, %xmm0
353 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
354 %2 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a, x86_mmx %b) nounwind readnone
357 declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pandn: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pandn read
; that 8-byte stack slot as a folded reload right after the asm block.
359 define x86_mmx @stack_fold_pandn(x86_mmx %a, x86_mmx %b) {
360 ; CHECK-LABEL: stack_fold_pandn:
362 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
365 ; CHECK-NEXT: #NO_APP
366 ; CHECK-NEXT: pandn {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
367 ; CHECK-NEXT: movq2dq %mm0, %xmm0
369 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
370 %2 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a, x86_mmx %b) nounwind readnone
373 declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pavgb: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pavgb read
; that 8-byte stack slot as a folded reload right after the asm block.
375 define x86_mmx @stack_fold_pavgb(x86_mmx %a, x86_mmx %b) {
376 ; CHECK-LABEL: stack_fold_pavgb:
378 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
381 ; CHECK-NEXT: #NO_APP
382 ; CHECK-NEXT: pavgb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
383 ; CHECK-NEXT: movq2dq %mm0, %xmm0
385 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
386 %2 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a, x86_mmx %b) nounwind readnone
389 declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pavgw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pavgw read
; that 8-byte stack slot as a folded reload right after the asm block.
391 define x86_mmx @stack_fold_pavgw(x86_mmx %a, x86_mmx %b) {
392 ; CHECK-LABEL: stack_fold_pavgw:
394 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
397 ; CHECK-NEXT: #NO_APP
398 ; CHECK-NEXT: pavgw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
399 ; CHECK-NEXT: movq2dq %mm0, %xmm0
401 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
402 %2 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a, x86_mmx %b) nounwind readnone
405 declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pcmpeqb: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pcmpeqb read
; that 8-byte stack slot as a folded reload right after the asm block.
407 define x86_mmx @stack_fold_pcmpeqb(x86_mmx %a, x86_mmx %b) {
408 ; CHECK-LABEL: stack_fold_pcmpeqb:
410 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
413 ; CHECK-NEXT: #NO_APP
414 ; CHECK-NEXT: pcmpeqb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
415 ; CHECK-NEXT: movq2dq %mm0, %xmm0
417 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
418 %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a, x86_mmx %b) nounwind readnone
421 declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pcmpeqd: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pcmpeqd read
; that 8-byte stack slot as a folded reload right after the asm block.
423 define x86_mmx @stack_fold_pcmpeqd(x86_mmx %a, x86_mmx %b) {
424 ; CHECK-LABEL: stack_fold_pcmpeqd:
426 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
429 ; CHECK-NEXT: #NO_APP
430 ; CHECK-NEXT: pcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
431 ; CHECK-NEXT: movq2dq %mm0, %xmm0
433 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
434 %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a, x86_mmx %b) nounwind readnone
437 declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pcmpeqw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pcmpeqw read
; that 8-byte stack slot as a folded reload right after the asm block.
439 define x86_mmx @stack_fold_pcmpeqw(x86_mmx %a, x86_mmx %b) {
440 ; CHECK-LABEL: stack_fold_pcmpeqw:
442 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
445 ; CHECK-NEXT: #NO_APP
446 ; CHECK-NEXT: pcmpeqw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
447 ; CHECK-NEXT: movq2dq %mm0, %xmm0
449 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
450 %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a, x86_mmx %b) nounwind readnone
453 declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pcmpgtb: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pcmpgtb read
; that 8-byte stack slot as a folded reload right after the asm block.
455 define x86_mmx @stack_fold_pcmpgtb(x86_mmx %a, x86_mmx %b) {
456 ; CHECK-LABEL: stack_fold_pcmpgtb:
458 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
461 ; CHECK-NEXT: #NO_APP
462 ; CHECK-NEXT: pcmpgtb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
463 ; CHECK-NEXT: movq2dq %mm0, %xmm0
465 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
466 %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a, x86_mmx %b) nounwind readnone
469 declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pcmpgtd: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pcmpgtd read
; that 8-byte stack slot as a folded reload right after the asm block.
471 define x86_mmx @stack_fold_pcmpgtd(x86_mmx %a, x86_mmx %b) {
472 ; CHECK-LABEL: stack_fold_pcmpgtd:
474 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
477 ; CHECK-NEXT: #NO_APP
478 ; CHECK-NEXT: pcmpgtd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
479 ; CHECK-NEXT: movq2dq %mm0, %xmm0
481 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
482 %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a, x86_mmx %b) nounwind readnone
485 declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pcmpgtw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pcmpgtw read
; that 8-byte stack slot as a folded reload right after the asm block.
487 define x86_mmx @stack_fold_pcmpgtw(x86_mmx %a, x86_mmx %b) {
488 ; CHECK-LABEL: stack_fold_pcmpgtw:
490 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
493 ; CHECK-NEXT: #NO_APP
494 ; CHECK-NEXT: pcmpgtw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
495 ; CHECK-NEXT: movq2dq %mm0, %xmm0
497 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
498 %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a, x86_mmx %b) nounwind readnone
501 declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for phaddd: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has phaddd read
; that 8-byte stack slot as a folded reload right after the asm block.
503 define x86_mmx @stack_fold_phaddd(x86_mmx %a, x86_mmx %b) {
504 ; CHECK-LABEL: stack_fold_phaddd:
506 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
509 ; CHECK-NEXT: #NO_APP
510 ; CHECK-NEXT: phaddd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
511 ; CHECK-NEXT: movq2dq %mm0, %xmm0
513 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
514 %2 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
517 declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for phaddsw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has phaddsw read
; that 8-byte stack slot as a folded reload right after the asm block.
519 define x86_mmx @stack_fold_phaddsw(x86_mmx %a, x86_mmx %b) {
520 ; CHECK-LABEL: stack_fold_phaddsw:
522 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
525 ; CHECK-NEXT: #NO_APP
526 ; CHECK-NEXT: phaddsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
527 ; CHECK-NEXT: movq2dq %mm0, %xmm0
529 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
530 %2 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
533 declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for phaddw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has phaddw read
; that 8-byte stack slot as a folded reload right after the asm block.
535 define x86_mmx @stack_fold_phaddw(x86_mmx %a, x86_mmx %b) {
536 ; CHECK-LABEL: stack_fold_phaddw:
538 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
541 ; CHECK-NEXT: #NO_APP
542 ; CHECK-NEXT: phaddw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
543 ; CHECK-NEXT: movq2dq %mm0, %xmm0
545 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
546 %2 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
549 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for phsubd: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has phsubd read
; that 8-byte stack slot as a folded reload right after the asm block.
551 define x86_mmx @stack_fold_phsubd(x86_mmx %a, x86_mmx %b) {
552 ; CHECK-LABEL: stack_fold_phsubd:
554 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
557 ; CHECK-NEXT: #NO_APP
558 ; CHECK-NEXT: phsubd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
559 ; CHECK-NEXT: movq2dq %mm0, %xmm0
561 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
562 %2 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
565 declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for phsubsw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has phsubsw read
; that 8-byte stack slot as a folded reload right after the asm block.
567 define x86_mmx @stack_fold_phsubsw(x86_mmx %a, x86_mmx %b) {
568 ; CHECK-LABEL: stack_fold_phsubsw:
570 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
573 ; CHECK-NEXT: #NO_APP
574 ; CHECK-NEXT: phsubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
575 ; CHECK-NEXT: movq2dq %mm0, %xmm0
577 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
578 %2 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
581 declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for phsubw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has phsubw read
; that 8-byte stack slot as a folded reload right after the asm block.
583 define x86_mmx @stack_fold_phsubw(x86_mmx %a, x86_mmx %b) {
584 ; CHECK-LABEL: stack_fold_phsubw:
586 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
589 ; CHECK-NEXT: #NO_APP
590 ; CHECK-NEXT: phsubw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
591 ; CHECK-NEXT: movq2dq %mm0, %xmm0
593 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
594 %2 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
597 declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
599 ; TODO stack_fold_pinsrw
; Stack-folding test for pmaddubsw: the "nop" inline asm declares an "=y"
; output and clobbers mm2-mm7; the expected output spills %mm1 and has
; pmaddubsw read that 8-byte stack slot as a folded reload right after the
; asm block.
601 define x86_mmx @stack_fold_pmaddubsw(x86_mmx %a, x86_mmx %b) {
602 ; CHECK-LABEL: stack_fold_pmaddubsw:
604 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
607 ; CHECK-NEXT: #NO_APP
608 ; CHECK-NEXT: pmaddubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
609 ; CHECK-NEXT: movq2dq %mm0, %xmm0
611 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
612 %2 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
615 declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pmaddwd: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pmaddwd read
; that 8-byte stack slot as a folded reload right after the asm block.
617 define x86_mmx @stack_fold_pmaddwd(x86_mmx %a, x86_mmx %b) {
618 ; CHECK-LABEL: stack_fold_pmaddwd:
620 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
623 ; CHECK-NEXT: #NO_APP
624 ; CHECK-NEXT: pmaddwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
625 ; CHECK-NEXT: movq2dq %mm0, %xmm0
627 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
628 %2 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a, x86_mmx %b) nounwind readnone
631 declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pmaxsw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pmaxsw read
; that 8-byte stack slot as a folded reload right after the asm block.
633 define x86_mmx @stack_fold_pmaxsw(x86_mmx %a, x86_mmx %b) {
634 ; CHECK-LABEL: stack_fold_pmaxsw:
636 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
639 ; CHECK-NEXT: #NO_APP
640 ; CHECK-NEXT: pmaxsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
641 ; CHECK-NEXT: movq2dq %mm0, %xmm0
643 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
644 %2 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
647 declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pmaxub: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pmaxub read
; that 8-byte stack slot as a folded reload right after the asm block.
649 define x86_mmx @stack_fold_pmaxub(x86_mmx %a, x86_mmx %b) {
650 ; CHECK-LABEL: stack_fold_pmaxub:
652 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
655 ; CHECK-NEXT: #NO_APP
656 ; CHECK-NEXT: pmaxub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
657 ; CHECK-NEXT: movq2dq %mm0, %xmm0
659 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
660 %2 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
663 declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pminsw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pminsw read
; that 8-byte stack slot as a folded reload right after the asm block.
665 define x86_mmx @stack_fold_pminsw(x86_mmx %a, x86_mmx %b) {
666 ; CHECK-LABEL: stack_fold_pminsw:
668 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
671 ; CHECK-NEXT: #NO_APP
672 ; CHECK-NEXT: pminsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
673 ; CHECK-NEXT: movq2dq %mm0, %xmm0
675 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
676 %2 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a, x86_mmx %b) nounwind readnone
679 declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pminub: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pminub read
; that 8-byte stack slot as a folded reload right after the asm block.
681 define x86_mmx @stack_fold_pminub(x86_mmx %a, x86_mmx %b) {
682 ; CHECK-LABEL: stack_fold_pminub:
684 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
687 ; CHECK-NEXT: #NO_APP
688 ; CHECK-NEXT: pminub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
689 ; CHECK-NEXT: movq2dq %mm0, %xmm0
691 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
692 %2 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
695 declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pmulhrsw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pmulhrsw read
; that 8-byte stack slot as a folded reload right after the asm block.
697 define x86_mmx @stack_fold_pmulhrsw(x86_mmx %a, x86_mmx %b) {
698 ; CHECK-LABEL: stack_fold_pmulhrsw:
700 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
703 ; CHECK-NEXT: #NO_APP
704 ; CHECK-NEXT: pmulhrsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
705 ; CHECK-NEXT: movq2dq %mm0, %xmm0
707 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
708 %2 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
711 declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pmulhuw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pmulhuw read
; that 8-byte stack slot as a folded reload right after the asm block.
713 define x86_mmx @stack_fold_pmulhuw(x86_mmx %a, x86_mmx %b) {
714 ; CHECK-LABEL: stack_fold_pmulhuw:
716 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
719 ; CHECK-NEXT: #NO_APP
720 ; CHECK-NEXT: pmulhuw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
721 ; CHECK-NEXT: movq2dq %mm0, %xmm0
723 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
724 %2 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a, x86_mmx %b) nounwind readnone
727 declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pmulhw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pmulhw read
; that 8-byte stack slot as a folded reload right after the asm block.
729 define x86_mmx @stack_fold_pmulhw(x86_mmx %a, x86_mmx %b) {
730 ; CHECK-LABEL: stack_fold_pmulhw:
732 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
735 ; CHECK-NEXT: #NO_APP
736 ; CHECK-NEXT: pmulhw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
737 ; CHECK-NEXT: movq2dq %mm0, %xmm0
739 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
740 %2 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a, x86_mmx %b) nounwind readnone
743 declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pmullw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pmullw read
; that 8-byte stack slot as a folded reload right after the asm block.
745 define x86_mmx @stack_fold_pmullw(x86_mmx %a, x86_mmx %b) {
746 ; CHECK-LABEL: stack_fold_pmullw:
748 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
751 ; CHECK-NEXT: #NO_APP
752 ; CHECK-NEXT: pmullw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
753 ; CHECK-NEXT: movq2dq %mm0, %xmm0
755 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
756 %2 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a, x86_mmx %b) nounwind readnone
759 declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pmuludq: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has pmuludq read
; that 8-byte stack slot as a folded reload right after the asm block.
761 define x86_mmx @stack_fold_pmuludq(x86_mmx %a, x86_mmx %b) {
762 ; CHECK-LABEL: stack_fold_pmuludq:
764 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
767 ; CHECK-NEXT: #NO_APP
768 ; CHECK-NEXT: pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
769 ; CHECK-NEXT: movq2dq %mm0, %xmm0
771 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
772 %2 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a, x86_mmx %b) nounwind readnone
775 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for por: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has por read
; that 8-byte stack slot as a folded reload right after the asm block.
777 define x86_mmx @stack_fold_por(x86_mmx %a, x86_mmx %b) {
778 ; CHECK-LABEL: stack_fold_por:
780 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
783 ; CHECK-NEXT: #NO_APP
784 ; CHECK-NEXT: por {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
785 ; CHECK-NEXT: movq2dq %mm0, %xmm0
787 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
788 %2 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a, x86_mmx %b) nounwind readnone
791 declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psadbw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has psadbw read
; that 8-byte stack slot as a folded reload right after the asm block.
793 define x86_mmx @stack_fold_psadbw(x86_mmx %a, x86_mmx %b) {
794 ; CHECK-LABEL: stack_fold_psadbw:
796 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
799 ; CHECK-NEXT: #NO_APP
800 ; CHECK-NEXT: psadbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
801 ; CHECK-NEXT: movq2dq %mm0, %xmm0
803 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
804 %2 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind readnone
807 declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pshufb: the "nop" inline asm declares an "=y" output
; and clobbers mm1-mm7; the expected output spills both %mm1 and %mm0, reloads
; one slot into %mm0 with a plain movq, and has pshufb read the other 8-byte
; slot as a folded reload.
; NOTE(review): the other two-operand tests in this file clobber only mm2-mm7
; and spill a single register; here mm1 is clobbered too, which forces the
; extra spill and unfolded reload. Confirm this is intentional.
809 define x86_mmx @stack_fold_pshufb(x86_mmx %a, x86_mmx %b) {
810 ; CHECK-LABEL: stack_fold_pshufb:
812 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
813 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
816 ; CHECK-NEXT: #NO_APP
817 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Reload
818 ; CHECK-NEXT: pshufb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
819 ; CHECK-NEXT: movq2dq %mm0, %xmm0
821 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
822 %2 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a, x86_mmx %b) nounwind readnone
825 declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pshufw: the intrinsic is called with immediate i8 1,
; the "nop" inline asm declares an "=y" output and clobbers mm1-mm7, and the
; expected output spills %mm0 and has "pshufw $1" read that 8-byte stack slot
; as a folded reload, with the shuffle printed as mem[1,0,0,0].
827 define x86_mmx @stack_fold_pshufw(x86_mmx %a) {
828 ; CHECK-LABEL: stack_fold_pshufw:
830 ; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
833 ; CHECK-NEXT: #NO_APP
834 ; CHECK-NEXT: pshufw $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
835 ; CHECK-NEXT: # mm0 = mem[1,0,0,0]
836 ; CHECK-NEXT: movq2dq %mm0, %xmm0
838 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
839 %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %a, i8 1) nounwind readnone
842 declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
; Stack-folding test for psignb: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has psignb read
; that 8-byte stack slot as a folded reload right after the asm block.
844 define x86_mmx @stack_fold_psignb(x86_mmx %a0, x86_mmx %a1) {
845 ; CHECK-LABEL: stack_fold_psignb:
847 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
850 ; CHECK-NEXT: #NO_APP
851 ; CHECK-NEXT: psignb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
852 ; CHECK-NEXT: movq2dq %mm0, %xmm0
854 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
855 %2 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) nounwind readnone
858 declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psignd: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has psignd read
; that 8-byte stack slot as a folded reload right after the asm block.
860 define x86_mmx @stack_fold_psignd(x86_mmx %a0, x86_mmx %a1) {
861 ; CHECK-LABEL: stack_fold_psignd:
863 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
866 ; CHECK-NEXT: #NO_APP
867 ; CHECK-NEXT: psignd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
868 ; CHECK-NEXT: movq2dq %mm0, %xmm0
870 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
871 %2 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) nounwind readnone
874 declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psignw: the "nop" inline asm declares an "=y" output
; and clobbers mm2-mm7; the expected output spills %mm1 and has psignw read
; that 8-byte stack slot as a folded reload right after the asm block.
876 define x86_mmx @stack_fold_psignw(x86_mmx %a0, x86_mmx %a1) {
877 ; CHECK-LABEL: stack_fold_psignw:
879 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
882 ; CHECK-NEXT: #NO_APP
883 ; CHECK-NEXT: psignw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
884 ; CHECK-NEXT: movq2dq %mm0, %xmm0
886 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
887 %2 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) nounwind readnone
890 declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pslld: calls @llvm.x86.mmx.psll.d on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and pslld to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
892 define x86_mmx @stack_fold_pslld(x86_mmx %a, x86_mmx %b) {
893 ; CHECK-LABEL: stack_fold_pslld:
895 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
898 ; CHECK-NEXT: #NO_APP
899 ; CHECK-NEXT: pslld {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
900 ; CHECK-NEXT: movq2dq %mm0, %xmm0
902 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
903 %2 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_pslld.
906 declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psllq: calls @llvm.x86.mmx.psll.q on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psllq to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
908 define x86_mmx @stack_fold_psllq(x86_mmx %a, x86_mmx %b) {
909 ; CHECK-LABEL: stack_fold_psllq:
911 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
914 ; CHECK-NEXT: #NO_APP
915 ; CHECK-NEXT: psllq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
916 ; CHECK-NEXT: movq2dq %mm0, %xmm0
918 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
919 %2 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psllq.
922 declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psllw: calls @llvm.x86.mmx.psll.w on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psllw to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
924 define x86_mmx @stack_fold_psllw(x86_mmx %a, x86_mmx %b) {
925 ; CHECK-LABEL: stack_fold_psllw:
927 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
930 ; CHECK-NEXT: #NO_APP
931 ; CHECK-NEXT: psllw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
932 ; CHECK-NEXT: movq2dq %mm0, %xmm0
934 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
935 %2 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psllw.
938 declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psrad: calls @llvm.x86.mmx.psra.d on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psrad to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
940 define x86_mmx @stack_fold_psrad(x86_mmx %a, x86_mmx %b) {
941 ; CHECK-LABEL: stack_fold_psrad:
943 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
946 ; CHECK-NEXT: #NO_APP
947 ; CHECK-NEXT: psrad {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
948 ; CHECK-NEXT: movq2dq %mm0, %xmm0
950 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
951 %2 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psrad.
954 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psraw: calls @llvm.x86.mmx.psra.w on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psraw to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
956 define x86_mmx @stack_fold_psraw(x86_mmx %a, x86_mmx %b) {
957 ; CHECK-LABEL: stack_fold_psraw:
959 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
962 ; CHECK-NEXT: #NO_APP
963 ; CHECK-NEXT: psraw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
964 ; CHECK-NEXT: movq2dq %mm0, %xmm0
966 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
967 %2 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psraw.
970 declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psrld: calls @llvm.x86.mmx.psrl.d on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psrld to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
972 define x86_mmx @stack_fold_psrld(x86_mmx %a, x86_mmx %b) {
973 ; CHECK-LABEL: stack_fold_psrld:
975 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
978 ; CHECK-NEXT: #NO_APP
979 ; CHECK-NEXT: psrld {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
980 ; CHECK-NEXT: movq2dq %mm0, %xmm0
982 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
983 %2 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psrld.
986 declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psrlq: calls @llvm.x86.mmx.psrl.q on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psrlq to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
988 define x86_mmx @stack_fold_psrlq(x86_mmx %a, x86_mmx %b) {
989 ; CHECK-LABEL: stack_fold_psrlq:
991 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
994 ; CHECK-NEXT: #NO_APP
995 ; CHECK-NEXT: psrlq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
996 ; CHECK-NEXT: movq2dq %mm0, %xmm0
998 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
999 %2 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psrlq.
1002 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psrlw: calls @llvm.x86.mmx.psrl.w on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psrlw to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1004 define x86_mmx @stack_fold_psrlw(x86_mmx %a, x86_mmx %b) {
1005 ; CHECK-LABEL: stack_fold_psrlw:
1007 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1010 ; CHECK-NEXT: #NO_APP
1011 ; CHECK-NEXT: psrlw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1012 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1014 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1015 %2 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psrlw.
1018 declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psubb: calls @llvm.x86.mmx.psub.b on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psubb to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1020 define x86_mmx @stack_fold_psubb(x86_mmx %a, x86_mmx %b) {
1021 ; CHECK-LABEL: stack_fold_psubb:
1023 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1026 ; CHECK-NEXT: #NO_APP
1027 ; CHECK-NEXT: psubb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1028 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1030 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1031 %2 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psubb.
1034 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psubd: calls @llvm.x86.mmx.psub.d on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psubd to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1036 define x86_mmx @stack_fold_psubd(x86_mmx %a, x86_mmx %b) {
1037 ; CHECK-LABEL: stack_fold_psubd:
1039 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1042 ; CHECK-NEXT: #NO_APP
1043 ; CHECK-NEXT: psubd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1044 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1046 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1047 %2 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psubd.
1050 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psubq: calls @llvm.x86.mmx.psub.q on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psubq to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1052 define x86_mmx @stack_fold_psubq(x86_mmx %a, x86_mmx %b) {
1053 ; CHECK-LABEL: stack_fold_psubq:
1055 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1058 ; CHECK-NEXT: #NO_APP
1059 ; CHECK-NEXT: psubq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1060 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1062 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1063 %2 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psubq.
1066 declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psubsb: calls @llvm.x86.mmx.psubs.b on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psubsb to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1068 define x86_mmx @stack_fold_psubsb(x86_mmx %a, x86_mmx %b) {
1069 ; CHECK-LABEL: stack_fold_psubsb:
1071 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1074 ; CHECK-NEXT: #NO_APP
1075 ; CHECK-NEXT: psubsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1076 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1078 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1079 %2 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psubsb.
1082 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psubsw: calls @llvm.x86.mmx.psubs.w on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psubsw to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1084 define x86_mmx @stack_fold_psubsw(x86_mmx %a, x86_mmx %b) {
1085 ; CHECK-LABEL: stack_fold_psubsw:
1087 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1090 ; CHECK-NEXT: #NO_APP
1091 ; CHECK-NEXT: psubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1092 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1094 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1095 %2 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psubsw.
1098 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psubusb: calls @llvm.x86.mmx.psubus.b on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psubusb to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1100 define x86_mmx @stack_fold_psubusb(x86_mmx %a, x86_mmx %b) {
1101 ; CHECK-LABEL: stack_fold_psubusb:
1103 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1106 ; CHECK-NEXT: #NO_APP
1107 ; CHECK-NEXT: psubusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1108 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1110 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1111 %2 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psubusb.
1114 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psubusw: calls @llvm.x86.mmx.psubus.w on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psubusw to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1116 define x86_mmx @stack_fold_psubusw(x86_mmx %a, x86_mmx %b) {
1117 ; CHECK-LABEL: stack_fold_psubusw:
1119 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1122 ; CHECK-NEXT: #NO_APP
1123 ; CHECK-NEXT: psubusw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1124 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1126 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1127 %2 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psubusw.
1130 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for psubw: calls @llvm.x86.mmx.psub.w on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and psubw to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1132 define x86_mmx @stack_fold_psubw(x86_mmx %a, x86_mmx %b) {
1133 ; CHECK-LABEL: stack_fold_psubw:
1135 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1138 ; CHECK-NEXT: #NO_APP
1139 ; CHECK-NEXT: psubw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1140 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1142 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1143 %2 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_psubw.
1146 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for punpckhbw: calls @llvm.x86.mmx.punpckhbw on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and punpckhbw to take
; that operand straight from the stack slot ("Folded Reload"). They also pin
; the shuffle-decode comment, which interleaves elements 4-7 of mm0 and mem.
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1148 define x86_mmx @stack_fold_punpckhbw(x86_mmx %a, x86_mmx %b) {
1149 ; CHECK-LABEL: stack_fold_punpckhbw:
1151 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1154 ; CHECK-NEXT: #NO_APP
1155 ; CHECK-NEXT: punpckhbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1156 ; CHECK-NEXT: # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
1157 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1159 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1160 %2 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_punpckhbw.
1163 declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for punpckhdq: calls @llvm.x86.mmx.punpckhdq on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and punpckhdq to take
; that operand straight from the stack slot ("Folded Reload"). They also pin
; the shuffle-decode comment mm0[1],mem[1].
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1165 define x86_mmx @stack_fold_punpckhdq(x86_mmx %a, x86_mmx %b) {
1166 ; CHECK-LABEL: stack_fold_punpckhdq:
1168 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1171 ; CHECK-NEXT: #NO_APP
1172 ; CHECK-NEXT: punpckhdq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1173 ; CHECK-NEXT: # mm0 = mm0[1],mem[1]
1174 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1176 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1177 %2 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_punpckhdq.
1180 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for punpckhwd: calls @llvm.x86.mmx.punpckhwd on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and punpckhwd to take
; that operand straight from the stack slot ("Folded Reload"). They also pin
; the shuffle-decode comment, which interleaves elements 2-3 of mm0 and mem.
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1182 define x86_mmx @stack_fold_punpckhwd(x86_mmx %a, x86_mmx %b) {
1183 ; CHECK-LABEL: stack_fold_punpckhwd:
1185 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1188 ; CHECK-NEXT: #NO_APP
1189 ; CHECK-NEXT: punpckhwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1190 ; CHECK-NEXT: # mm0 = mm0[2],mem[2],mm0[3],mem[3]
1191 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1193 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1194 %2 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_punpckhwd.
1197 declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for punpcklbw: calls @llvm.x86.mmx.punpcklbw on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and punpcklbw to take
; that operand straight from the stack slot ("Folded Reload"). They also pin
; the shuffle-decode comment, which interleaves elements 0-3 of mm0 and mem.
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1199 define x86_mmx @stack_fold_punpcklbw(x86_mmx %a, x86_mmx %b) {
1200 ; CHECK-LABEL: stack_fold_punpcklbw:
1202 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1205 ; CHECK-NEXT: #NO_APP
1206 ; CHECK-NEXT: punpcklbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1207 ; CHECK-NEXT: # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
1208 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1210 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1211 %2 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_punpcklbw.
1214 declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for punpckldq: calls @llvm.x86.mmx.punpckldq on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and punpckldq to take
; that operand straight from the stack slot ("Folded Reload"). They also pin
; the shuffle-decode comment mm0[0],mem[0].
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1216 define x86_mmx @stack_fold_punpckldq(x86_mmx %a, x86_mmx %b) {
1217 ; CHECK-LABEL: stack_fold_punpckldq:
1219 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1222 ; CHECK-NEXT: #NO_APP
1223 ; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1224 ; CHECK-NEXT: # mm0 = mm0[0],mem[0]
1225 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1227 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1228 %2 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_punpckldq.
1231 declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for punpcklwd: calls @llvm.x86.mmx.punpcklwd on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and punpcklwd to take
; that operand straight from the stack slot ("Folded Reload"). They also pin
; the shuffle-decode comment, which interleaves elements 0-1 of mm0 and mem.
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1233 define x86_mmx @stack_fold_punpcklwd(x86_mmx %a, x86_mmx %b) {
1234 ; CHECK-LABEL: stack_fold_punpcklwd:
1236 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1239 ; CHECK-NEXT: #NO_APP
1240 ; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1241 ; CHECK-NEXT: # mm0 = mm0[0],mem[0],mm0[1],mem[1]
1242 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1244 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1245 %2 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_punpcklwd.
1248 declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
; Stack-folding test for pxor: calls @llvm.x86.mmx.pxor on %a and %b.
; The CHECK lines expect %mm1 to be spilled (8 bytes) and pxor to take that
; operand straight from the stack slot ("Folded Reload").
; NOTE(review): the "nop" inline asm yields an x86_mmx result and lists
; mm2-mm7 as clobbers, presumably to force the spill of %mm1 -- confirm.
1250 define x86_mmx @stack_fold_pxor(x86_mmx %a, x86_mmx %b) {
1251 ; CHECK-LABEL: stack_fold_pxor:
1253 ; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1256 ; CHECK-NEXT: #NO_APP
1257 ; CHECK-NEXT: pxor {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1258 ; CHECK-NEXT: movq2dq %mm0, %xmm0
1260 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1261 %2 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a, x86_mmx %b) nounwind readnone
; Declaration of the intrinsic called by stack_fold_pxor.
1264 declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone