; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,SSE,X64-SSE

declare void @use(i32 %arg)
declare void @vec_use(<4 x i32> %arg)

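; (x+c1)+c2
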
define i32 @add_const_add_const(i32 %arg) {
; X86-LABEL: add_const_add_const:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addl $10, %eax
; X86-NEXT:    retl
;
; X64-LABEL: add_const_add_const:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal 10(%rdi), %eax
; X64-NEXT:    retq
  %t0 = add i32 %arg, 8
  %t1 = add i32 %t0, 2
  ret i32 %t1
}

define i32 @add_const_add_const_extrause(i32 %arg) {
; X86-LABEL: add_const_add_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    leal 8(%esi), %eax
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NEXT:    calll use
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NEXT:    addl $10, %esi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: add_const_add_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    movl %edi, %ebx
; X64-NEXT:    leal 8(%rbx), %edi
; X64-NEXT:    callq use
; X64-NEXT:    leal 10(%rbx), %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = add i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = add i32 %t0, 2
  ret i32 %t1
}

define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_add_const:
; X86:       # %bb.0:
; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_add_const_add_const:
; X64:       # %bb.0:
; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_add_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    subl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 32
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT:    paddd %xmm1, %xmm0
; X86-NEXT:    calll vec_use
; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    addl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: vec_add_const_add_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT:    paddd %xmm1, %xmm0
; X64-NEXT:    callq vec_use
; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_add_const_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_add_const_nonsplat:
; X86:       # %bb.0:
; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_add_const_add_const_nonsplat:
; X64:       # %bb.0:
; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}

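; (x+c1)-c2
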
define i32 @add_const_sub_const(i32 %arg) {
; X86-LABEL: add_const_sub_const:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addl $6, %eax
; X86-NEXT:    retl
;
; X64-LABEL: add_const_sub_const:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal 6(%rdi), %eax
; X64-NEXT:    retq
  %t0 = add i32 %arg, 8
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}

define i32 @add_const_sub_const_extrause(i32 %arg) {
; X86-LABEL: add_const_sub_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    leal 8(%esi), %eax
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NEXT:    calll use
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NEXT:    addl $6, %esi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: add_const_sub_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    movl %edi, %ebx
; X64-NEXT:    leal 8(%rbx), %edi
; X64-NEXT:    callq use
; X64-NEXT:    leal 6(%rbx), %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = add i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}

define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_sub_const:
; X86:       # %bb.0:
; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_add_const_sub_const:
; X64:       # %bb.0:
; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_sub_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    subl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 32
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT:    paddd %xmm1, %xmm0
; X86-NEXT:    calll vec_use
; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    addl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: vec_add_const_sub_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT:    paddd %xmm1, %xmm0
; X64-NEXT:    callq vec_use
; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_add_const_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_sub_const_nonsplat:
; X86:       # %bb.0:
; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_add_const_sub_const_nonsplat:
; X64:       # %bb.0:
; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}

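; c2-(x+c1)
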
define i32 @add_const_const_sub(i32 %arg) {
; X86-LABEL: add_const_const_sub:
; X86:       # %bb.0:
; X86-NEXT:    movl $-6, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: add_const_const_sub:
; X64:       # %bb.0:
; X64-NEXT:    movl $-6, %eax
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    retq
  %t0 = add i32 %arg, 8
  %t1 = sub i32 2, %t0
  ret i32 %t1
}

define i32 @add_const_const_sub_extrause(i32 %arg) {
; X86-LABEL: add_const_const_sub_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    leal 8(%esi), %eax
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NEXT:    calll use
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NEXT:    movl $-6, %eax
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: add_const_const_sub_extrause:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    movl %edi, %ebx
; X64-NEXT:    leal 8(%rbx), %edi
; X64-NEXT:    callq use
; X64-NEXT:    movl $-6, %eax
; X64-NEXT:    subl %ebx, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = add i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = sub i32 2, %t0
  ret i32 %t1
}

define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_const_sub:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
; X86-NEXT:    psubd %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_add_const_const_sub:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
; X64-NEXT:    psubd %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}

define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_const_sub_extrause:
; X86:       # %bb.0:
; X86-NEXT:    subl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 32
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT:    paddd %xmm1, %xmm0
; X86-NEXT:    calll vec_use
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT:    psubd %xmm1, %xmm0
; X86-NEXT:    addl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: vec_add_const_const_sub_extrause:
; X64:       # %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT:    paddd %xmm1, %xmm0
; X64-NEXT:    callq vec_use
; X64-NEXT:    movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}

define <4 x i32> @vec_add_const_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_const_sub_nonsplat:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
; X86-NEXT:    psubd %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_add_const_const_sub_nonsplat:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
; X64-NEXT:    psubd %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
  ret <4 x i32> %t1
}

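; (x-c1)+c2
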
define i32 @sub_const_add_const(i32 %arg) {
; X86-LABEL: sub_const_add_const:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addl $-6, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sub_const_add_const:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal -6(%rdi), %eax
; X64-NEXT:    retq
  %t0 = sub i32 %arg, 8
  %t1 = add i32 %t0, 2
  ret i32 %t1
}

define i32 @sub_const_add_const_extrause(i32 %arg) {
; X86-LABEL: sub_const_add_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    leal -8(%esi), %eax
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NEXT:    calll use
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NEXT:    addl $-6, %esi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: sub_const_add_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    movl %edi, %ebx
; X64-NEXT:    leal -8(%rbx), %edi
; X64-NEXT:    callq use
; X64-NEXT:    leal -6(%rbx), %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = add i32 %t0, 2
  ret i32 %t1
}

define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_add_const:
; X86:       # %bb.0:
; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_sub_const_add_const:
; X64:       # %bb.0:
; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_add_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    subl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 32
; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    calll vec_use
; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    addl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: vec_sub_const_add_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
; X64-NEXT:    callq vec_use
; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_sub_const_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_add_const_nonsplat:
; X86:       # %bb.0:
; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_sub_const_add_const_nonsplat:
; X64:       # %bb.0:
; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}

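; (x-c1)-c2
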
define i32 @sub_const_sub_const(i32 %arg) {
; X86-LABEL: sub_const_sub_const:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addl $-10, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sub_const_sub_const:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal -10(%rdi), %eax
; X64-NEXT:    retq
  %t0 = sub i32 %arg, 8
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}

define i32 @sub_const_sub_const_extrause(i32 %arg) {
; X86-LABEL: sub_const_sub_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    leal -8(%esi), %eax
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NEXT:    calll use
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NEXT:    addl $-10, %esi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: sub_const_sub_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    movl %edi, %ebx
; X64-NEXT:    leal -8(%rbx), %edi
; X64-NEXT:    callq use
; X64-NEXT:    leal -10(%rbx), %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}

define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_sub_const:
; X86:       # %bb.0:
; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_sub_const_sub_const:
; X64:       # %bb.0:
; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_sub_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    subl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 32
; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    calll vec_use
; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    addl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: vec_sub_const_sub_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
; X64-NEXT:    callq vec_use
; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_sub_const_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_sub_const_nonsplat:
; X86:       # %bb.0:
; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_sub_const_sub_const_nonsplat:
; X64:       # %bb.0:
; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}

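; c2-(x-c1)
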
define i32 @sub_const_const_sub(i32 %arg) {
; X86-LABEL: sub_const_const_sub:
; X86:       # %bb.0:
; X86-NEXT:    movl $10, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: sub_const_const_sub:
; X64:       # %bb.0:
; X64-NEXT:    movl $10, %eax
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    retq
  %t0 = sub i32 %arg, 8
  %t1 = sub i32 2, %t0
  ret i32 %t1
}

define i32 @sub_const_const_sub_extrause(i32 %arg) {
; X86-LABEL: sub_const_const_sub_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    leal -8(%esi), %eax
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NEXT:    calll use
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NEXT:    movl $10, %eax
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: sub_const_const_sub_extrause:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    movl %edi, %ebx
; X64-NEXT:    leal -8(%rbx), %edi
; X64-NEXT:    callq use
; X64-NEXT:    movl $10, %eax
; X64-NEXT:    subl %ebx, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub i32 %arg, 8
  call void @use(i32 %t0)
  %t1 = sub i32 2, %t0
  ret i32 %t1
}

define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_const_sub:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X86-NEXT:    psubd %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_sub_const_const_sub:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X64-NEXT:    psubd %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}

define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_const_sub_extrause:
; X86:       # %bb.0:
; X86-NEXT:    subl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 32
; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT:    calll vec_use
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT:    psubd %xmm1, %xmm0
; X86-NEXT:    addl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: vec_sub_const_const_sub_extrause:
; X64:       # %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT:    callq vec_use
; X64-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}

define <4 x i32> @vec_sub_const_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_const_sub_nonsplat:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X86-NEXT:    psubd %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_sub_const_const_sub_nonsplat:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X64-NEXT:    psubd %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
  ret <4 x i32> %t1
}

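; (c1-x)+c2
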
define i32 @const_sub_add_const(i32 %arg) {
; X86-LABEL: const_sub_add_const:
; X86:       # %bb.0:
; X86-NEXT:    movl $10, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: const_sub_add_const:
; X64:       # %bb.0:
; X64-NEXT:    movl $10, %eax
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    retq
  %t0 = sub i32 8, %arg
  %t1 = add i32 %t0, 2
  ret i32 %t1
}

define i32 @const_sub_add_const_extrause(i32 %arg) {
; X86-LABEL: const_sub_add_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl $8, %eax
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NEXT:    calll use
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NEXT:    movl $10, %eax
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: const_sub_add_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    movl %edi, %ebx
; X64-NEXT:    movl $8, %edi
; X64-NEXT:    subl %ebx, %edi
; X64-NEXT:    callq use
; X64-NEXT:    movl $10, %eax
; X64-NEXT:    subl %ebx, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub i32 8, %arg
  call void @use(i32 %t0)
  %t1 = add i32 %t0, 2
  ret i32 %t1
}

define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_add_const:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X86-NEXT:    psubd %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_const_sub_add_const:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X64-NEXT:    psubd %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_add_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    subl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 32
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT:    psubd %xmm1, %xmm0
; X86-NEXT:    calll vec_use
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10]
; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT:    psubd %xmm1, %xmm0
; X86-NEXT:    addl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: vec_const_sub_add_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT:    psubd %xmm1, %xmm0
; X64-NEXT:    callq vec_use
; X64-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10]
; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  call void @vec_use(<4 x i32> %t0)
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_const_sub_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_add_const_nonsplat:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X86-NEXT:    psubd %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_const_sub_add_const_nonsplat:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X64-NEXT:    psubd %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}

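; (c1-x)-c2
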
define i32 @const_sub_sub_const(i32 %arg) {
; X86-LABEL: const_sub_sub_const:
; X86:       # %bb.0:
; X86-NEXT:    movl $6, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: const_sub_sub_const:
; X64:       # %bb.0:
; X64-NEXT:    movl $6, %eax
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    retq
  %t0 = sub i32 8, %arg
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}

define i32 @const_sub_sub_const_extrause(i32 %arg) {
; X86-LABEL: const_sub_sub_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl $8, %eax
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NEXT:    calll use
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NEXT:    movl $6, %eax
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: const_sub_sub_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    movl %edi, %ebx
; X64-NEXT:    movl $8, %edi
; X64-NEXT:    subl %ebx, %edi
; X64-NEXT:    callq use
; X64-NEXT:    movl $6, %eax
; X64-NEXT:    subl %ebx, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub i32 8, %arg
  call void @use(i32 %t0)
  %t1 = sub i32 %t0, 2
  ret i32 %t1
}

define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_sub_const:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [6,6,6,6]
; X86-NEXT:    psubd %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_const_sub_sub_const:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [6,6,6,6]
; X64-NEXT:    psubd %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_sub_const_extrause:
; X86:       # %bb.0:
; X86-NEXT:    subl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 32
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT:    psubd %xmm1, %xmm0
; X86-NEXT:    calll vec_use
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6]
; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT:    psubd %xmm1, %xmm0
; X86-NEXT:    addl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: vec_const_sub_sub_const_extrause:
; X64:       # %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT:    psubd %xmm1, %xmm0
; X64-NEXT:    callq vec_use
; X64-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6]
; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %t1
}

define <4 x i32> @vec_const_sub_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_sub_const_nonsplat:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = <19,u,u,6>
; X86-NEXT:    psubd %xmm0, %xmm1
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_const_sub_sub_const_nonsplat:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm1 = <19,u,u,6>
; X64-NEXT:    psubd %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
  ret <4 x i32> %t1
}

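; c2-(c1-x)
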
define i32 @const_sub_const_sub(i32 %arg) {
; X86-LABEL: const_sub_const_sub:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addl $-6, %eax
; X86-NEXT:    retl
;
; X64-LABEL: const_sub_const_sub:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal -6(%rdi), %eax
; X64-NEXT:    retq
  %t0 = sub i32 8, %arg
  %t1 = sub i32 2, %t0
  ret i32 %t1
}

define i32 @const_sub_const_sub_extrause(i32 %arg) {
; X86-LABEL: const_sub_const_sub_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl $8, %esi
; X86-NEXT:    subl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NEXT:    calll use
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NEXT:    movl $2, %eax
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: const_sub_const_sub_extrause:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    movl $8, %ebx
; X64-NEXT:    subl %edi, %ebx
; X64-NEXT:    movl %ebx, %edi
; X64-NEXT:    callq use
; X64-NEXT:    movl $2, %eax
; X64-NEXT:    subl %ebx, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub i32 8, %arg
  call void @use(i32 %t0)
  %t1 = sub i32 2, %t0
  ret i32 %t1
}

define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_const_sub:
; X86:       # %bb.0:
; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_const_sub_const_sub:
; X64:       # %bb.0:
; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}

define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_const_sub_extrause:
; X86:       # %bb.0:
; X86-NEXT:    subl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 32
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X86-NEXT:    psubd %xmm0, %xmm1
; X86-NEXT:    movdqu %xmm1, (%esp) # 16-byte Spill
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    calll vec_use
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT:    psubd %xmm1, %xmm0
; X86-NEXT:    addl $28, %esp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: vec_const_sub_const_sub_extrause:
; X64:       # %bb.0:
; X64-NEXT:    subq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 32
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X64-NEXT:    psubd %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, (%rsp) # 16-byte Spill
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    callq vec_use
; X64-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT:    addq $24, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
  ret <4 x i32> %t1
}

define <4 x i32> @vec_const_sub_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_const_sub_nonsplat:
; X86:       # %bb.0:
; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_const_sub_const_sub_nonsplat:
; X64:       # %bb.0:
; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
  ret <4 x i32> %t1
}