; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
declare void @use(i32 %arg)
declare void @vec_use(<4 x i32> %arg)
define i32 @add_const_add_const(i32 %arg) {
; X86-LABEL: add_const_add_const:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $10, %eax
; X64-LABEL: add_const_add_const:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal 10(%rdi), %eax
define i32 @add_const_add_const_extrause(i32 %arg) {
; X86-LABEL: add_const_add_const_extrause:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal 8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: addl $10, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: add_const_add_const_extrause:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal 8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: addl $10, %ebx
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: .cfi_def_cfa_offset 8
  call void @use(i32 %t0)
define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_add_const:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X64-LABEL: vec_add_const_add_const:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_add_const_extrause:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: paddd %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: vec_add_const_add_const_extrause:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: paddd %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_add_const_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_add_const_nonsplat:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X64-LABEL: vec_add_const_add_const_nonsplat:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
define i32 @add_const_sub_const(i32 %arg) {
; X86-LABEL: add_const_sub_const:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $6, %eax
; X64-LABEL: add_const_sub_const:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal 6(%rdi), %eax
  %t0 = add i32 %arg, 8
define i32 @add_const_sub_const_extrause(i32 %arg) {
; X86-LABEL: add_const_sub_const_extrause:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal 8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: addl $6, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: add_const_sub_const_extrause:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal 8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: addl $6, %ebx
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = add i32 %arg, 8
  call void @use(i32 %t0)
define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_sub_const:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X64-LABEL: vec_add_const_sub_const:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_sub_const_extrause:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: paddd %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: vec_add_const_sub_const_extrause:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: paddd %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_add_const_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_sub_const_nonsplat:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X64-LABEL: vec_add_const_sub_const_nonsplat:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
define i32 @add_const_const_sub(i32 %arg) {
; X86-LABEL: add_const_const_sub:
; X86-NEXT: movl $-6, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X64-LABEL: add_const_const_sub:
; X64-NEXT: movl $-6, %eax
; X64-NEXT: subl %edi, %eax
  %t0 = add i32 %arg, 8
define i32 @add_const_const_sub_extrause(i32 %arg) {
; X86-LABEL: add_const_const_sub_extrause:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal 8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: movl $-6, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: add_const_const_sub_extrause:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal 8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: movl $-6, %eax
; X64-NEXT: subl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = add i32 %arg, 8
  call void @use(i32 %t0)
define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_const_sub:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X64-LABEL: vec_add_const_const_sub:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_const_sub_extrause:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: paddd %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: vec_add_const_const_sub_extrause:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: paddd %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
define <4 x i32> @vec_add_const_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_add_const_const_sub_nonsplat:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X64-LABEL: vec_add_const_const_sub_nonsplat:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
define i32 @sub_const_add_const(i32 %arg) {
; X86-LABEL: sub_const_add_const:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $-6, %eax
; X64-LABEL: sub_const_add_const:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal -6(%rdi), %eax
  %t0 = sub i32 %arg, 8
define i32 @sub_const_add_const_extrause(i32 %arg) {
; X86-LABEL: sub_const_add_const_extrause:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal -8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: addl $-6, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: sub_const_add_const_extrause:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal -8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: addl $-6, %ebx
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub i32 %arg, 8
  call void @use(i32 %t0)
define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_add_const:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X64-LABEL: vec_sub_const_add_const:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_add_const_extrause:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: vec_sub_const_add_const_extrause:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_sub_const_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_add_const_nonsplat:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X64-LABEL: vec_sub_const_add_const_nonsplat:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
define i32 @sub_const_sub_const(i32 %arg) {
; X86-LABEL: sub_const_sub_const:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $-10, %eax
; X64-LABEL: sub_const_sub_const:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal -10(%rdi), %eax
  %t0 = sub i32 %arg, 8
define i32 @sub_const_sub_const_extrause(i32 %arg) {
; X86-LABEL: sub_const_sub_const_extrause:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal -8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: addl $-10, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: sub_const_sub_const_extrause:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal -8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: addl $-10, %ebx
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub i32 %arg, 8
  call void @use(i32 %t0)
define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_sub_const:
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X64-LABEL: vec_sub_const_sub_const:
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_sub_const_extrause:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqu (%esp), %xmm0 # 16-byte Reload
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: vec_sub_const_sub_const_extrause:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_sub_const_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_sub_const_nonsplat:
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X64-LABEL: vec_sub_const_sub_const_nonsplat:
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
define i32 @sub_const_const_sub(i32 %arg) {
; X86-LABEL: sub_const_const_sub:
; X86-NEXT: movl $10, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X64-LABEL: sub_const_const_sub:
; X64-NEXT: movl $10, %eax
; X64-NEXT: subl %edi, %eax
  %t0 = sub i32 %arg, 8
define i32 @sub_const_const_sub_extrause(i32 %arg) {
; X86-LABEL: sub_const_const_sub_extrause:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: leal -8(%esi), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: movl $10, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: sub_const_const_sub_extrause:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: leal -8(%rbx), %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: movl $10, %eax
; X64-NEXT: subl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub i32 %arg, 8
  call void @use(i32 %t0)
define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_const_sub:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X64-LABEL: vec_sub_const_const_sub:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_const_sub_extrause:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: vec_sub_const_const_sub_extrause:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
define <4 x i32> @vec_sub_const_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_sub_const_const_sub_nonsplat:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X64-LABEL: vec_sub_const_const_sub_nonsplat:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
define i32 @const_sub_add_const(i32 %arg) {
; X86-LABEL: const_sub_add_const:
; X86-NEXT: movl $10, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X64-LABEL: const_sub_add_const:
; X64-NEXT: movl $10, %eax
; X64-NEXT: subl %edi, %eax
  %t0 = sub i32 8, %arg
define i32 @const_sub_add_const_extrause(i32 %arg) {
; X86-LABEL: const_sub_add_const_extrause:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl $8, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: movl $10, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: const_sub_add_const_extrause:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: movl $8, %edi
; X64-NEXT: subl %ebx, %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: movl $10, %eax
; X64-NEXT: subl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub i32 8, %arg
  call void @use(i32 %t0)
define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_add_const:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X64-LABEL: vec_const_sub_add_const:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [10,10,10,10]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_add_const_extrause:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: vec_const_sub_add_const_extrause:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: psubd %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  call void @vec_use(<4 x i32> %t0)
  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_const_sub_add_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_add_const_nonsplat:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X64-LABEL: vec_const_sub_add_const_nonsplat:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <23,u,u,10>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
define i32 @const_sub_sub_const(i32 %arg) {
; X86-LABEL: const_sub_sub_const:
; X86-NEXT: movl $6, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X64-LABEL: const_sub_sub_const:
; X64-NEXT: movl $6, %eax
; X64-NEXT: subl %edi, %eax
  %t0 = sub i32 8, %arg
define i32 @const_sub_sub_const_extrause(i32 %arg) {
; X86-LABEL: const_sub_sub_const_extrause:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl $8, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: movl $6, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: const_sub_sub_const_extrause:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: movl $8, %edi
; X64-NEXT: subl %ebx, %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: movl $6, %eax
; X64-NEXT: subl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub i32 8, %arg
  call void @use(i32 %t0)
define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_sub_const:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X64-LABEL: vec_const_sub_sub_const:
; X64-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_sub_const_extrause:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movdqu %xmm0, (%esp) # 16-byte Spill
; X86-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: vec_const_sub_sub_const_extrause:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; X64-NEXT: psubd %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
define <4 x i32> @vec_const_sub_sub_const_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_sub_const_nonsplat:
; X86-NEXT: movdqa {{.*#+}} xmm1 = <19,u,u,6>
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X64-LABEL: vec_const_sub_sub_const_nonsplat:
; X64-NEXT: movdqa {{.*#+}} xmm1 = <19,u,u,6>
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
define i32 @const_sub_const_sub(i32 %arg) {
; X86-LABEL: const_sub_const_sub:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $-6, %eax
; X64-LABEL: const_sub_const_sub:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal -6(%rdi), %eax
  %t0 = sub i32 8, %arg
  %t1 = sub i32 2, %t0
define i32 @const_sub_const_sub_extrause(i32 %arg) {
; X86-LABEL: const_sub_const_sub_extrause:
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl $8, %esi
; X86-NEXT: subl {{[0-9]+}}(%esp), %esi
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_adjust_cfa_offset 4
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: .cfi_adjust_cfa_offset -4
; X86-NEXT: movl $2, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: const_sub_const_sub_extrause:
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
; X64-NEXT: movl $8, %ebx
; X64-NEXT: subl %edi, %ebx
; X64-NEXT: movl %ebx, %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: movl $2, %eax
; X64-NEXT: subl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub i32 8, %arg
  call void @use(i32 %t0)
  %t1 = sub i32 2, %t0
define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_const_sub:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X64-LABEL: vec_const_sub_const_sub:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_const_sub_extrause:
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X86-NEXT: psubd %xmm0, %xmm1
; X86-NEXT: movdqu %xmm1, (%esp) # 16-byte Spill
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: calll vec_use@PLT
; X86-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X86-NEXT: movdqu (%esp), %xmm1 # 16-byte Reload
; X86-NEXT: psubd %xmm1, %xmm0
; X86-NEXT: addl $16, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X64-LABEL: vec_const_sub_const_sub_extrause:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8]
; X64-NEXT: psubd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: callq vec_use@PLT
; X64-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2]
; X64-NEXT: psubd (%rsp), %xmm0 # 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
  call void @vec_use(<4 x i32> %t0)
  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
define <4 x i32> @vec_const_sub_const_sub_nonsplat(<4 x i32> %arg) {
; X86-LABEL: vec_const_sub_const_sub_nonsplat:
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X64-LABEL: vec_const_sub_const_sub_nonsplat:
; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0