1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+slow-lea,+slow-3ops-lea,+sse,+sse2 | FileCheck %s --check-prefixes=ALL,X32
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-lea,+slow-3ops-lea,+sse,+sse2 | FileCheck %s --check-prefixes=ALL,X64
5 ; Scalar tests. Trying to avoid LEA here, so the output is actually readable.
8 ; Outer 'add' is commutative - 2 variants.
10 define i32 @sink_add_of_const_to_add0(i32 %a, i32 %b) {
11 ; X32-LABEL: sink_add_of_const_to_add0:
13 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
14 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
15 ; X32-NEXT: addl %ecx, %eax
16 ; X32-NEXT: addl $32, %eax
19 ; X64-LABEL: sink_add_of_const_to_add0:
21 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
22 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
23 ; X64-NEXT: leal 32(%rdi,%rsi), %eax
25 %t0 = add i32 %a, 32 ; constant always on RHS
29 define i32 @sink_add_of_const_to_add1(i32 %a, i32 %b) {
30 ; X32-LABEL: sink_add_of_const_to_add1:
32 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
33 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
34 ; X32-NEXT: addl %ecx, %eax
35 ; X32-NEXT: addl $32, %eax
38 ; X64-LABEL: sink_add_of_const_to_add1:
40 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
41 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
42 ; X64-NEXT: leal 32(%rdi,%rsi), %eax
44 %t0 = add i32 %a, 32 ; constant always on RHS
50 ; Outer 'add' is commutative - 2 variants.
52 define i32 @sink_sub_of_const_to_add0(i32 %a, i32 %b) {
53 ; X32-LABEL: sink_sub_of_const_to_add0:
55 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
56 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
57 ; X32-NEXT: addl %ecx, %eax
58 ; X32-NEXT: addl $-32, %eax
61 ; X64-LABEL: sink_sub_of_const_to_add0:
63 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
64 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
65 ; X64-NEXT: leal -32(%rdi,%rsi), %eax
71 define i32 @sink_sub_of_const_to_add1(i32 %a, i32 %b) {
72 ; X32-LABEL: sink_sub_of_const_to_add1:
74 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
75 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
76 ; X32-NEXT: addl %ecx, %eax
77 ; X32-NEXT: addl $-32, %eax
80 ; X64-LABEL: sink_sub_of_const_to_add1:
82 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
83 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
84 ; X64-NEXT: leal -32(%rdi,%rsi), %eax
92 ; Outer 'add' is commutative - 2 variants.
94 define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b) {
95 ; X32-LABEL: sink_sub_from_const_to_add0:
97 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
98 ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
99 ; X32-NEXT: addl $32, %eax
102 ; X64-LABEL: sink_sub_from_const_to_add0:
104 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
105 ; X64-NEXT: subl %edi, %esi
106 ; X64-NEXT: leal 32(%rsi), %eax
112 define i32 @sink_sub_from_const_to_add1(i32 %a, i32 %b) {
113 ; X32-LABEL: sink_sub_from_const_to_add1:
115 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
116 ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
117 ; X32-NEXT: addl $32, %eax
120 ; X64-LABEL: sink_sub_from_const_to_add1:
122 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
123 ; X64-NEXT: subl %edi, %esi
124 ; X64-NEXT: leal 32(%rsi), %eax
131 ; sub (add %x, C), %y
132 ; sub %y, (add %x, C)
134 define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b) {
135 ; X32-LABEL: sink_add_of_const_to_sub:
137 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
138 ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
139 ; X32-NEXT: addl $32, %eax
142 ; X64-LABEL: sink_add_of_const_to_sub:
144 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
145 ; X64-NEXT: subl %esi, %edi
146 ; X64-NEXT: leal 32(%rdi), %eax
148 %t0 = add i32 %a, 32 ; constant always on RHS
152 define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b) {
153 ; X32-LABEL: sink_add_of_const_to_sub2:
155 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
156 ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
157 ; X32-NEXT: addl $-32, %eax
160 ; X64-LABEL: sink_add_of_const_to_sub2:
162 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
163 ; X64-NEXT: subl %edi, %esi
164 ; X64-NEXT: leal -32(%rsi), %eax
166 %t0 = add i32 %a, 32 ; constant always on RHS
171 ; sub (sub %x, C), %y
172 ; sub %y, (sub %x, C)
174 define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b) {
175 ; X32-LABEL: sink_sub_of_const_to_sub:
177 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
178 ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
179 ; X32-NEXT: addl $-32, %eax
182 ; X64-LABEL: sink_sub_of_const_to_sub:
184 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
185 ; X64-NEXT: subl %esi, %edi
186 ; X64-NEXT: leal -32(%rdi), %eax
192 define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
193 ; X32-LABEL: sink_sub_of_const_to_sub2:
195 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
196 ; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
197 ; X32-NEXT: addl $32, %eax
200 ; X64-LABEL: sink_sub_of_const_to_sub2:
202 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
203 ; X64-NEXT: subl %edi, %esi
204 ; X64-NEXT: leal 32(%rsi), %eax
211 ; sub (sub C, %x), %y
212 ; sub %y, (sub C, %x)
214 define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b) {
215 ; X32-LABEL: sink_sub_from_const_to_sub:
217 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
218 ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
219 ; X32-NEXT: movl $32, %eax
220 ; X32-NEXT: subl %ecx, %eax
223 ; X64-LABEL: sink_sub_from_const_to_sub:
225 ; X64-NEXT: addl %esi, %edi
226 ; X64-NEXT: movl $32, %eax
227 ; X64-NEXT: subl %edi, %eax
233 define i32 @sink_sub_from_const_to_sub2(i32 %a, i32 %b) {
234 ; X32-LABEL: sink_sub_from_const_to_sub2:
236 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
237 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
238 ; X32-NEXT: addl %ecx, %eax
239 ; X32-NEXT: addl $-32, %eax
242 ; X64-LABEL: sink_sub_from_const_to_sub2:
244 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
245 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
246 ; X64-NEXT: leal -32(%rdi,%rsi), %eax
253 ;------------------------------------------------------------------------------;
254 ; Basic vector tests. Here it is easier to see where the constant operand is.
255 ;------------------------------------------------------------------------------;
257 ; add (add %x, C), %y
258 ; Outer 'add' is commutative - 2 variants.
; Variant 0: the inner 'add %a, C' result is the first operand of the outer add.
; The checks expect the register-register paddd first and the constant applied
; last, via a paddd with an LCPI memory operand.
260 define <4 x i32> @vec_sink_add_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
261 ; X32-LABEL: vec_sink_add_of_const_to_add0:
263 ; X32-NEXT: paddd %xmm1, %xmm0
264 ; X32-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
267 ; X64-LABEL: vec_sink_add_of_const_to_add0:
269 ; X64-NEXT: paddd %xmm1, %xmm0
270 ; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
272 %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
273 %r = add <4 x i32> %t0, %b
; Variant 1 (commuted): the inner 'add %a, C' result is the second operand of
; the outer add. The checks expect the register-register paddd first and the
; constant applied last, via a paddd with an LCPI memory operand.
276 define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
277 ; X32-LABEL: vec_sink_add_of_const_to_add1:
279 ; X32-NEXT: paddd %xmm1, %xmm0
280 ; X32-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
283 ; X64-LABEL: vec_sink_add_of_const_to_add1:
285 ; X64-NEXT: paddd %xmm1, %xmm0
286 ; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
288 %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
289 %r = add <4 x i32> %b, %t0
293 ; add (sub %x, C), %y
294 ; Outer 'add' is commutative - 2 variants.
; Variant 0: the inner 'sub %a, C' result is the first operand of the outer add.
; The checks expect the register-register paddd first, followed by a psubd of
; the constant (LCPI memory operand) from the sum.
296 define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
297 ; X32-LABEL: vec_sink_sub_of_const_to_add0:
299 ; X32-NEXT: paddd %xmm1, %xmm0
300 ; X32-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
303 ; X64-LABEL: vec_sink_sub_of_const_to_add0:
305 ; X64-NEXT: paddd %xmm1, %xmm0
306 ; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
308 %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
309 %r = add <4 x i32> %t0, %b
; Variant 1 (commuted): the inner 'sub %a, C' result is the second operand of
; the outer add. The checks expect the register-register paddd first, followed
; by a psubd of the constant (LCPI memory operand) from the sum.
312 define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
313 ; X32-LABEL: vec_sink_sub_of_const_to_add1:
315 ; X32-NEXT: paddd %xmm1, %xmm0
316 ; X32-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
319 ; X64-LABEL: vec_sink_sub_of_const_to_add1:
321 ; X64-NEXT: paddd %xmm1, %xmm0
322 ; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
324 %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
325 %r = add <4 x i32> %b, %t0
329 ; add (sub C, %x), %y
330 ; Outer 'add' is commutative - 2 variants.
; Variant 0: the inner 'sub C, %a' result is the first operand of the outer add.
; The checks expect a register-register psubd into %xmm1, then a paddd of the
; constant (LCPI memory operand) into %xmm1, then a movdqa of %xmm1 into %xmm0.
332 define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
333 ; X32-LABEL: vec_sink_sub_from_const_to_add0:
335 ; X32-NEXT: psubd %xmm0, %xmm1
336 ; X32-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
337 ; X32-NEXT: movdqa %xmm1, %xmm0
340 ; X64-LABEL: vec_sink_sub_from_const_to_add0:
342 ; X64-NEXT: psubd %xmm0, %xmm1
343 ; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
344 ; X64-NEXT: movdqa %xmm1, %xmm0
346 %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
347 %r = add <4 x i32> %t0, %b
; Variant 1 (commuted): the inner 'sub C, %a' result is the second operand of
; the outer add. The checks expect a register-register psubd into %xmm1, then a
; paddd of the constant (LCPI memory operand) into %xmm1, then a movdqa of
; %xmm1 into %xmm0.
350 define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
351 ; X32-LABEL: vec_sink_sub_from_const_to_add1:
353 ; X32-NEXT: psubd %xmm0, %xmm1
354 ; X32-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
355 ; X32-NEXT: movdqa %xmm1, %xmm0
358 ; X64-LABEL: vec_sink_sub_from_const_to_add1:
360 ; X64-NEXT: psubd %xmm0, %xmm1
361 ; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
362 ; X64-NEXT: movdqa %xmm1, %xmm0
364 %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
365 %r = add <4 x i32> %b, %t0
369 ; sub (add %x, C), %y
370 ; sub %y, (add %x, C)
; The inner 'add %a, C' result is the minuend (first operand) of the outer sub.
; The checks expect the register-register psubd first and the constant applied
; last, via a paddd with an LCPI memory operand.
372 define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
373 ; X32-LABEL: vec_sink_add_of_const_to_sub:
375 ; X32-NEXT: psubd %xmm1, %xmm0
376 ; X32-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
379 ; X64-LABEL: vec_sink_add_of_const_to_sub:
381 ; X64-NEXT: psubd %xmm1, %xmm0
382 ; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
384 %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
385 %r = sub <4 x i32> %t0, %b
; The inner 'add %a, C' result is the subtrahend (second operand) of the outer
; sub. The checks expect a register-register psubd into %xmm1, then a psubd of
; the constant (LCPI memory operand) from %xmm1, then a movdqa of %xmm1 into
; %xmm0.
388 define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
389 ; X32-LABEL: vec_sink_add_of_const_to_sub2:
391 ; X32-NEXT: psubd %xmm0, %xmm1
392 ; X32-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
393 ; X32-NEXT: movdqa %xmm1, %xmm0
396 ; X64-LABEL: vec_sink_add_of_const_to_sub2:
398 ; X64-NEXT: psubd %xmm0, %xmm1
399 ; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
400 ; X64-NEXT: movdqa %xmm1, %xmm0
402 %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
403 %r = sub <4 x i32> %b, %t0
407 ; sub (sub %x, C), %y
408 ; sub %y, (sub %x, C)
; The inner 'sub %a, C' result is the minuend (first operand) of the outer sub.
; The checks expect the register-register psubd first, followed by a psubd of
; the constant (LCPI memory operand).
410 define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
411 ; X32-LABEL: vec_sink_sub_of_const_to_sub:
413 ; X32-NEXT: psubd %xmm1, %xmm0
414 ; X32-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
417 ; X64-LABEL: vec_sink_sub_of_const_to_sub:
419 ; X64-NEXT: psubd %xmm1, %xmm0
420 ; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
422 %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
423 %r = sub <4 x i32> %t0, %b
; The inner 'sub %a, C' result is the subtrahend (second operand) of the outer
; sub. The checks expect a register-register psubd into %xmm1, then a paddd of
; the constant (LCPI memory operand) into %xmm1, then a movdqa of %xmm1 into
; %xmm0.
426 define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
427 ; X32-LABEL: vec_sink_sub_of_const_to_sub2:
429 ; X32-NEXT: psubd %xmm0, %xmm1
430 ; X32-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
431 ; X32-NEXT: movdqa %xmm1, %xmm0
434 ; X64-LABEL: vec_sink_sub_of_const_to_sub2:
436 ; X64-NEXT: psubd %xmm0, %xmm1
437 ; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
438 ; X64-NEXT: movdqa %xmm1, %xmm0
440 %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
441 %r = sub <4 x i32> %b, %t0
445 ; sub (sub C, %x), %y
446 ; sub %y, (sub C, %x)
; The inner 'sub C, %a' result is the minuend (first operand) of the outer sub.
; This function is checked under the shared ALL prefix rather than separate
; per-target prefixes. The checks expect the constant to be loaded into %xmm2,
; a register-register paddd into %xmm0, a psubd of %xmm0 from %xmm2, and a
; movdqa of %xmm2 into %xmm0 before the return.
448 define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
449 ; ALL-LABEL: vec_sink_sub_from_const_to_sub:
451 ; ALL-NEXT: movdqa {{.*#+}} xmm2 = <42,24,u,46>
452 ; ALL-NEXT: paddd %xmm1, %xmm0
453 ; ALL-NEXT: psubd %xmm0, %xmm2
454 ; ALL-NEXT: movdqa %xmm2, %xmm0
455 ; ALL-NEXT: ret{{[l|q]}}
456 %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
457 %r = sub <4 x i32> %t0, %b
460 define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
461 ; X32-LABEL: vec_sink_sub_from_const_to_sub2:
463 ; X32-NEXT: paddd %xmm1, %xmm0
464 ; X32-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
467 ; X64-LABEL: vec_sink_sub_from_const_to_sub2:
469 ; X64-NEXT: paddd %xmm1, %xmm0
470 ; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
472 %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
473 %r = sub <4 x i32> %b, %t0