1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -verify-machineinstrs | FileCheck %s
4 ; This file tests the following optimization
6 ; leal (%rdx,%rax), %esi
9 ; can be transformed to
14 ; C - (A + B) --> C - A - B
15 define i32 @test1(ptr %p, i32 %a, i32 %b, i32 %c) {
17 ; CHECK: # %bb.0: # %entry
18 ; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
19 ; CHECK-NEXT: movl %esi, %eax
20 ; CHECK-NEXT: subl %edx, %ecx
21 ; CHECK-NEXT: subl %eax, %ecx
22 ; CHECK-NEXT: movl %ecx, (%rdi)
23 ; CHECK-NEXT: subl %edx, %eax
24 ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
29 store i32 %sub, ptr %p, align 4
30 %sub1 = sub i32 %a, %b
34 ; (A + B) + C --> C + A + B
35 define i32 @test2(ptr %p, i32 %a, i32 %b, i32 %c) {
37 ; CHECK: # %bb.0: # %entry
38 ; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
39 ; CHECK-NEXT: movl %esi, %eax
40 ; CHECK-NEXT: addl %eax, %ecx
41 ; CHECK-NEXT: addl %edx, %ecx
42 ; CHECK-NEXT: movl %ecx, (%rdi)
43 ; CHECK-NEXT: subl %edx, %eax
44 ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
49 store i32 %1, ptr %p, align 4
50 %sub1 = sub i32 %a, %b
54 ; C + (A + B) --> C + A + B
55 define i32 @test3(ptr %p, i32 %a, i32 %b, i32 %c) {
57 ; CHECK: # %bb.0: # %entry
58 ; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
59 ; CHECK-NEXT: movl %esi, %eax
60 ; CHECK-NEXT: addl %eax, %ecx
61 ; CHECK-NEXT: addl %edx, %ecx
62 ; CHECK-NEXT: movl %ecx, (%rdi)
63 ; CHECK-NEXT: subl %edx, %eax
64 ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
69 store i32 %1, ptr %p, align 4
70 %sub1 = sub i32 %a, %b
75 ; Can't be converted to A - C + B without introducing a MOV
76 define i32 @test4(ptr %p, i32 %a, i32 %b, i32 %c) {
78 ; CHECK: # %bb.0: # %entry
79 ; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
80 ; CHECK-NEXT: movl %esi, %eax
81 ; CHECK-NEXT: leal (%rdx,%rax), %esi
82 ; CHECK-NEXT: subl %ecx, %esi
83 ; CHECK-NEXT: movl %esi, (%rdi)
84 ; CHECK-NEXT: subl %edx, %eax
85 ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
90 store i32 %sub, ptr %p, align 4
91 %sub1 = sub i32 %a, %b
95 define i64 @test5(ptr %p, i64 %a, i64 %b, i64 %c) {
97 ; CHECK: # %bb.0: # %entry
98 ; CHECK-NEXT: movq (%rdi), %rax
99 ; CHECK-NEXT: subq %rdx, %rcx
100 ; CHECK-NEXT: subq %rax, %rcx
101 ; CHECK-NEXT: movq %rcx, (%rdi)
102 ; CHECK-NEXT: subq %rdx, %rax
105 %ld = load i64, ptr %p, align 8
107 %sub = sub i64 %c, %0
108 store i64 %sub, ptr %p, align 8
109 %sub1 = sub i64 %ld, %b
113 define i64 @test6(ptr %p, i64 %a, i64 %b, i64 %c) {
114 ; CHECK-LABEL: test6:
115 ; CHECK: # %bb.0: # %entry
116 ; CHECK-NEXT: movq (%rdi), %rax
117 ; CHECK-NEXT: addq %rdx, %rcx
118 ; CHECK-NEXT: addq %rax, %rcx
119 ; CHECK-NEXT: movq %rcx, (%rdi)
120 ; CHECK-NEXT: subq %rdx, %rax
123 %ld = load i64, ptr %p, align 8
126 store i64 %1, ptr %p, align 8
127 %sub1 = sub i64 %ld, %b
131 define i64 @test7(ptr %p, i64 %a, i64 %b, i64 %c) {
132 ; CHECK-LABEL: test7:
133 ; CHECK: # %bb.0: # %entry
134 ; CHECK-NEXT: movq (%rdi), %rax
135 ; CHECK-NEXT: addq %rdx, %rcx
136 ; CHECK-NEXT: addq %rax, %rcx
137 ; CHECK-NEXT: movq %rcx, (%rdi)
138 ; CHECK-NEXT: subq %rdx, %rax
141 %ld = load i64, ptr %p, align 8
144 store i64 %1, ptr %p, align 8
145 %sub1 = sub i64 %ld, %b
149 ; The flags generated by the sub instruction are used by the following branch,
150 ; so it should not be transformed.
151 define i64 @test8(ptr %p, i64 %a, i64 %b, i64 %c) {
152 ; CHECK-LABEL: test8:
153 ; CHECK: # %bb.0: # %entry
154 ; CHECK-NEXT: movq (%rdi), %rax
155 ; CHECK-NEXT: leaq (%rdx,%rax), %rsi
156 ; CHECK-NEXT: subq %rsi, %rcx
157 ; CHECK-NEXT: ja .LBB7_2
158 ; CHECK-NEXT: # %bb.1: # %then
159 ; CHECK-NEXT: movq %rcx, (%rdi)
160 ; CHECK-NEXT: subq %rdx, %rax
162 ; CHECK-NEXT: .LBB7_2: # %else
163 ; CHECK-NEXT: movq $0, (%rdi)
164 ; CHECK-NEXT: subq %rdx, %rax
167 %ld = load i64, ptr %p, align 8
169 %sub = sub i64 %c, %0
170 %cond = icmp ule i64 %c, %0
171 br i1 %cond, label %then, label %else
174 store i64 %sub, ptr %p, align 8
178 store i64 0, ptr %p, align 8
182 %sub1 = sub i64 %ld, %b
187 ; The sub-register use of the LEA destination should block the transformation.
188 define void @test9(i64 %p, i64 %s) {
189 ; CHECK-LABEL: test9:
190 ; CHECK: # %bb.0: # %entry
191 ; CHECK-NEXT: leaq (%rsi,%rdi), %rax
192 ; CHECK-NEXT: xorl %ecx, %ecx
193 ; CHECK-NEXT: testl $4095, %eax # imm = 0xFFF
194 ; CHECK-NEXT: setne %cl
195 ; CHECK-NEXT: shll $12, %ecx
196 ; CHECK-NEXT: addq %rax, %rcx
197 ; CHECK-NEXT: andq $-4096, %rcx # imm = 0xF000
198 ; CHECK-NEXT: addq %rcx, %rdi
199 ; CHECK-NEXT: jmp bar@PLT # TAILCALL
201 %add = add i64 %s, %p
202 %rem = and i64 %add, 4095
203 %cmp.not = icmp eq i64 %rem, 0
204 %add18 = select i1 %cmp.not, i64 0, i64 4096
205 %div9 = add i64 %add18, %add
206 %mul = and i64 %div9, -4096
207 %add2 = add i64 %mul, %p
208 tail call void @bar(i64 %add2, i64 %s)
212 define void @test10() {
213 ; CHECK-LABEL: test10:
214 ; CHECK: # %bb.0: # %entry
215 ; CHECK-NEXT: movl (%rax), %eax
216 ; CHECK-NEXT: movzwl (%rax), %ecx
217 ; CHECK-NEXT: leal (%rcx,%rcx,2), %esi
218 ; CHECK-NEXT: movl %ecx, %edi
219 ; CHECK-NEXT: subl %ecx, %edi
220 ; CHECK-NEXT: subl %ecx, %edi
221 ; CHECK-NEXT: negl %esi
222 ; CHECK-NEXT: xorl %ecx, %ecx
223 ; CHECK-NEXT: cmpl $4, %eax
224 ; CHECK-NEXT: movl %edi, (%rax)
225 ; CHECK-NEXT: movl %esi, (%rax)
226 ; CHECK-NEXT: cmovnel %eax, %ecx
227 ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
228 ; CHECK-NEXT: sarl %cl, %esi
229 ; CHECK-NEXT: movl %esi, (%rax)
232 %tmp = load i32, ptr undef, align 4
233 %tmp3 = sdiv i32 undef, 6
234 %tmp4 = load i32, ptr undef, align 4
235 %tmp5 = icmp eq i32 %tmp4, 4
236 %tmp6 = select i1 %tmp5, i32 %tmp3, i32 %tmp
237 %tmp10 = load i16, ptr undef, align 2
238 %tmp11 = zext i16 %tmp10 to i32
239 %tmp13 = zext i16 undef to i32
240 %tmp15 = load i16, ptr undef, align 2
241 %tmp16 = zext i16 %tmp15 to i32
242 %tmp19 = shl nsw i32 undef, 1
243 %tmp25 = shl nsw i32 undef, 1
244 %tmp26 = add nsw i32 %tmp25, %tmp13
245 %tmp28 = shl nsw i32 undef, 1
246 %tmp29 = add nsw i32 %tmp28, %tmp16
247 %tmp30 = sub nsw i32 %tmp19, %tmp29
248 %tmp31 = sub nsw i32 %tmp11, %tmp26
249 %tmp32 = shl nsw i32 %tmp30, 1
250 %tmp33 = add nsw i32 %tmp32, %tmp31
251 store i32 %tmp33, ptr undef, align 4
252 %tmp34 = mul nsw i32 %tmp31, -2
253 %tmp35 = add nsw i32 %tmp34, %tmp30
254 store i32 %tmp35, ptr undef, align 4
255 %tmp36 = select i1 %tmp5, i32 undef, i32 undef
256 %tmp38 = load i32, ptr undef, align 4
257 %tmp39 = ashr i32 %tmp38, %tmp6
258 store i32 %tmp39, ptr undef, align 4
262 declare void @bar(i64, i64)