1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=i686-- < %s | FileCheck %s --check-prefix=X86
3 ; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s --check-prefix=X64
5 ; Optimize an expanded SRL/SHL that is used as an input of a
6 ; SETCC comparing it with zero, by removing the rotation.
8 ; See https://bugs.llvm.org/show_bug.cgi?id=50197
; Loop test: %inc = %phi.a + 1 is computed on i128 and the loop exits once
; %inc is unsigned-less-than 1152921504606846976 (2^60).
; In the x86-64 checks the exit condition is expected to be evaluated with a
; single shrq $60 followed by orq before the jne; the i686 checks OR the
; upper words together and still contain a shrdl $28.
9 define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
10 ; X86-LABEL: opt_setcc_lt_power_of_2:
12 ; X86-NEXT: pushl %ebp
13 ; X86-NEXT: pushl %ebx
14 ; X86-NEXT: pushl %edi
15 ; X86-NEXT: pushl %esi
16 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
17 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
18 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
19 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
20 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
21 ; X86-NEXT: .p2align 4, 0x90
22 ; X86-NEXT: .LBB0_1: # %loop
23 ; X86-NEXT: # =>This Inner Loop Header: Depth=1
24 ; X86-NEXT: addl $1, %edi
25 ; X86-NEXT: adcl $0, %esi
26 ; X86-NEXT: adcl $0, %edx
27 ; X86-NEXT: adcl $0, %ecx
28 ; X86-NEXT: movl %edx, %ebx
29 ; X86-NEXT: orl %ecx, %ebx
30 ; X86-NEXT: movl %esi, %ebp
31 ; X86-NEXT: orl %ebx, %ebp
32 ; X86-NEXT: shrdl $28, %ebx, %ebp
33 ; X86-NEXT: jne .LBB0_1
34 ; X86-NEXT: # %bb.2: # %exit
35 ; X86-NEXT: movl %edi, (%eax)
36 ; X86-NEXT: movl %esi, 4(%eax)
37 ; X86-NEXT: movl %edx, 8(%eax)
38 ; X86-NEXT: movl %ecx, 12(%eax)
45 ; X64-LABEL: opt_setcc_lt_power_of_2:
47 ; X64-NEXT: movq %rsi, %rdx
48 ; X64-NEXT: movq %rdi, %rax
49 ; X64-NEXT: .p2align 4, 0x90
50 ; X64-NEXT: .LBB0_1: # %loop
51 ; X64-NEXT: # =>This Inner Loop Header: Depth=1
52 ; X64-NEXT: addq $1, %rax
53 ; X64-NEXT: adcq $0, %rdx
54 ; X64-NEXT: movq %rax, %rcx
55 ; X64-NEXT: shrq $60, %rcx
56 ; X64-NEXT: orq %rdx, %rcx
57 ; X64-NEXT: jne .LBB0_1
58 ; X64-NEXT: # %bb.2: # %exit
63 %phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]
64 %inc = add i128 %phi.a, 1
65 %cmp = icmp ult i128 %inc, 1152921504606846976
66 br i1 %cmp, label %exit, label %loop
; Checks codegen for `icmp eq (lshr i128 %a, 17), 0`.
; The expected x86-64 output is a plain shrq $17 followed by orq, with no
; double-word shift; the expected i686 output ORs the words together and
; keeps a single shldl $15.
72 define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
73 ; X86-LABEL: opt_setcc_srl_eq_zero:
75 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
76 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
77 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
78 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
79 ; X86-NEXT: orl %ecx, %edx
80 ; X86-NEXT: orl %eax, %edx
81 ; X86-NEXT: orl %ecx, %eax
82 ; X86-NEXT: shldl $15, %edx, %eax
86 ; X64-LABEL: opt_setcc_srl_eq_zero:
88 ; X64-NEXT: shrq $17, %rdi
89 ; X64-NEXT: orq %rsi, %rdi
92 %srl = lshr i128 %a, 17
93 %cmp = icmp eq i128 %srl, 0
; Checks codegen for `icmp ne (lshr i128 %a, 17), 0`.
; The expected instruction sequences match the `eq` variant of this test
; (shrq $17 + orq on x86-64; orl chain + shldl $15 on i686), with the
; result materialized by setne.
97 define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
98 ; X86-LABEL: opt_setcc_srl_ne_zero:
100 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
101 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
102 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
103 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
104 ; X86-NEXT: orl %ecx, %edx
105 ; X86-NEXT: orl %eax, %edx
106 ; X86-NEXT: orl %ecx, %eax
107 ; X86-NEXT: shldl $15, %edx, %eax
108 ; X86-NEXT: setne %al
111 ; X64-LABEL: opt_setcc_srl_ne_zero:
113 ; X64-NEXT: shrq $17, %rdi
114 ; X64-NEXT: orq %rsi, %rdi
115 ; X64-NEXT: setne %al
117 %srl = lshr i128 %a, 17
118 %cmp = icmp ne i128 %srl, 0
; Checks codegen for `icmp eq (shl i128 %a, 17), 0`.
; The expected x86-64 output is a plain shlq $17 followed by orq; the
; expected i686 output is a single shll $17 plus an orl chain. Neither
; expected sequence contains a double-word shift (shld).
122 define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
123 ; X86-LABEL: opt_setcc_shl_eq_zero:
125 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
126 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
127 ; X86-NEXT: shll $17, %ecx
128 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
129 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
130 ; X86-NEXT: orl %ecx, %eax
134 ; X64-LABEL: opt_setcc_shl_eq_zero:
136 ; X64-NEXT: shlq $17, %rsi
137 ; X64-NEXT: orq %rdi, %rsi
140 %shl = shl i128 %a, 17
141 %cmp = icmp eq i128 %shl, 0
; Checks codegen for `icmp ne (shl i128 %a, 17), 0`.
; The expected instruction sequences match the `eq` variant of this test
; (shlq $17 + orq on x86-64; shll $17 + orl chain on i686), with the
; result materialized by setne.
145 define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
146 ; X86-LABEL: opt_setcc_shl_ne_zero:
148 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
149 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
150 ; X86-NEXT: shll $17, %ecx
151 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
152 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
153 ; X86-NEXT: orl %ecx, %eax
154 ; X86-NEXT: setne %al
157 ; X64-LABEL: opt_setcc_shl_ne_zero:
159 ; X64-NEXT: shlq $17, %rsi
160 ; X64-NEXT: orq %rdi, %rsi
161 ; X64-NEXT: setne %al
163 %shl = shl i128 %a, 17
164 %cmp = icmp ne i128 %shl, 0
168 ; Negative test: optimization should not be applied if shift has multiple users.
; Here %shl feeds both the `icmp eq ..., 0` and the call to @use, so the full
; shifted value has to be materialized anyway: the expected output keeps the
; double-word shifts (shldl $17 on i686, shldq $17 on x86-64) and passes the
; shifted value to use@PLT.
169 define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
170 ; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
172 ; X86-NEXT: pushl %ebx
173 ; X86-NEXT: pushl %edi
174 ; X86-NEXT: pushl %esi
175 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
176 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
177 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
178 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
179 ; X86-NEXT: shldl $17, %esi, %edx
180 ; X86-NEXT: shldl $17, %ecx, %esi
181 ; X86-NEXT: shldl $17, %eax, %ecx
182 ; X86-NEXT: shll $17, %eax
183 ; X86-NEXT: movl %ecx, %edi
184 ; X86-NEXT: orl %edx, %edi
185 ; X86-NEXT: movl %eax, %ebx
186 ; X86-NEXT: orl %esi, %ebx
187 ; X86-NEXT: orl %edi, %ebx
189 ; X86-NEXT: pushl %edx
190 ; X86-NEXT: pushl %esi
191 ; X86-NEXT: pushl %ecx
192 ; X86-NEXT: pushl %eax
193 ; X86-NEXT: calll use@PLT
194 ; X86-NEXT: addl $16, %esp
195 ; X86-NEXT: movl %ebx, %eax
196 ; X86-NEXT: popl %esi
197 ; X86-NEXT: popl %edi
198 ; X86-NEXT: popl %ebx
201 ; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
203 ; X64-NEXT: pushq %rbx
204 ; X64-NEXT: shldq $17, %rdi, %rsi
205 ; X64-NEXT: shlq $17, %rdi
206 ; X64-NEXT: movq %rdi, %rax
207 ; X64-NEXT: orq %rsi, %rax
209 ; X64-NEXT: callq use@PLT
210 ; X64-NEXT: movl %ebx, %eax
211 ; X64-NEXT: popq %rbx
213 %shl = shl i128 %a, 17
214 %cmp = icmp eq i128 %shl, 0
215 call void @use(i128 %shl)
219 ; Check that the optimization is applied to a DAG having the appropriate shape
220 ; even if it did not come from an actual shift expansion.
; The IR below spells the pattern out by hand on i64 values:
;   ((%a << 17) | (%b >> 47)) | (%b << 17)
; where the two shift amounts applied to %b sum to 64 (17 + 47). The expected
; x86-64 output is a plain shlq $17 followed by orq; the expected i686 output
; is an orl chain that keeps a single shldl $17.
221 define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
222 ; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
224 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
225 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
226 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
227 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
228 ; X86-NEXT: orl %eax, %ecx
229 ; X86-NEXT: shldl $17, %eax, %ecx
233 ; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
235 ; X64-NEXT: shlq $17, %rdi
236 ; X64-NEXT: orq %rsi, %rdi
239 %shl.a = shl i64 %a, 17
240 %srl.b = lshr i64 %b, 47
241 %or.0 = or i64 %shl.a, %srl.b
242 %shl.b = shl i64 %b, 17
243 %or.1 = or i64 %or.0, %shl.b
244 %cmp = icmp eq i64 %or.1, 0
248 ; Negative test: optimization should not be applied as
249 ; constants used in shifts do not match.
; Same hand-written shape as opt_setcc_expanded_shl_correct_shifts, except
; that %shl.b shifts %b by 18 while %shl.a shifts %a by 17. The expected
; output therefore keeps the double-word shift (shldq $17 on x86-64,
; shldl on i686) next to the separate shift by 18.
250 define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind {
251 ; X86-LABEL: opt_setcc_expanded_shl_wrong_shifts:
253 ; X86-NEXT: pushl %esi
254 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
255 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
256 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
257 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
258 ; X86-NEXT: shldl $17, %edx, %esi
259 ; X86-NEXT: shldl $17, %ecx, %edx
260 ; X86-NEXT: shldl $18, %eax, %ecx
261 ; X86-NEXT: shll $18, %eax
262 ; X86-NEXT: orl %edx, %eax
263 ; X86-NEXT: orl %esi, %ecx
264 ; X86-NEXT: orl %eax, %ecx
266 ; X86-NEXT: popl %esi
269 ; X64-LABEL: opt_setcc_expanded_shl_wrong_shifts:
271 ; X64-NEXT: shldq $17, %rsi, %rdi
272 ; X64-NEXT: shlq $18, %rsi
273 ; X64-NEXT: orq %rdi, %rsi
276 %shl.a = shl i64 %a, 17
277 %srl.b = lshr i64 %b, 47
278 %or.0 = or i64 %shl.a, %srl.b
279 %shl.b = shl i64 %b, 18
280 %or.1 = or i64 %or.0, %shl.b
281 %cmp = icmp eq i64 %or.1, 0
; External function taking an i128; it is called by
; opt_setcc_shl_eq_zero_multiple_shl_users to give the shift result a second user.
285 declare void @use(i128 %a)