1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=i686-- < %s | FileCheck %s --check-prefix=X86
3 ; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s --check-prefix=X64
5 ; Tests the optimization of an expanded (multi-part) SRL/SHL that feeds a
6 ; SETCC comparing it with zero: the cross-part rotation (funnel shift) is removed.
8 ; See https://bugs.llvm.org/show_bug.cgi?id=50197
; Loop that keeps incrementing an i128 until the incremented value is
; unsigned-less-than 2^60. Comparing against a power of two is equivalent to
; testing that bits 60..127 are all zero; the x86-64 assertions below expect
; this as a shrq $60 of %rax ORed with %rdx, followed by the loop branch.
9 define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
10 ; X86-LABEL: opt_setcc_lt_power_of_2:
12 ; X86-NEXT: pushl %ebp
13 ; X86-NEXT: pushl %ebx
14 ; X86-NEXT: pushl %edi
15 ; X86-NEXT: pushl %esi
16 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
17 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
18 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
19 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
20 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
21 ; X86-NEXT: .p2align 4, 0x90
22 ; X86-NEXT: .LBB0_1: # %loop
23 ; X86-NEXT: # =>This Inner Loop Header: Depth=1
24 ; X86-NEXT: addl $1, %edi
25 ; X86-NEXT: adcl $0, %esi
26 ; X86-NEXT: adcl $0, %edx
27 ; X86-NEXT: adcl $0, %ecx
28 ; X86-NEXT: movl %edx, %ebx
29 ; X86-NEXT: orl %ecx, %ebx
30 ; X86-NEXT: movl %esi, %ebp
31 ; X86-NEXT: orl %edx, %ebp
32 ; X86-NEXT: orl %ecx, %ebp
33 ; X86-NEXT: shrdl $28, %ebx, %ebp
34 ; X86-NEXT: jne .LBB0_1
35 ; X86-NEXT: # %bb.2: # %exit
36 ; X86-NEXT: movl %edi, (%eax)
37 ; X86-NEXT: movl %esi, 4(%eax)
38 ; X86-NEXT: movl %edx, 8(%eax)
39 ; X86-NEXT: movl %ecx, 12(%eax)
46 ; X64-LABEL: opt_setcc_lt_power_of_2:
48 ; X64-NEXT: movq %rsi, %rdx
49 ; X64-NEXT: movq %rdi, %rax
50 ; X64-NEXT: .p2align 4, 0x90
51 ; X64-NEXT: .LBB0_1: # %loop
52 ; X64-NEXT: # =>This Inner Loop Header: Depth=1
53 ; X64-NEXT: addq $1, %rax
54 ; X64-NEXT: adcq $0, %rdx
55 ; X64-NEXT: movq %rax, %rcx
56 ; X64-NEXT: shrq $60, %rcx
57 ; X64-NEXT: orq %rdx, %rcx
58 ; X64-NEXT: jne .LBB0_1
59 ; X64-NEXT: # %bb.2: # %exit
64 %phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]
65 %inc = add i128 %phi.a, 1
; 1152921504606846976 == 2^60, so the loop exits once %inc fits in 60 bits.
66 %cmp = icmp ult i128 %inc, 1152921504606846976
67 br i1 %cmp, label %exit, label %loop
; icmp eq of (lshr i128 %a, 17) against zero. The x86-64 assertions below
; expect a plain shrq $17 of %rdi ORed with %rsi, i.e. no double-register
; shift between the two halves. The i686 assertions still contain a shldl.
73 define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
74 ; X86-LABEL: opt_setcc_srl_eq_zero:
76 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
77 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
78 ; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
79 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
80 ; X86-NEXT: orl %eax, %edx
81 ; X86-NEXT: orl %ecx, %edx
82 ; X86-NEXT: orl %eax, %ecx
83 ; X86-NEXT: shldl $15, %edx, %ecx
87 ; X64-LABEL: opt_setcc_srl_eq_zero:
89 ; X64-NEXT: shrq $17, %rdi
90 ; X64-NEXT: orq %rsi, %rdi
93 %srl = lshr i128 %a, 17
94 %cmp = icmp eq i128 %srl, 0
; Same shape as opt_setcc_srl_eq_zero but with icmp ne: (lshr i128 %a, 17)
; compared against zero. The x86-64 assertions below expect shrq $17 of %rdi
; ORed with %rsi, then setne.
98 define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
99 ; X86-LABEL: opt_setcc_srl_ne_zero:
101 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
102 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
103 ; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
104 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
105 ; X86-NEXT: orl %eax, %edx
106 ; X86-NEXT: orl %ecx, %edx
107 ; X86-NEXT: orl %eax, %ecx
108 ; X86-NEXT: shldl $15, %edx, %ecx
109 ; X86-NEXT: setne %al
112 ; X64-LABEL: opt_setcc_srl_ne_zero:
114 ; X64-NEXT: shrq $17, %rdi
115 ; X64-NEXT: orq %rsi, %rdi
116 ; X64-NEXT: setne %al
118 %srl = lshr i128 %a, 17
119 %cmp = icmp ne i128 %srl, 0
; icmp eq of (shl i128 %a, 17) against zero. The x86-64 assertions below
; expect a plain shlq $17 of %rsi ORed with %rdi; the i686 assertions expect
; a single shll $17 with the remaining words ORed in unshifted.
123 define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
124 ; X86-LABEL: opt_setcc_shl_eq_zero:
126 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
127 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
128 ; X86-NEXT: shll $17, %ecx
129 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
130 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
131 ; X86-NEXT: orl %ecx, %eax
135 ; X64-LABEL: opt_setcc_shl_eq_zero:
137 ; X64-NEXT: shlq $17, %rsi
138 ; X64-NEXT: orq %rdi, %rsi
141 %shl = shl i128 %a, 17
142 %cmp = icmp eq i128 %shl, 0
; Same shape as opt_setcc_shl_eq_zero but with icmp ne: (shl i128 %a, 17)
; compared against zero. The x86-64 assertions below expect shlq $17 of %rsi
; ORed with %rdi, then setne.
146 define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
147 ; X86-LABEL: opt_setcc_shl_ne_zero:
149 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
150 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
151 ; X86-NEXT: shll $17, %ecx
152 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
153 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
154 ; X86-NEXT: orl %ecx, %eax
155 ; X86-NEXT: setne %al
158 ; X64-LABEL: opt_setcc_shl_ne_zero:
160 ; X64-NEXT: shlq $17, %rsi
161 ; X64-NEXT: orq %rdi, %rsi
162 ; X64-NEXT: setne %al
164 %shl = shl i128 %a, 17
165 %cmp = icmp ne i128 %shl, 0
169 ; Negative test: optimization should not be applied if shift has multiple users.
; Here %shl feeds both the icmp and the call to @use, so the full shifted
; i128 has to be produced anyway; the assertions below keep the shldl/shldq
; double-register shifts and compute the zero test from the shifted parts.
170 define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
171 ; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
173 ; X86-NEXT: pushl %ebx
174 ; X86-NEXT: pushl %edi
175 ; X86-NEXT: pushl %esi
176 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
177 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
178 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
179 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
180 ; X86-NEXT: shldl $17, %edx, %esi
181 ; X86-NEXT: shldl $17, %ecx, %edx
182 ; X86-NEXT: shldl $17, %eax, %ecx
183 ; X86-NEXT: shll $17, %eax
184 ; X86-NEXT: movl %ecx, %edi
185 ; X86-NEXT: orl %esi, %edi
186 ; X86-NEXT: movl %eax, %ebx
187 ; X86-NEXT: orl %edx, %ebx
188 ; X86-NEXT: orl %edi, %ebx
190 ; X86-NEXT: pushl %esi
191 ; X86-NEXT: pushl %edx
192 ; X86-NEXT: pushl %ecx
193 ; X86-NEXT: pushl %eax
194 ; X86-NEXT: calll use@PLT
195 ; X86-NEXT: addl $16, %esp
196 ; X86-NEXT: movl %ebx, %eax
197 ; X86-NEXT: popl %esi
198 ; X86-NEXT: popl %edi
199 ; X86-NEXT: popl %ebx
202 ; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
204 ; X64-NEXT: pushq %rbx
205 ; X64-NEXT: shldq $17, %rdi, %rsi
206 ; X64-NEXT: shlq $17, %rdi
207 ; X64-NEXT: movq %rdi, %rax
208 ; X64-NEXT: orq %rsi, %rax
210 ; X64-NEXT: callq use@PLT
211 ; X64-NEXT: movl %ebx, %eax
212 ; X64-NEXT: popq %rbx
214 %shl = shl i128 %a, 17
215 %cmp = icmp eq i128 %shl, 0
; Second user of %shl: this is what makes the test negative.
216 call void @use(i128 %shl)
220 ; Check that the optimization is applied to a DAG that has the expected
221 ; shape even when it did not come from an actual shift expansion.
; The IR hand-writes ((a << 17) | (b >> 47)) | (b << 17) on i64 values; the
; two amounts are complementary (17 + 47 == 64) and both shl amounts agree.
; The x86-64 assertions below expect only shlq $17 of %rdi ORed with %rsi.
222 define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
223 ; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
225 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
226 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
227 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
228 ; X86-NEXT: shll $17, %ecx
229 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
230 ; X86-NEXT: orl %ecx, %eax
234 ; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
236 ; X64-NEXT: shlq $17, %rdi
237 ; X64-NEXT: orq %rsi, %rdi
240 %shl.a = shl i64 %a, 17
241 %srl.b = lshr i64 %b, 47
242 %or.0 = or i64 %shl.a, %srl.b
243 %shl.b = shl i64 %b, 17
244 %or.1 = or i64 %or.0, %shl.b
245 %cmp = icmp eq i64 %or.1, 0
249 ; Negative test: optimization should not be applied as
250 ; constants used in shifts do not match.
; Identical to opt_setcc_expanded_shl_correct_shifts except that %shl.b
; shifts by 18 instead of 17, so the pattern no longer looks like one wide
; shift. The x86-64 assertions below keep the shldq $17 and the shlq $18.
251 define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind {
252 ; X86-LABEL: opt_setcc_expanded_shl_wrong_shifts:
254 ; X86-NEXT: pushl %esi
255 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
256 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
257 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
258 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
259 ; X86-NEXT: shldl $17, %edx, %esi
260 ; X86-NEXT: shldl $17, %ecx, %edx
261 ; X86-NEXT: shldl $18, %eax, %ecx
262 ; X86-NEXT: shll $18, %eax
263 ; X86-NEXT: orl %edx, %eax
264 ; X86-NEXT: orl %esi, %ecx
265 ; X86-NEXT: orl %eax, %ecx
267 ; X86-NEXT: popl %esi
270 ; X64-LABEL: opt_setcc_expanded_shl_wrong_shifts:
272 ; X64-NEXT: shldq $17, %rsi, %rdi
273 ; X64-NEXT: shlq $18, %rsi
274 ; X64-NEXT: orq %rdi, %rsi
277 %shl.a = shl i64 %a, 17
278 %srl.b = lshr i64 %b, 47
279 %or.0 = or i64 %shl.a, %srl.b
; Mismatching amount (18 vs. 17 above) is the point of this negative test.
280 %shl.b = shl i64 %b, 18
281 %or.1 = or i64 %or.0, %shl.b
282 %cmp = icmp eq i64 %or.1, 0
; External sink taking an i128; called by
; opt_setcc_shl_eq_zero_multiple_shl_users to give the shift a second user.
286 declare void @use(i128 %a)