1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
3 ; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64
5 ; fold (shl (zext (lshr (A, X))), X) -> (zext (shl (lshr (A, X)), X))
7 ; Canonicalize the sequence shl/zext/lshr performing the zero-extend
8 ; as the last instruction of the sequence.
9 ; This will help DAGCombiner to identify and then fold the sequence
10 ; of shifts into a single AND.
11 ; This transformation is profitable if the shift amounts are the same
12 ; and if there is only one use of the zext.
; fun1: i8 -> i16 instance of the pattern (zext to i16, then shl by 4).
; The checks for both the i686 and the x86_64 run expect a single
; `andl $-16` mask instead of a pair of shifts.
; NOTE(review): the definition of %shr is not visible in this excerpt --
; presumably `lshr i8 %v, 4`; confirm.
14 define i16 @fun1(i8 zeroext %v) {
16 ; X86: # %bb.0: # %entry
17 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
18 ; X86-NEXT: andl $-16, %eax
19 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
23 ; X64: # %bb.0: # %entry
24 ; X64-NEXT: movl %edi, %eax
25 ; X64-NEXT: andl $-16, %eax
26 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
30 %ext = zext i8 %shr to i16
31 %shl = shl i16 %ext, 4
; fun2: i8 -> i32 instance of the pattern (zext to i32, then shl by 4).
; Both runs are expected to produce a single `andl $-16` mask.
; NOTE(review): the definition of %shr is not visible in this excerpt --
; presumably `lshr i8 %v, 4`; confirm.
35 define i32 @fun2(i8 zeroext %v) {
37 ; X86: # %bb.0: # %entry
38 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
39 ; X86-NEXT: andl $-16, %eax
43 ; X64: # %bb.0: # %entry
44 ; X64-NEXT: movl %edi, %eax
45 ; X64-NEXT: andl $-16, %eax
49 %ext = zext i8 %shr to i32
50 %shl = shl i32 %ext, 4
; fun3: i16 -> i32 instance of the pattern (zext to i32, then shl by 4).
; Both runs are expected to produce a single `andl $-16` mask; the i686
; run loads the argument with `movzwl`.
; NOTE(review): the definition of %shr is not visible in this excerpt --
; presumably `lshr i16 %v, 4`; confirm.
54 define i32 @fun3(i16 zeroext %v) {
56 ; X86: # %bb.0: # %entry
57 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
58 ; X86-NEXT: andl $-16, %eax
62 ; X64: # %bb.0: # %entry
63 ; X64-NEXT: movl %edi, %eax
64 ; X64-NEXT: andl $-16, %eax
68 %ext = zext i16 %shr to i32
69 %shl = shl i32 %ext, 4
; fun4: i8 -> i64 instance of the pattern (zext to i64, then shl by 4).
; Both runs are expected to produce a single `andl $-16` mask. The i686
; run additionally expects `xorl %edx, %edx` (presumably the upper half
; of the i64 result -- confirm against the calling convention).
; NOTE(review): the definition of %shr is not visible in this excerpt --
; presumably `lshr i8 %v, 4`; confirm.
73 define i64 @fun4(i8 zeroext %v) {
75 ; X86: # %bb.0: # %entry
76 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
77 ; X86-NEXT: andl $-16, %eax
78 ; X86-NEXT: xorl %edx, %edx
82 ; X64: # %bb.0: # %entry
83 ; X64-NEXT: movl %edi, %eax
84 ; X64-NEXT: andl $-16, %eax
88 %ext = zext i8 %shr to i64
89 %shl = shl i64 %ext, 4
; fun5: i16 -> i64 instance of the pattern: lshr by 4 on i16, zext to
; i64, shl by 4. Both runs are expected to produce a single `andl $-16`
; mask; the i686 run additionally expects `xorl %edx, %edx`.
93 define i64 @fun5(i16 zeroext %v) {
95 ; X86: # %bb.0: # %entry
96 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
97 ; X86-NEXT: andl $-16, %eax
98 ; X86-NEXT: xorl %edx, %edx
102 ; X64: # %bb.0: # %entry
103 ; X64-NEXT: movl %edi, %eax
104 ; X64-NEXT: andl $-16, %eax
107 %shr = lshr i16 %v, 4
108 %ext = zext i16 %shr to i64
109 %shl = shl i64 %ext, 4
; fun6: i32 -> i64 instance of the pattern: lshr by 4 on i32, zext to
; i64, shl by 4. Both runs are expected to produce a single `andl $-16`
; mask; the i686 run additionally expects `xorl %edx, %edx`.
113 define i64 @fun6(i32 zeroext %v) {
115 ; X86: # %bb.0: # %entry
116 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
117 ; X86-NEXT: andl $-16, %eax
118 ; X86-NEXT: xorl %edx, %edx
122 ; X64: # %bb.0: # %entry
123 ; X64-NEXT: movl %edi, %eax
124 ; X64-NEXT: andl $-16, %eax
127 %shr = lshr i32 %v, 4
128 %ext = zext i32 %shr to i64
129 %shl = shl i64 %ext, 4
133 ; Don't fold the pattern if we use arithmetic shifts.
; fun7: i8 -> i64 negative case. The checks keep an explicit `sarb $4`,
; a `movzbl` zero-extension and a `shll $4` rather than a single mask.
; NOTE(review): the definition of %shr is not visible in this excerpt --
; presumably `ashr i8 %v, 4`; confirm.
135 define i64 @fun7(i8 zeroext %v) {
137 ; X86: # %bb.0: # %entry
138 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
139 ; X86-NEXT: sarb $4, %al
140 ; X86-NEXT: movzbl %al, %eax
141 ; X86-NEXT: shll $4, %eax
142 ; X86-NEXT: xorl %edx, %edx
146 ; X64: # %bb.0: # %entry
147 ; X64-NEXT: sarb $4, %dil
148 ; X64-NEXT: movzbl %dil, %eax
149 ; X64-NEXT: shll $4, %eax
153 %ext = zext i8 %shr to i64
154 %shl = shl i64 %ext, 4
; fun8: i16 -> i64 case using ashr by 4 before the zext and the shl by 4.
; Both runs expect a sign-extending load/move (`movswl`) followed by
; `andl $1048560` (0xFFFF0); the i686 run additionally expects
; `xorl %edx, %edx`.
158 define i64 @fun8(i16 zeroext %v) {
160 ; X86: # %bb.0: # %entry
161 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
162 ; X86-NEXT: andl $1048560, %eax # imm = 0xFFFF0
163 ; X86-NEXT: xorl %edx, %edx
167 ; X64: # %bb.0: # %entry
168 ; X64-NEXT: movswl %di, %eax
169 ; X64-NEXT: andl $1048560, %eax # imm = 0xFFFF0
172 %shr = ashr i16 %v, 4
173 %ext = zext i16 %shr to i64
174 %shl = shl i64 %ext, 4
; fun9: i32 -> i64 case using ashr by 4 before the zext and the shl by 4.
; The i686 run expects `sarl $4` / `shrl $28` on %edx plus `andl $-16`
; on %eax; the x86_64 run expects `sarl $4` followed by `shlq $4`, i.e.
; the shift pair is kept.
178 define i64 @fun9(i32 zeroext %v) {
180 ; X86: # %bb.0: # %entry
181 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
182 ; X86-NEXT: movl %eax, %edx
183 ; X86-NEXT: sarl $4, %edx
184 ; X86-NEXT: andl $-16, %eax
185 ; X86-NEXT: shrl $28, %edx
189 ; X64: # %bb.0: # %entry
190 ; X64-NEXT: movl %edi, %eax
191 ; X64-NEXT: sarl $4, %eax
192 ; X64-NEXT: shlq $4, %rax
195 %shr = ashr i32 %v, 4
196 %ext = zext i32 %shr to i64
197 %shl = shl i64 %ext, 4
201 ; Don't fold the pattern if there is more than one use of the
202 ; operand in input to the shift left.
; fun10: i8 -> i64 case where %ext has two uses: the shl by 4 and the
; add that consumes both %shl and %ext. The checks for both runs still
; contain an explicit `shrb $4` followed by `movzbl`.
; NOTE(review): the definition of %shr is not visible in this excerpt --
; presumably `lshr i8 %v, 4`; confirm.
204 define i64 @fun10(i8 zeroext %v) {
206 ; X86: # %bb.0: # %entry
207 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
208 ; X86-NEXT: shrb $4, %al
209 ; X86-NEXT: movzbl %al, %ecx
210 ; X86-NEXT: movl %ecx, %eax
211 ; X86-NEXT: shll $4, %eax
212 ; X86-NEXT: orl %ecx, %eax
213 ; X86-NEXT: xorl %edx, %edx
217 ; X64: # %bb.0: # %entry
218 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
219 ; X64-NEXT: movl %edi, %eax
220 ; X64-NEXT: shrb $4, %al
221 ; X64-NEXT: movzbl %al, %eax
222 ; X64-NEXT: andl $-16, %edi
223 ; X64-NEXT: orq %rdi, %rax
227 %ext = zext i8 %shr to i64
228 %shl = shl i64 %ext, 4
229 %add = add i64 %shl, %ext
; fun11: i16 -> i64 case where %ext has two uses: lshr by 4, zext to i64,
; then %ext feeds both the shl by 4 and the add. Both runs expect a
; separate `shrl $4` on a copy of the input alongside `andl $-16`, with
; the two values then added.
233 define i64 @fun11(i16 zeroext %v) {
235 ; X86: # %bb.0: # %entry
236 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
237 ; X86-NEXT: movl %eax, %ecx
238 ; X86-NEXT: shrl $4, %ecx
239 ; X86-NEXT: andl $-16, %eax
240 ; X86-NEXT: addl %ecx, %eax
241 ; X86-NEXT: xorl %edx, %edx
245 ; X64: # %bb.0: # %entry
246 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
247 ; X64-NEXT: movl %edi, %eax
248 ; X64-NEXT: shrl $4, %eax
249 ; X64-NEXT: andl $-16, %edi
250 ; X64-NEXT: addq %rdi, %rax
253 %shr = lshr i16 %v, 4
254 %ext = zext i16 %shr to i64
255 %shl = shl i64 %ext, 4
256 %add = add i64 %shl, %ext
; fun12: i32 -> i64 case where %ext has two uses: lshr by 4, zext to i64,
; then %ext feeds both the shl by 4 and the add. Both runs expect a
; separate `shrl $4` on a copy of the input alongside `andl $-16`, with
; the two values then added.
260 define i64 @fun12(i32 zeroext %v) {
262 ; X86: # %bb.0: # %entry
263 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
264 ; X86-NEXT: movl %eax, %ecx
265 ; X86-NEXT: shrl $4, %ecx
266 ; X86-NEXT: andl $-16, %eax
267 ; X86-NEXT: xorl %edx, %edx
268 ; X86-NEXT: addl %ecx, %eax
273 ; X64: # %bb.0: # %entry
274 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
275 ; X64-NEXT: movl %edi, %eax
276 ; X64-NEXT: shrl $4, %eax
277 ; X64-NEXT: andl $-16, %edi
278 ; X64-NEXT: addq %rdi, %rax
281 %shr = lshr i32 %v, 4
282 %ext = zext i32 %shr to i64
283 %shl = shl i64 %ext, 4
284 %add = add i64 %shl, %ext
289 ; Make sure that the combined dags are legal if we run the DAGCombiner after
290 ; Legalization took place. The add instruction is redundant and increases by
291 ; one the number of uses of the zext. This prevents the transformation from
292 ; firing before dags are legalized and optimized.
293 ; Once the add is removed, the number of uses becomes one and therefore the
294 ; dags are canonicalized. After Legalization, we need to make sure that the
295 ; valuetype for the shift count is legal.
296 ; Verify also that we correctly fold the shl-shr sequence into an AND with a bitmask.
; g: takes an i32, zero-extends an intermediate value to i64 and passes
; the result to @f via a tail call. Both runs expect the argument to be
; masked with `andl $-4`; the x86_64 run expects the call to be emitted
; as a tail jump (`jmp f`), while the i686 run pushes the value on the
; stack and restores %esp afterwards.
; NOTE(review): the definitions of %b and %e are not visible in this
; excerpt; confirm the exact shift/add sequence against the full file.
299 define void @g(i32 %a) nounwind {
302 ; X86-NEXT: subl $12, %esp
303 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
304 ; X86-NEXT: andl $-4, %eax
305 ; X86-NEXT: subl $8, %esp
307 ; X86-NEXT: pushl %eax
309 ; X86-NEXT: addl $28, %esp
314 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
315 ; X64-NEXT: andl $-4, %edi
316 ; X64-NEXT: jmp f # TAILCALL
318 %c = zext i32 %b to i64
321 tail call void @f(i64 %e)
; Declaration of @f, the i64-taking function that @g tail-calls.
325 declare dso_local void @f(i64)