1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; bswap should be constant folded when it is passed a constant argument
4 ; RUN: llc < %s -mtriple=i686-- -mcpu=i686 | FileCheck %s
5 ; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=CHECK64
7 declare i16 @llvm.bswap.i16(i16)
8 declare i32 @llvm.bswap.i32(i32)
9 declare i64 @llvm.bswap.i64(i64)
; 16-bit byte swap: calls @llvm.bswap.i16 on %A.
; Both check prefixes expect the swap to be emitted as a 16-bit rotate by 8
; (rolw $8, %ax) rather than as a bswap instruction.
; NOTE(review): the use of %Z and the end of the function are not visible in
; this view of the file.
11 define i16 @W(i16 %A) {
14 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
15 ; CHECK-NEXT: rolw $8, %ax
20 ; CHECK64-NEXT: movl %edi, %eax
21 ; CHECK64-NEXT: rolw $8, %ax
22 ; CHECK64-NEXT: # kill: def $ax killed $ax killed $eax
24 %Z = call i16 @llvm.bswap.i16( i16 %A ) ; <i16> [#uses=1]
; 32-bit byte swap: calls @llvm.bswap.i32 on %A.
; Both check prefixes expect the argument to be moved into %eax and swapped
; with a single bswapl.
28 define dso_local i32 @X(i32 %A) {
31 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
32 ; CHECK-NEXT: bswapl %eax
37 ; CHECK64-NEXT: movl %edi, %eax
38 ; CHECK64-NEXT: bswapl %eax
40 %Z = call i32 @llvm.bswap.i32( i32 %A ) ; <i32> [#uses=1]
; 64-bit byte swap: calls @llvm.bswap.i64 on %A.
; The i686 checks expect two 32-bit loads into %edx/%eax followed by a bswapl
; on each register; the x86-64 checks expect a single bswapq on %rax.
44 define i64 @Y(i64 %A) {
47 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
48 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
49 ; CHECK-NEXT: bswapl %eax
50 ; CHECK-NEXT: bswapl %edx
55 ; CHECK64-NEXT: movq %rdi, %rax
56 ; CHECK64-NEXT: bswapq %rax
58 %Z = call i64 @llvm.bswap.i64( i64 %A ) ; <i64> [#uses=1]
62 ; This isn't really a bswap test, but the potential problem is
63 ; easier to see with bswap vs. other ops. The transform in
64 ; question starts with a bitwise logic op and tries to hoist
65 ; those ahead of other ops. But that's not generally profitable
66 ; when the other ops have other uses (and it might not be safe
67 ; either due to unconstrained instruction count growth).
; Both bswap results (%xt, %yt) are stored to memory through %p1 and %p2, so
; each has a use besides the logic op. The checks on both triples expect two
; bswapl instructions, the two stores, and then an orl of the swapped values,
; i.e. the logic op is not hoisted ahead of the bswaps.
69 define dso_local i32 @bswap_multiuse(i32 %x, i32 %y, i32* %p1, i32* %p2) nounwind {
70 ; CHECK-LABEL: bswap_multiuse:
72 ; CHECK-NEXT: pushl %esi
73 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
74 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
75 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
76 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
77 ; CHECK-NEXT: bswapl %esi
78 ; CHECK-NEXT: bswapl %eax
79 ; CHECK-NEXT: movl %esi, (%edx)
80 ; CHECK-NEXT: movl %eax, (%ecx)
81 ; CHECK-NEXT: orl %esi, %eax
82 ; CHECK-NEXT: popl %esi
85 ; CHECK64-LABEL: bswap_multiuse:
87 ; CHECK64-NEXT: movl %esi, %eax
88 ; CHECK64-NEXT: bswapl %edi
89 ; CHECK64-NEXT: bswapl %eax
90 ; CHECK64-NEXT: movl %edi, (%rdx)
91 ; CHECK64-NEXT: movl %eax, (%rcx)
92 ; CHECK64-NEXT: orl %edi, %eax
94 %xt = call i32 @llvm.bswap.i32(i32 %x)
95 %yt = call i32 @llvm.bswap.i32(i32 %y)
96 store i32 %xt, i32* %p1
97 store i32 %yt, i32* %p2
; Hand-written swap of the two low bytes of %a:
;   ((%a >> 8) & 255) | ((%a << 8) & 65280)
; Both check prefixes expect this to be recognized and emitted as a 32-bit
; bswapl followed by a logical right shift by 16 (shrl $16).
103 define dso_local i32 @test1(i32 %a) nounwind readnone {
104 ; CHECK-LABEL: test1:
106 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
107 ; CHECK-NEXT: bswapl %eax
108 ; CHECK-NEXT: shrl $16, %eax
111 ; CHECK64-LABEL: test1:
113 ; CHECK64-NEXT: movl %edi, %eax
114 ; CHECK64-NEXT: bswapl %eax
115 ; CHECK64-NEXT: shrl $16, %eax
117 %and = lshr i32 %a, 8
118 %shr3 = and i32 %and, 255
119 %and2 = shl i32 %a, 8
120 %shl = and i32 %and2, 65280
121 %or = or i32 %shr3, %shl
; Variant of the low-two-byte swap where the shifted-left half is not masked;
; instead the combined value is shifted left by 16 and arithmetically shifted
; right by 16, which sign-extends its low 16 bits.
; Both check prefixes expect a bswapl followed by an arithmetic right shift
; by 16 (sarl $16).
125 define dso_local i32 @test2(i32 %a) nounwind readnone {
126 ; CHECK-LABEL: test2:
128 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
129 ; CHECK-NEXT: bswapl %eax
130 ; CHECK-NEXT: sarl $16, %eax
133 ; CHECK64-LABEL: test2:
135 ; CHECK64-NEXT: movl %edi, %eax
136 ; CHECK64-NEXT: bswapl %eax
137 ; CHECK64-NEXT: sarl $16, %eax
139 %and = lshr i32 %a, 8
140 %shr4 = and i32 %and, 255
141 %and2 = shl i32 %a, 8
142 %or = or i32 %shr4, %and2
143 %sext = shl i32 %or, 16
144 %conv3 = ashr exact i32 %sext, 16
; Zero-initialized globals used as the narrow load sources in the tests below:
; @var8 is loaded by @not_useful_bswap; @var16 is loaded by @not_bswap and
; @finally_useful_bswap.
148 @var8 = dso_local global i8 0
149 @var16 = dso_local global i16 0
151 ; The "shl" below can move bits into the high parts of the value, so the
152 ; operation is not a "bswap, shr" pair.
154 ; rdar://problem/14814049
; Loads @var16, zero-extends it to i64, and ORs (value >> 8) with (value << 8)
; without masking the left-shifted half.
; Both check prefixes expect the explicit shift/shift/or sequence and no bswap
; instruction.
155 define i64 @not_bswap() {
156 ; CHECK-LABEL: not_bswap:
158 ; CHECK-NEXT: movzwl var16, %eax
159 ; CHECK-NEXT: movl %eax, %ecx
160 ; CHECK-NEXT: shrl $8, %ecx
161 ; CHECK-NEXT: shll $8, %eax
162 ; CHECK-NEXT: orl %ecx, %eax
163 ; CHECK-NEXT: xorl %edx, %edx
166 ; CHECK64-LABEL: not_bswap:
168 ; CHECK64-NEXT: movzwl var16(%rip), %eax
169 ; CHECK64-NEXT: movq %rax, %rcx
170 ; CHECK64-NEXT: shrq $8, %rcx
171 ; CHECK64-NEXT: shlq $8, %rax
172 ; CHECK64-NEXT: orq %rcx, %rax
174 %init = load i16, i16* @var16
175 %big = zext i16 %init to i64
177 %hishifted = lshr i64 %big, 8
178 %loshifted = shl i64 %big, 8
180 %notswapped = or i64 %hishifted, %loshifted
185 ; This time, the lshr (and subsequent or) is completely useless. While it's
186 ; technically correct to convert this into a "bswap, shr", it's suboptimal. A
187 ; simple shl works better.
; Same shift/or pattern as @not_bswap, but the source is an i8 loaded from
; @var8 and zero-extended to i64, so the lshr by 8 contributes no set bits.
; Both check prefixes expect just a zero-extending byte load and one left
; shift by 8 (plus zeroing %edx for the high half on i686).
189 define i64 @not_useful_bswap() {
190 ; CHECK-LABEL: not_useful_bswap:
192 ; CHECK-NEXT: movzbl var8, %eax
193 ; CHECK-NEXT: shll $8, %eax
194 ; CHECK-NEXT: xorl %edx, %edx
197 ; CHECK64-LABEL: not_useful_bswap:
199 ; CHECK64-NEXT: movzbl var8(%rip), %eax
200 ; CHECK64-NEXT: shlq $8, %rax
202 %init = load i8, i8* @var8
203 %big = zext i8 %init to i64
205 %hishifted = lshr i64 %big, 8
206 %loshifted = shl i64 %big, 8
208 %notswapped = or i64 %hishifted, %loshifted
213 ; Finally, it *is* OK to just mask off the shl if we know that the value is zero
214 ; beyond 16 bits anyway. This is a legitimate bswap.
; Loads @var16, zero-extends it to i64, masks the low byte with 255 before
; shifting it left by 8, and ORs that with (value >> 8).
; The i686 checks expect bswapl + shrl $16 (with %edx zeroed for the high
; half); the x86-64 checks expect bswapq + shrq $48.
216 define i64 @finally_useful_bswap() {
217 ; CHECK-LABEL: finally_useful_bswap:
219 ; CHECK-NEXT: movzwl var16, %eax
220 ; CHECK-NEXT: bswapl %eax
221 ; CHECK-NEXT: shrl $16, %eax
222 ; CHECK-NEXT: xorl %edx, %edx
225 ; CHECK64-LABEL: finally_useful_bswap:
227 ; CHECK64-NEXT: movzwl var16(%rip), %eax
228 ; CHECK64-NEXT: bswapq %rax
229 ; CHECK64-NEXT: shrq $48, %rax
231 %init = load i16, i16* @var16
232 %big = zext i16 %init to i64
234 %hishifted = lshr i64 %big, 8
235 %lomasked = and i64 %big, 255
236 %loshifted = shl i64 %lomasked, 8
238 %swapped = or i64 %hishifted, %loshifted
243 ; Make sure we don't assert during type legalization promoting a large
244 ; bswap due to the need for a large shift that won't fit in the i8 returned
245 ; from getShiftAmountTy.
; Byte swap of an i528 value via @llvm.bswap.i528.
; Expected i686 code: each 32-bit piece is swapped with bswapl and adjacent
; pieces are combined with shrdl $16; the results are written through a
; pointer loaded from the stack at offsets 0..60, and the last 16 bits are
; stored with movw at offset 64.
; Expected x86-64 code: the same scheme with bswapq and shrdq $48, storing
; eight quadwords at offsets 0..56 of the pointer copied from %rdi into %rax,
; plus a final movw at offset 64.
246 define i528 @large_promotion(i528 %A) nounwind {
247 ; CHECK-LABEL: large_promotion:
249 ; CHECK-NEXT: pushl %ebp
250 ; CHECK-NEXT: pushl %ebx
251 ; CHECK-NEXT: pushl %edi
252 ; CHECK-NEXT: pushl %esi
253 ; CHECK-NEXT: subl $44, %esp
254 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
255 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
256 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
257 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
258 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
259 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
260 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
261 ; CHECK-NEXT: bswapl %eax
262 ; CHECK-NEXT: bswapl %ecx
263 ; CHECK-NEXT: shrdl $16, %ecx, %eax
264 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
265 ; CHECK-NEXT: bswapl %edx
266 ; CHECK-NEXT: shrdl $16, %edx, %ecx
267 ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
268 ; CHECK-NEXT: bswapl %esi
269 ; CHECK-NEXT: shrdl $16, %esi, %edx
270 ; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
271 ; CHECK-NEXT: bswapl %edi
272 ; CHECK-NEXT: shrdl $16, %edi, %esi
273 ; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
274 ; CHECK-NEXT: bswapl %ebx
275 ; CHECK-NEXT: shrdl $16, %ebx, %edi
276 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
277 ; CHECK-NEXT: bswapl %ebp
278 ; CHECK-NEXT: shrdl $16, %ebp, %ebx
279 ; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
280 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
281 ; CHECK-NEXT: bswapl %ecx
282 ; CHECK-NEXT: shrdl $16, %ecx, %ebp
283 ; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
284 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
285 ; CHECK-NEXT: bswapl %eax
286 ; CHECK-NEXT: shrdl $16, %eax, %ecx
287 ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
288 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
289 ; CHECK-NEXT: bswapl %ecx
290 ; CHECK-NEXT: shrdl $16, %ecx, %eax
291 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
292 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
293 ; CHECK-NEXT: bswapl %eax
294 ; CHECK-NEXT: shrdl $16, %eax, %ecx
295 ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
296 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
297 ; CHECK-NEXT: bswapl %ebp
298 ; CHECK-NEXT: shrdl $16, %ebp, %eax
299 ; CHECK-NEXT: movl %eax, (%esp) # 4-byte Spill
300 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
301 ; CHECK-NEXT: bswapl %ebx
302 ; CHECK-NEXT: shrdl $16, %ebx, %ebp
303 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
304 ; CHECK-NEXT: bswapl %esi
305 ; CHECK-NEXT: shrdl $16, %esi, %ebx
306 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
307 ; CHECK-NEXT: bswapl %edx
308 ; CHECK-NEXT: shrdl $16, %edx, %esi
309 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
310 ; CHECK-NEXT: bswapl %ecx
311 ; CHECK-NEXT: shrdl $16, %ecx, %edx
312 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
313 ; CHECK-NEXT: bswapl %edi
314 ; CHECK-NEXT: shrdl $16, %edi, %ecx
315 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
316 ; CHECK-NEXT: movl %ecx, 60(%eax)
317 ; CHECK-NEXT: movl %edx, 56(%eax)
318 ; CHECK-NEXT: movl %esi, 52(%eax)
319 ; CHECK-NEXT: movl %ebx, 48(%eax)
320 ; CHECK-NEXT: movl %ebp, 44(%eax)
321 ; CHECK-NEXT: movl (%esp), %ecx # 4-byte Reload
322 ; CHECK-NEXT: movl %ecx, 40(%eax)
323 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
324 ; CHECK-NEXT: movl %ecx, 36(%eax)
325 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
326 ; CHECK-NEXT: movl %ecx, 32(%eax)
327 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
328 ; CHECK-NEXT: movl %ecx, 28(%eax)
329 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
330 ; CHECK-NEXT: movl %ecx, 24(%eax)
331 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
332 ; CHECK-NEXT: movl %ecx, 20(%eax)
333 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
334 ; CHECK-NEXT: movl %ecx, 16(%eax)
335 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
336 ; CHECK-NEXT: movl %ecx, 12(%eax)
337 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
338 ; CHECK-NEXT: movl %ecx, 8(%eax)
339 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
340 ; CHECK-NEXT: movl %ecx, 4(%eax)
341 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
342 ; CHECK-NEXT: movl %ecx, (%eax)
343 ; CHECK-NEXT: shrl $16, %edi
344 ; CHECK-NEXT: movw %di, 64(%eax)
345 ; CHECK-NEXT: addl $44, %esp
346 ; CHECK-NEXT: popl %esi
347 ; CHECK-NEXT: popl %edi
348 ; CHECK-NEXT: popl %ebx
349 ; CHECK-NEXT: popl %ebp
350 ; CHECK-NEXT: retl $4
352 ; CHECK64-LABEL: large_promotion:
354 ; CHECK64-NEXT: pushq %rbx
355 ; CHECK64-NEXT: movq %rdi, %rax
356 ; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
357 ; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r11
358 ; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
359 ; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r10
360 ; CHECK64-NEXT: bswapq %r10
361 ; CHECK64-NEXT: bswapq %rdi
362 ; CHECK64-NEXT: shrdq $48, %rdi, %r10
363 ; CHECK64-NEXT: bswapq %r11
364 ; CHECK64-NEXT: shrdq $48, %r11, %rdi
365 ; CHECK64-NEXT: bswapq %rbx
366 ; CHECK64-NEXT: shrdq $48, %rbx, %r11
367 ; CHECK64-NEXT: bswapq %r9
368 ; CHECK64-NEXT: shrdq $48, %r9, %rbx
369 ; CHECK64-NEXT: bswapq %r8
370 ; CHECK64-NEXT: shrdq $48, %r8, %r9
371 ; CHECK64-NEXT: bswapq %rcx
372 ; CHECK64-NEXT: shrdq $48, %rcx, %r8
373 ; CHECK64-NEXT: bswapq %rdx
374 ; CHECK64-NEXT: shrdq $48, %rdx, %rcx
375 ; CHECK64-NEXT: bswapq %rsi
376 ; CHECK64-NEXT: shrdq $48, %rsi, %rdx
377 ; CHECK64-NEXT: shrq $48, %rsi
378 ; CHECK64-NEXT: movq %rdx, 56(%rax)
379 ; CHECK64-NEXT: movq %rcx, 48(%rax)
380 ; CHECK64-NEXT: movq %r8, 40(%rax)
381 ; CHECK64-NEXT: movq %r9, 32(%rax)
382 ; CHECK64-NEXT: movq %rbx, 24(%rax)
383 ; CHECK64-NEXT: movq %r11, 16(%rax)
384 ; CHECK64-NEXT: movq %rdi, 8(%rax)
385 ; CHECK64-NEXT: movq %r10, (%rax)
386 ; CHECK64-NEXT: movw %si, 64(%rax)
387 ; CHECK64-NEXT: popq %rbx
389 %Z = call i528 @llvm.bswap.i528(i528 %A)
392 declare i528 @llvm.bswap.i528(i528)