1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
5 ; These tests check folds of @llvm.bswap calls on i686 and x86_64. The expected
6 ; output is compact: bswapl/bswapq, or rolw $8 for a 16-bit swap.
; Declarations of the llvm.bswap intrinsic overloads.
; The name suffix of an overloaded intrinsic encodes its operand type, so the
; v4i32 variant takes and returns <4 x i32>; declaring it with scalar i32 does
; not match its suffix.
8 declare i16 @llvm.bswap.i16(i16) readnone
9 declare i32 @llvm.bswap.i32(i32) readnone
10 declare i64 @llvm.bswap.i64(i64) readnone
11 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) readnone
13 ; fold (bswap undef) -> undef
; Calls llvm.bswap.i32 on an undef operand. Only the function labels are
; checked in the lines visible here.
14 define i32 @test_undef() nounwind {
15 ; X86-LABEL: test_undef:
19 ; X64-LABEL: test_undef:
22 %b = call i32 @llvm.bswap.i32(i32 undef)
26 ; fold (bswap (bswap x)) -> x
; Applies llvm.bswap.i32 twice to %a0. The visible checks on both targets
; expect only a move of the argument into %eax, with no bswap instruction.
27 define i32 @test_bswap_bswap(i32 %a0) nounwind {
28 ; X86-LABEL: test_bswap_bswap:
30 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
33 ; X64-LABEL: test_bswap_bswap:
35 ; X64-NEXT: movl %edi, %eax
37 %b = call i32 @llvm.bswap.i32(i32 %a0)
38 %c = call i32 @llvm.bswap.i32(i32 %b)
; i16 case: bswap of %a, an intermediate %2, then bswap of %2.
; %2 is defined on a line not visible here (presumably a logical shift right
; by 8, going by the function name -- TODO confirm).
; Both targets are expected to emit a single left shift by 8 and no byte swap.
42 define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
43 ; X86-LABEL: test_bswap_srli_8_bswap_i16:
45 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
46 ; X86-NEXT: shll $8, %eax
47 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
50 ; X64-LABEL: test_bswap_srli_8_bswap_i16:
52 ; X64-NEXT: movl %edi, %eax
53 ; X64-NEXT: shll $8, %eax
54 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
56 %1 = call i16 @llvm.bswap.i16(i16 %a)
58 %3 = call i16 @llvm.bswap.i16(i16 %2)
; i32 case: bswap of %a, an intermediate %2, then bswap of %2.
; %2 is defined on a line not visible here (presumably a logical shift right
; by 8, going by the function name -- TODO confirm).
; Both targets are expected to emit a single shll $8 and no byte swap.
62 define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
63 ; X86-LABEL: test_bswap_srli_8_bswap_i32:
65 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
66 ; X86-NEXT: shll $8, %eax
69 ; X64-LABEL: test_bswap_srli_8_bswap_i32:
71 ; X64-NEXT: movl %edi, %eax
72 ; X64-NEXT: shll $8, %eax
74 %1 = call i32 @llvm.bswap.i32(i32 %a)
76 %3 = call i32 @llvm.bswap.i32(i32 %2)
; i64 case: bswap of %a, an intermediate %2, then bswap of %2.
; %2 is defined on a line not visible here (presumably a logical shift right
; by 16, going by the function name -- TODO confirm).
; The 64-bit checks expect a single shlq $16; the visible 32-bit checks load
; both halves and include a shll $16. No byte swap is expected.
80 define i64 @test_bswap_srli_16_bswap_i64(i64 %a) nounwind {
81 ; X86-LABEL: test_bswap_srli_16_bswap_i64:
83 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
84 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
85 ; X86-NEXT: shll $16, %eax
88 ; X64-LABEL: test_bswap_srli_16_bswap_i64:
90 ; X64-NEXT: movq %rdi, %rax
91 ; X64-NEXT: shlq $16, %rax
93 %1 = call i64 @llvm.bswap.i64(i64 %a)
95 %3 = call i64 @llvm.bswap.i64(i64 %2)
; i16 case: bswap of %a, an intermediate %2, then bswap of %2.
; %2 is defined on a line not visible here (presumably a shift left by 8,
; going by the function name -- TODO confirm).
; The checks expect a zero-extending byte move (movzbl) instead of any byte
; swap: from the stack on the 32-bit target, from %ah on the 64-bit target.
99 define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
100 ; X86-LABEL: test_bswap_shli_8_bswap_i16:
102 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
103 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
106 ; X64-LABEL: test_bswap_shli_8_bswap_i16:
108 ; X64-NEXT: movl %edi, %eax
109 ; X64-NEXT: movzbl %ah, %eax
110 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
112 %1 = call i16 @llvm.bswap.i16(i16 %a)
114 %3 = call i16 @llvm.bswap.i16(i16 %2)
; i32 case: bswap of %a, an intermediate %2, then bswap of %2.
; %2 is defined on a line not visible here (presumably a shift left by 8,
; going by the function name -- TODO confirm).
; Both targets are expected to emit a single shrl $8 and no byte swap.
118 define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
119 ; X86-LABEL: test_bswap_shli_8_bswap_i32:
121 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
122 ; X86-NEXT: shrl $8, %eax
125 ; X64-LABEL: test_bswap_shli_8_bswap_i32:
127 ; X64-NEXT: movl %edi, %eax
128 ; X64-NEXT: shrl $8, %eax
130 %1 = call i32 @llvm.bswap.i32(i32 %a)
132 %3 = call i32 @llvm.bswap.i32(i32 %2)
; i64 case: bswap of %a, an intermediate %2, then bswap of %2.
; %2 is defined on a line not visible here (presumably a shift left by 16,
; going by the function name -- TODO confirm).
; The 64-bit checks expect a single shrq $16; the visible 32-bit checks use
; a movl plus a zero-extending 16-bit load (movzwl). No byte swap is expected.
136 define i64 @test_bswap_shli_16_bswap_i64(i64 %a) nounwind {
137 ; X86-LABEL: test_bswap_shli_16_bswap_i64:
139 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
140 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
143 ; X64-LABEL: test_bswap_shli_16_bswap_i64:
145 ; X64-NEXT: movq %rdi, %rax
146 ; X64-NEXT: shrq $16, %rax
148 %1 = call i64 @llvm.bswap.i64(i64 %a)
150 %3 = call i64 @llvm.bswap.i64(i64 %2)
; ORs 0xFF000000 (4278190080) into %a0, byte-swaps the result, then masks
; with 0xFFFF0000 (4294901760). After the swap, the byte set by the OR sits in
; bits 0-7, which the mask clears, so the OR cannot affect the result.
; The checks accordingly expect only bswapl followed by andl, with no orl.
154 define i32 @test_demandedbits_bswap(i32 %a0) nounwind {
155 ; X86-LABEL: test_demandedbits_bswap:
157 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
158 ; X86-NEXT: bswapl %eax
159 ; X86-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
162 ; X64-LABEL: test_demandedbits_bswap:
164 ; X64-NEXT: movl %edi, %eax
165 ; X64-NEXT: bswapl %eax
166 ; X64-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
168 %b = or i32 %a0, 4278190080
169 %c = call i32 @llvm.bswap.i32(i32 %b)
170 %d = and i32 %c, 4294901760
; Loads an i64 from %xp, shifts right by 32 and left by 24, truncates to i32
; and byte-swaps it. The non-zero byte of that value ends up in bits 0-7.
; The result is ORed with the i32 loaded from %yp with its low byte cleared
; (and with -256), and stored back to %yp.
; The checks expect this to reduce to one byte load from offset 4 of %xp and
; one byte store to %yp.
174 define void @demand_one_loaded_byte(ptr %xp, ptr %yp) {
175 ; X86-LABEL: demand_one_loaded_byte:
177 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
178 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
179 ; X86-NEXT: movzbl 4(%ecx), %ecx
180 ; X86-NEXT: movb %cl, (%eax)
183 ; X64-LABEL: demand_one_loaded_byte:
185 ; X64-NEXT: movzbl 4(%rdi), %eax
186 ; X64-NEXT: movb %al, (%rsi)
188 %x = load i64, ptr %xp, align 8
189 %x_zzzz7654 = lshr i64 %x, 32
190 %x_z7654zzz = shl nuw nsw i64 %x_zzzz7654, 24
191 %x_4zzz = trunc i64 %x_z7654zzz to i32
192 %y = load i32, ptr %yp, align 4
193 %y_321z = and i32 %y, -256
194 %x_zzz4 = call i32 @llvm.bswap.i32(i32 %x_4zzz)
195 %r = or i32 %x_zzz4, %y_321z
196 store i32 %r, ptr %yp, align 4
; Zero-extends the i16 argument to i64 and byte-swaps an i64 value %s.
; %s is defined on a line not visible here (presumably %z shifted left by 48,
; going by the function name -- TODO confirm).
; The checks expect a 16-bit rotate by 8 (rolw $8) plus a zero extension
; instead of a 64-bit byte swap; the 32-bit target also zeroes %edx.
200 define i64 @test_bswap64_shift48_zext(i16 %a0) {
201 ; X86-LABEL: test_bswap64_shift48_zext:
203 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
204 ; X86-NEXT: rolw $8, %ax
205 ; X86-NEXT: movzwl %ax, %eax
206 ; X86-NEXT: xorl %edx, %edx
209 ; X64-LABEL: test_bswap64_shift48_zext:
211 ; X64-NEXT: rolw $8, %di
212 ; X64-NEXT: movzwl %di, %eax
214 %z = zext i16 %a0 to i64
216 %b = call i64 @llvm.bswap.i64(i64 %s)
; Byte-swaps an i64 value %s derived from %a0.
; %s is defined on a line not visible here (presumably %a0 shifted left by 48,
; going by the function name -- TODO confirm).
; The checks expect a 16-bit rotate by 8 (rolw $8) plus a zero extension
; instead of a 64-bit byte swap; the 32-bit target also zeroes %edx.
220 define i64 @test_bswap64_shift48(i64 %a0) {
221 ; X86-LABEL: test_bswap64_shift48:
223 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
224 ; X86-NEXT: rolw $8, %ax
225 ; X86-NEXT: movzwl %ax, %eax
226 ; X86-NEXT: xorl %edx, %edx
229 ; X64-LABEL: test_bswap64_shift48:
231 ; X64-NEXT: rolw $8, %di
232 ; X64-NEXT: movzwl %di, %eax
235 %b = call i64 @llvm.bswap.i64(i64 %s)
; Byte-swaps an i32 value %s derived from %a0.
; %s is defined on a line not visible here (presumably %a0 shifted left by 17,
; going by the function name -- TODO confirm).
; The checks expect the shift (shll $17) and the bswapl to both remain.
239 define i32 @test_bswap32_shift17(i32 %a0) {
240 ; X86-LABEL: test_bswap32_shift17:
242 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
243 ; X86-NEXT: shll $17, %eax
244 ; X86-NEXT: bswapl %eax
247 ; X64-LABEL: test_bswap32_shift17:
249 ; X64-NEXT: movl %edi, %eax
250 ; X64-NEXT: shll $17, %eax
251 ; X64-NEXT: bswapl %eax
254 %b = call i32 @llvm.bswap.i32(i32 %s)
; Byte-swaps %a, forms an intermediate %2, then byte-swaps %2.
; %2 is defined on a line not visible here (presumably an 'and' of %1 with
; %b, going by the function name -- TODO confirm).
; The checks expect one bswapl, applied to the second argument (%esi on the
; 64-bit target), followed by an andl with the first argument.
; Attribute group #0 is defined outside the lines visible here.
258 define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
259 ; X86-LABEL: bs_and_lhs_bs32:
261 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
262 ; X86-NEXT: bswapl %eax
263 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
266 ; X64-LABEL: bs_and_lhs_bs32:
268 ; X64-NEXT: movl %esi, %eax
269 ; X64-NEXT: bswapl %eax
270 ; X64-NEXT: andl %edi, %eax
272 %1 = tail call i32 @llvm.bswap.i32(i32 %a)
274 %3 = tail call i32 @llvm.bswap.i32(i32 %2)
; Byte-swaps %a, forms an intermediate %2, then byte-swaps %2.
; %2 is defined on a line not visible here (presumably an 'or' of %1 with
; %b, going by the function name -- TODO confirm).
; The 64-bit checks expect one bswapq of the second argument (%rsi) followed
; by an orq with the first; the 32-bit checks do the same per 32-bit half.
; Attribute group #0 is defined outside the lines visible here.
278 define i64 @bs_or_lhs_bs64(i64 %a, i64 %b) #0 {
279 ; X86-LABEL: bs_or_lhs_bs64:
281 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
282 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
283 ; X86-NEXT: bswapl %eax
284 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
285 ; X86-NEXT: bswapl %edx
286 ; X86-NEXT: orl {{[0-9]+}}(%esp), %edx
289 ; X64-LABEL: bs_or_lhs_bs64:
291 ; X64-NEXT: movq %rsi, %rax
292 ; X64-NEXT: bswapq %rax
293 ; X64-NEXT: orq %rdi, %rax
295 %1 = tail call i64 @llvm.bswap.i64(i64 %a)
297 %3 = tail call i64 @llvm.bswap.i64(i64 %2)
; Byte-swaps %b, forms an intermediate %2, then byte-swaps %2.
; %2 is defined on a line not visible here (presumably an 'xor' of %a with
; %1, going by the function name -- TODO confirm).
; The 64-bit checks expect one bswapq of the first argument (%rdi) followed
; by an xorq with the second; the 32-bit checks do the same per 32-bit half.
; Attribute group #0 is defined outside the lines visible here.
301 define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
302 ; X86-LABEL: bs_xor_rhs_bs64:
304 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
305 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
306 ; X86-NEXT: bswapl %eax
307 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
308 ; X86-NEXT: bswapl %edx
309 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
312 ; X64-LABEL: bs_xor_rhs_bs64:
314 ; X64-NEXT: movq %rdi, %rax
315 ; X64-NEXT: bswapq %rax
316 ; X64-NEXT: xorq %rsi, %rax
318 %1 = tail call i64 @llvm.bswap.i64(i64 %b)
320 %3 = tail call i64 @llvm.bswap.i64(i64 %2)
; Byte-swaps both %a and %b, forms an intermediate %3, byte-swaps %3, and then
; multiplies the outer result by each of the two inner swaps so that both
; inner bswap values have an additional use.
; %3 is defined on a line not visible here (presumably an 'and' of %1 and %2,
; going by the function name -- TODO confirm).
; The checks expect two bswapl, one andl on the unswapped arguments and two
; imull instructions.
; Attribute group #0 is defined outside the lines visible here.
324 define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
325 ; X86-LABEL: bs_and_all_operand_multiuse:
327 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
328 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
329 ; X86-NEXT: movl %eax, %edx
330 ; X86-NEXT: bswapl %edx
331 ; X86-NEXT: andl %ecx, %eax
332 ; X86-NEXT: bswapl %ecx
333 ; X86-NEXT: imull %edx, %eax
334 ; X86-NEXT: imull %ecx, %eax
337 ; X64-LABEL: bs_and_all_operand_multiuse:
339 ; X64-NEXT: movl %edi, %eax
340 ; X64-NEXT: bswapl %eax
341 ; X64-NEXT: andl %esi, %edi
342 ; X64-NEXT: bswapl %esi
343 ; X64-NEXT: imull %edi, %eax
344 ; X64-NEXT: imull %esi, %eax
346 %1 = tail call i32 @llvm.bswap.i32(i32 %a)
347 %2 = tail call i32 @llvm.bswap.i32(i32 %b)
349 %4 = tail call i32 @llvm.bswap.i32(i32 %3)
350 %5 = mul i32 %1, %4 ;increase use of left bswap
351 %6 = mul i32 %2, %5 ;increase use of right bswap
; Byte-swaps %b, forms an intermediate %2, byte-swaps %2, and multiplies %2 by
; the outer swap so that the logical op has a second use.
; %2 is defined on a line not visible here (presumably an 'and' of %a with
; %1, going by the function name -- TODO confirm).
; The checks expect both bswapl instructions to remain: one before the andl
; and one after it.
; Attribute group #0 is defined outside the lines visible here.
357 define i32 @bs_and_rhs_bs32_multiuse1(i32 %a, i32 %b) #0 {
358 ; X86-LABEL: bs_and_rhs_bs32_multiuse1:
360 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
361 ; X86-NEXT: bswapl %ecx
362 ; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
363 ; X86-NEXT: movl %ecx, %eax
364 ; X86-NEXT: bswapl %eax
365 ; X86-NEXT: imull %ecx, %eax
368 ; X64-LABEL: bs_and_rhs_bs32_multiuse1:
370 ; X64-NEXT: bswapl %esi
371 ; X64-NEXT: andl %edi, %esi
372 ; X64-NEXT: movl %esi, %eax
373 ; X64-NEXT: bswapl %eax
374 ; X64-NEXT: imull %esi, %eax
376 %1 = tail call i32 @llvm.bswap.i32(i32 %b)
378 %3 = tail call i32 @llvm.bswap.i32(i32 %2)
379 %4 = mul i32 %2, %3 ;increase use of logical op
; Byte-swaps %b, forms an intermediate %2, byte-swaps %2, and multiplies the
; inner swap %1 by the outer swap so that the inner bswap has a second use.
; %2 is defined on a line not visible here (presumably an 'and' of %a with
; %1, going by the function name -- TODO confirm).
; The checks expect both bswapl instructions to remain: one before the andl
; and one after it.
; Attribute group #0 is defined outside the lines visible here.
384 define i32 @bs_and_rhs_bs32_multiuse2(i32 %a, i32 %b) #0 {
385 ; X86-LABEL: bs_and_rhs_bs32_multiuse2:
387 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
388 ; X86-NEXT: bswapl %ecx
389 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
390 ; X86-NEXT: andl %ecx, %eax
391 ; X86-NEXT: bswapl %eax
392 ; X86-NEXT: imull %ecx, %eax
395 ; X64-LABEL: bs_and_rhs_bs32_multiuse2:
397 ; X64-NEXT: movl %edi, %eax
398 ; X64-NEXT: bswapl %esi
399 ; X64-NEXT: andl %esi, %eax
400 ; X64-NEXT: bswapl %eax
401 ; X64-NEXT: imull %esi, %eax
403 %1 = tail call i32 @llvm.bswap.i32(i32 %b)
405 %3 = tail call i32 @llvm.bswap.i32(i32 %2)
406 %4 = mul i32 %1, %3 ;increase use of inner bswap
; Byte-swaps an i64 value %s derived from %a0.
; %s is defined on a line not visible here (presumably %a0 shifted left by 17,
; going by the function name -- TODO confirm).
; The checks expect the shift and the byte swap to both remain: shlq $17 plus
; bswapq on the 64-bit target, shldl/shll $17 plus two bswapl on the 32-bit
; target.
411 define i64 @test_bswap64_shift17(i64 %a0) {
412 ; X86-LABEL: test_bswap64_shift17:
414 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
415 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
416 ; X86-NEXT: shldl $17, %edx, %eax
417 ; X86-NEXT: shll $17, %edx
418 ; X86-NEXT: bswapl %eax
419 ; X86-NEXT: bswapl %edx
422 ; X64-LABEL: test_bswap64_shift17:
424 ; X64-NEXT: movq %rdi, %rax
425 ; X64-NEXT: shlq $17, %rax
426 ; X64-NEXT: bswapq %rax
429 %b = call i64 @llvm.bswap.i64(i64 %s)
; Byte-swaps an i64 value %s and stores an i64 value %a through %a1.
; %s and %a are defined on lines not visible here (presumably %s is %a0
; shifted left by 48 and %a combines %s with %b -- TODO confirm).
; The 64-bit checks expect the shlq $48 and the bswapq to both remain,
; followed by an orq and the store. The 32-bit checks use shll $16, one
; bswapl, two 32-bit stores and a zeroed %edx.
434 define i64 @test_bswap64_shift48_multiuse(i64 %a0, ptr %a1) {
435 ; X86-LABEL: test_bswap64_shift48_multiuse:
437 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
438 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
439 ; X86-NEXT: shll $16, %eax
440 ; X86-NEXT: movl %eax, 4(%ecx)
441 ; X86-NEXT: bswapl %eax
442 ; X86-NEXT: movl %eax, (%ecx)
443 ; X86-NEXT: xorl %edx, %edx
446 ; X64-LABEL: test_bswap64_shift48_multiuse:
448 ; X64-NEXT: shlq $48, %rdi
449 ; X64-NEXT: movq %rdi, %rax
450 ; X64-NEXT: bswapq %rax
451 ; X64-NEXT: orq %rax, %rdi
452 ; X64-NEXT: movq %rdi, (%rsi)
455 %b = call i64 @llvm.bswap.i64(i64 %s)
457 store i64 %a, ptr %a1