Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / test / X86 / tail-duplication-cache.s
blobd7955eff9ead9bd480b90ad8c0282af5c28ad82c
1 # REQUIRES: system-linux
3 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
4 # RUN: %s -o %t.o
5 # RUN: link_fdata %s %t.o %t.fdata
6 # RUN: link_fdata %s %t.o %t.fdata2 "FDATA2"
7 # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
8 # RUN: llvm-bolt %t.exe --data %t.fdata --reorder-blocks=none \
9 # RUN: --print-finalized --tail-duplication=cache -o %t.out | FileCheck %s
10 # RUN: llvm-bolt %t.exe --data %t.fdata2 --reorder-blocks=none \
11 # RUN: --print-finalized --tail-duplication=cache -o %t.out2 \
12 # RUN: | FileCheck --check-prefix="CHECK2" %s
14 # A test where the tail is duplicated to eliminate an unconditional jump
15 # FDATA: 1 main #.BB0_br# 1 main #.BB4# 0 100
16 # FDATA: 1 main #.BB0_br# 1 main #.BB1# 0 100
17 # FDATA: 1 main #.BB1_br# 1 main #.BB3# 0 50
18 # FDATA: 1 main #.BB1_br# 1 main #.BB2# 0 50
19 # FDATA: 1 main #.BB3_br# 1 main #.BB2# 0 50
20 # CHECK: BOLT-INFO: tail duplication modified 1 ({{.*}}%) functions; duplicated 1 blocks (13 bytes) responsible for 50 dynamic executions ({{.*}}% of all block executions)
21 # CHECK: BB Layout : .LBB00, .Ltmp0, .Ltmp1, .Ltmp2, .Ltmp3, .Ltmp4, .Ltmp5, .Ltail-dup0, .Ltmp6
23 # A test where the tail is not duplicated due to the cache score
24 # FDATA2: 1 main #.BB0_br# 1 main #.BB4# 0 100
25 # FDATA2: 1 main #.BB0_br# 1 main #.BB1# 0 2
26 # FDATA2: 1 main #.BB1_br# 1 main #.BB3# 0 1
27 # FDATA2: 1 main #.BB1_br# 1 main #.BB2# 0 1
28 # FDATA2: 1 main #.BB3_br# 1 main #.BB2# 0 1
29 # CHECK2: BOLT-INFO: tail duplication modified 0 (0.00%) functions; duplicated 0 blocks (0 bytes) responsible for 0 dynamic executions (0.00% of all block executions)
30 # CHECK2: BB Layout : .LBB00, .Ltmp0, .Ltmp1, .Ltmp2, .Ltmp3, .Ltmp4, .Ltmp5, .Ltmp6
# Test subject: a small hand-written `main` whose basic blocks carry explicit
# labels so that the profile lines in the file header can refer to them by name.
32 .text
33 .globl main
34 .type main, %function
# The size of main is everything from `main` up to the .Lend marker below.
35 .size main, .Lend-main
36 main:
# Entry block: zero %eax, then compare it against %ebx.
37 .BB0:
38 xor %eax, %eax
39 cmpl %eax, %ebx
# The branch gets its own label so the profile can name it as a branch source.
40 .BB0_br:
41 je .BB4
# Fall-through block; the je below tests the flags left by this inc.
42 .BB1:
43 inc %rax
44 .BB1_br:
45 je .BB3
# Tail block: reached by fall-through from .BB1 and by the jmp at the end of
# .BB3. Four `inc %rax` (3 bytes each) plus `retq` (1 byte) should encode to
# 13 bytes, which is consistent with the block size quoted in the expected
# tail-duplication message for the first profile.
46 .BB2:
47 inc %rax
48 inc %rax
49 inc %rax
50 inc %rax
51 retq
# Block that ends in the only unconditional jump in this function; its target
# is the tail block .BB2.
52 .BB3:
53 inc %rax
54 .BB3_br:
55 jmp .BB2
# Early-exit block, target of the je in .BB0.
56 .BB4:
57 retq
58 # For relocations against .text
# This call follows the final retq and has no label of its own, so nothing in
# this file branches to it.
59 call exit
# End-of-function marker referenced by the .size directive above.
60 .Lend: