llvm/test/Transforms/SampleProfile/profile-correlation-irreducible-loops.ll

   1 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-correlation-irreducible-loops.prof -sample-profile-use-profi=0 | opt -passes='print<block-freq>' -disable-output  -use-iterative-bfi-inference 2>&1 | FileCheck %s
   2 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-correlation-irreducible-loops.prof -sample-profile-use-profi=0 -S | FileCheck %s --check-prefix=CHECK2
   3 ; RUN: opt < %s -passes='print<block-freq>' -use-iterative-bfi-inference -disable-output 2>&1 | FileCheck %s --check-prefix=CHECK3
   4
   5 ; The C code for this test case is from c-parse.c in 403.gcc (SPEC2006).
   6 ; The problem with BFI for the test is solved by applying iterative inference.
   7 ; The corresponding CFG graph is shown below, with intended counts for every
   8 ; basic block. The hot loop, b3->b4->b2, is not getting proper (large) counts
   9 ; unless the -use-iterative-bfi-inference option is specified.
  10 ;
  11 ;   +-------------------------------------------+
  12 ;   |                                           |
  13 ;   |                   +----------+            |
  14 ;   |                   |  b1 [1]  |            |
  15 ;   |                   +----------+            |
  16 ;   |                     |                     |
  17 ;   |                     |                     |
  18 ;   |                     v                     |
  19 ;   |                   +----------+            |
  20 ;   |    +------------> | b2 [625] | -+         |
  21 ;   |    |              +----------+  |         |
  22 ;   |    |                |           |         |
  23 ;   |    |                |           |         |
  24 ;   |    |                v           |         |
  25 ;   |  +----------+     +----------+  |         |
  26 ;   |  | b4 [624] | <-- | b3 [625] | <+---------+
  27 ;   |  +----------+     +----------+  |
  28 ;   |                     |           |
  29 ;   +----+                |           |
  30 ;        |                v           v
  31 ;      +----------+     +--------------------+
  32 ;      |  b8 [1]  | <-- |       b7 [2]       |
  33 ;      +----------+     +--------------------+
  34 ;                         |           ^
  35 ;                         |           |
  36 ;                         v           |
  37 ;      +----------+     +----------+  |
  38 ;      |  b9 [1]  | <-- |  b5 [2]  |  |
  39 ;      +----------+     +----------+  |
  40 ;                         |           |
  41 ;                         |           |
  42 ;                         v           |
  43 ;                       +----------+  |
  44 ;                       |  b6 [1]  | -+
  45 ;                       +----------+
  46
  47 @yydebug = dso_local global i32 0, align 4 ; zero-initialized i32 flag; loaded in yyparse_1 and foo1
  48 ; yyparse_1: nine blocks (b1..b9) with one pseudo probe call each; every conditional branch tests the same %cmp, so the IR itself does not distinguish hot from cold edges.
  49 ; Function Attrs: noinline nounwind uwtable
  50 define dso_local i32 @yyparse_1() #0 {
  51 b1:
  52   call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 1, i32 0, i64 -1)
  53   %0 = load i32, ptr @yydebug, align 4
  54   %cmp = icmp ne i32 %0, 0 ; %cmp = (yydebug != 0); computed once here and reused by b2, b3, b5 and b7
  55   br label %b2
  56 ; CHECK: - b1: float = {{.*}}, int = {{.*}}, count = 1
  57 ; b2: reached from b1 and from b4; goes to b7 or b3. NOTE(review): the diagram above lists b2/b3/b4 as 625/625/624 while the expectations below use 586/586/585; presumably the diagram shows ideal counts and the expectations the inferred ones - confirm.
  58 b2:
  59   call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 2, i32 0, i64 -1)
  60   br i1 %cmp, label %b7, label %b3
  61 ; CHECK: - b2: float = {{.*}}, int = {{.*}}, count = 586
  62 ; b3: reached from b2 and b8. Since b2 also branches to b7, the cycle b3 -> b7 -> b8 -> b3 can be entered at b3 or at b7, which makes it irreducible.
  63 b3:
  64   call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 3, i32 0, i64 -1)
  65   br i1 %cmp, label %b7, label %b4
  66 ; CHECK: - b3: float = {{.*}}, int = {{.*}}, count = 586
  67 ; CHECK2: br i1 %cmp, label %b7, label %b4,
  68 ; CHECK2-SAME: !prof ![[END172_PROF:[0-9]+]]
  69 ; b4: closes the b2 -> b3 -> b4 cycle by branching unconditionally back to b2.
  70 b4:
  71   call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 4, i32 0, i64 -1)
  72   br label %b2
  73 ; CHECK: - b4: float = {{.*}}, int = {{.*}}, count = 585
  74 ; b5: reached only from b7; leaves through b9 or continues to b6.
  75 b5:
  76   call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 5, i32 0, i64 -1)
  77   br i1 %cmp, label %b9, label %b6
  78 ; CHECK: - b5: float = {{.*}}, int = {{.*}}, count = 2
  79 ; b6: branches back to b7, closing the b7 -> b5 -> b6 cycle.
  80 b6:
  81   call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 6, i32 0, i64 -1)
  82   br label %b7
  83 ; CHECK: - b6: float = {{.*}}, int = {{.*}}, count = 1
  84 ; b7: reached from b2, b3 and b6; goes to b5 or b8.
  85 b7:
  86   call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 7, i32 0, i64 -1)
  87   br i1 %cmp, label %b5, label %b8
  88 ; CHECK: - b7: float = {{.*}}, int = {{.*}}, count = 2
  89 ; CHECK2: br i1 %cmp, label %b5, label %b8,
  90 ; CHECK2-SAME: !prof ![[FALSE4858_PROF:[0-9]+]]
  91 ; b8: reached only from b7; branches unconditionally to b3.
  92 b8:
  93   call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 8, i32 0, i64 -1)
  94   br label %b3
  95 ; CHECK: - b8: float = {{.*}}, int = {{.*}}, count = 1
  96 ; b9: the only returning block; returns a fresh load of @yydebug.
  97 b9:
  98   call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 9, i32 0, i64 -1)
  99   %1 = load i32, ptr @yydebug, align 4
 100   ret i32 %1
 101 ; CHECK: - b9: float = {{.*}}, int = {{.*}}, count = 1
 102
 103 }
 104
 105 ; Another difficult (for BFI) instance with irreducible loops,
 106 ; containing 'indirectbr'. The corresponding CFG graph is shown below, with
 107 ; intended counts for every basic block.
 108 ;
 109 ;      +-----------+
 110 ;      |  b1 [1]   |
 111 ;      +-----------+
 112 ;        |
 113 ;        |
 114 ;        v
 115 ;      +------------------------+
 116 ;   +- |        b2 [86]         | <+
 117 ;   |  +------------------------+  |
 118 ;   |    |            |            |
 119 ;   |    |            |            |
 120 ;   |    v            |            |
 121 ;   |  +-----------+  |            |
 122 ;   |  | b3 [8212] | <+-------+    |
 123 ;   |  +-----------+  |       |    |
 124 ;   |    |            |       |    |
 125 ;   |    |            |       |    |
 126 ;   |    v            v       |    |
 127 ;   |  +------------------------+  |
 128 ;   |  |  indirectgoto [17747]  | -+
 129 ;   |  +------------------------+
 130 ;   |    |            ^  |
 131 ;   |    |            +--+
 132 ;   |    v
 133 ;   |  +-----------+
 134 ;   +> |  b4 [1]   |
 135 ;      +-----------+
 136 ; foo1: carries its profile inline (!prof on the function, the switch and the indirectbr); the third run command prints its block frequencies directly, without the sample-profile pass.
 137 ; Function Attrs: noinline nounwind uwtable
 138 define dso_local i32 @foo1() #0 !prof !132 {
 139 b1:
 140   call void @llvm.pseudoprobe(i64 7682762345278052905, i64 1, i32 0, i64 -1)
 141   %0 = load i32, ptr @yydebug, align 4
 142   %cmp = icmp ne i32 %0, 0 ; not referenced again in this function
 143   br label %b2
 144 ; CHECK3: - b1: float = {{.*}}, int = {{.*}}, count = 1
 145 ; b2: three-way switch on a fresh load of @yydebug; !133 puts all of the weight (86) on the "case 1" edge to indirectgoto.
 146 b2:
 147   call void @llvm.pseudoprobe(i64 7682762345278052905, i64 2, i32 0, i64 -1)
 148   %1 = load i32, ptr @yydebug, align 4
 149   switch i32 %1, label %b4 [
 150     i32 1, label %indirectgoto
 151     i32 2, label %b3
 152   ], !prof !133
 153 ; CHECK3: - b2: float = {{.*}}, int = {{.*}}, count = 86
 154 ; b3: reached from the switch in b2 and from the indirectbr; branches unconditionally to indirectgoto.
 155 b3:
 156   call void @llvm.pseudoprobe(i64 7682762345278052905, i64 3, i32 0, i64 -1)
 157   br label %indirectgoto
 158 ; CHECK3: - b3: float = {{.*}}, int = {{.*}}, count = 8212
 159 ; b4: the only returning block; returns a fresh load of @yydebug.
 160 b4:
 161   call void @llvm.pseudoprobe(i64 7682762345278052905, i64 4, i32 0, i64 -1)
 162   %2 = load i32, ptr @yydebug, align 4
 163   ret i32 %2
 164 ; CHECK3: - b4: float = {{.*}}, int = {{.*}}, count = 1
 165 ; indirectgoto: four-way indirectbr, including an edge back to itself; the weights in !134 sum to 17747.
 166 indirectgoto:
 167   %indirect.goto.dest = alloca i8, align 4 ; the branch address is an alloca result rather than a blockaddress
 168   call void @llvm.pseudoprobe(i64 7682762345278052905, i64 5, i32 0, i64 -1)
 169   indirectbr ptr %indirect.goto.dest, [label %b2, label %indirectgoto, label %b4, label %b3], !prof !134
 170 ; CHECK3: - indirectgoto: float = {{.*}}, int = {{.*}}, count = 17747
 171
 172 }
 173
 174 declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1
 175 ; Attribute group #0 is shared by yyparse_1 and foo1; #1 is used only by the llvm.pseudoprobe declaration.
 176 attributes #0 = { noinline nounwind uwtable "use-sample-profile"}
 177 attributes #1 = { nounwind }
 178 ; Probe descriptors: the first i64 of each entry equals the first argument of every llvm.pseudoprobe call in the named function. NOTE(review): the second i64 is presumably the function's CFG hash - confirm.
 179 !llvm.pseudo_probe_desc = !{!1079, !4496}
 180 !1079 = !{i64 -7702751003264189226, i64 158496288380146391, !"yyparse_1", null}
 181 !4496 = !{i64 7682762345278052905, i64 404850113186107133, !"foo1", null}
 182 !132 = !{!"function_entry_count", i64 1} ; attached to the definition of @foo1
 183 !133 = !{!"branch_weights", i32 0, i32 86, i32 0} ; switch in foo1's b2, default first: b4, indirectgoto (case 1), b3 (case 2)
 184 !134 = !{!"branch_weights", i32 85, i32 9449, i32 1, i32 8212} ; indirectbr in foo1, in destination-list order: b2, indirectgoto, b4, b3
 185
 186 ; CHECK2: ![[END172_PROF]] = !{!"branch_weights", i32 1, i32 1003}
 187 ; CHECK2: ![[FALSE4858_PROF]] = !{!"branch_weights", i32 2, i32 1}