test/Transforms/Inline/last-callsite.ll

   1 ; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=0 -S | FileCheck %s
   2
   3 ; The 'test1_' prefixed functions test the basic 'last callsite' inline
   4 ; threshold adjustment where we specifically inline the last call site of an
   5 ; internal function regardless of cost.
   6
   7 define internal void @test1_f() {
     ; Internal function with exactly one call site, in @test1. The checks in
     ; @test1 expect no reference to it to remain there, even though the RUN line
     ; sets -inline-threshold=0.
   8 entry:
     ; The body is a local i32 slot followed by eight volatile stores of 0.
     ; NOTE(review): presumably volatile so the stores are kept and give the body
     ; a cost above the zero threshold -- confirm against the inline cost model.
   9   %p = alloca i32
  10   store volatile i32 0, i32* %p
  11   store volatile i32 0, i32* %p
  12   store volatile i32 0, i32* %p
  13   store volatile i32 0, i32* %p
  14   store volatile i32 0, i32* %p
  15   store volatile i32 0, i32* %p
  16   store volatile i32 0, i32* %p
  17   store volatile i32 0, i32* %p
  18   ret void
  19 }
  20
  21 ; Identical to @test1_f, but @test1 calls it twice, so it must not be inlined.
  22 ; If this *does* get inlined, the body shared by this function and @test1_f
  23 ; is not a good test of the different threshold applied to the last call.
  24 define internal void @test1_g() {
  25 entry:
     ; Same body as @test1_f: a local i32 slot and eight volatile stores of 0.
  26   %p = alloca i32
  27   store volatile i32 0, i32* %p
  28   store volatile i32 0, i32* %p
  29   store volatile i32 0, i32* %p
  30   store volatile i32 0, i32* %p
  31   store volatile i32 0, i32* %p
  32   store volatile i32 0, i32* %p
  33   store volatile i32 0, i32* %p
  34   store volatile i32 0, i32* %p
  35   ret void
  36 }
  37
  38 define void @test1() {
  39 ; CHECK-LABEL: define void @test1()
  40 entry:
       ; The only call to @test1_f in this file: it is expected to be inlined, so
       ; no mention of @test1_f may remain before the @test1_g calls below.
  41   call void @test1_f()
  42 ; CHECK-NOT: @test1_f
  43
       ; @test1_g has two call sites, so neither gets the last-call-site treatment
       ; and both calls are expected to remain in the output.
  44   call void @test1_g()
  45   call void @test1_g()
  46 ; CHECK: call void @test1_g()
  47 ; CHECK: call void @test1_g()
  48
  49   ret void
  50 }
  51
  52
  53 ; The 'test2_' prefixed functions test that we can discover the last callsite
  54 ; bonus after having inlined the prior call site. For this to work, we need
  55 ; a callsite dependent cost so we have a trivial predicate guarding all the
  56 ; cost, and set that in a particular direction.
  57
  58 define internal void @test2_f(i1 %b) {
     ; %b guards all of the costly work: when it is false the function branches
     ; straight to %exit, and when it is true it runs the eight volatile stores in
     ; %then. @test2 calls this once with false and then once with true.
  59 entry:
  60   %p = alloca i32
  61   br i1 %b, label %then, label %exit
  62
  63 then:
  64   store volatile i32 0, i32* %p
  65   store volatile i32 0, i32* %p
  66   store volatile i32 0, i32* %p
  67   store volatile i32 0, i32* %p
  68   store volatile i32 0, i32* %p
  69   store volatile i32 0, i32* %p
  70   store volatile i32 0, i32* %p
  71   store volatile i32 0, i32* %p
  72   br label %exit
  73
  74 exit:
  75   ret void
  76 }
  77
  78 ; Identical to @test2_f, but @test2 calls it twice with the expensive
  79 ; predicate (i1 true), so it must not be inlined. If this *does* get inlined,
  80 ; the body shared with @test2_f is not a good test of the different threshold
     ; applied to the last call.
  81 define internal void @test2_g(i1 %b) {
  82 entry:
  83   %p = alloca i32
       ; As in @test2_f, %b decides whether the eight volatile stores run at all.
  84   br i1 %b, label %then, label %exit
  85
  86 then:
  87   store volatile i32 0, i32* %p
  88   store volatile i32 0, i32* %p
  89   store volatile i32 0, i32* %p
  90   store volatile i32 0, i32* %p
  91   store volatile i32 0, i32* %p
  92   store volatile i32 0, i32* %p
  93   store volatile i32 0, i32* %p
  94   store volatile i32 0, i32* %p
  95   br label %exit
  96
  97 exit:
  98   ret void
  99 }
 100
 101 define void @test2() {
 102 ; CHECK-LABEL: define void @test2()
 103 entry:
 104   ; The first call is trivial to inline due to the argument.
 105   call void @test2_f(i1 false)
 106 ; CHECK-NOT: @test2_f
 107
 108   ; The second call is too expensive to inline unless we update the number of
 109   ; calls after inlining the first.
 110   call void @test2_f(i1 true)
 111 ; CHECK-NOT: @test2_f
 112
 113   ; Sanity check that two calls with the hard predicate remain uninlined.
 114   call void @test2_g(i1 true)
 115   call void @test2_g(i1 true)
 116 ; CHECK: call void @test2_g(i1 true)
 117 ; CHECK: call void @test2_g(i1 true)
 118
 119   ret void
 120 }
 121
 122
 123 ; The 'test3_' prefixed functions are similar to the 'test2_' functions but the
 124 ; relative order of the trivial and hard to inline callsites is reversed. This
 125 ; checks that the order of calls isn't significant to whether we observe the
 126 ; "last callsite" threshold difference because the next-to-last gets inlined.
 127 ; FIXME: We don't currently catch this case.
 128
 129 define internal void @test3_f(i1 %b) {
     ; Same shape as @test2_f: %b guards the eight volatile stores in %then, and a
     ; false %b branches straight to %exit. @test3 calls this with true first and
     ; false second, the reverse of the order used in @test2.
 130 entry:
 131   %p = alloca i32
 132   br i1 %b, label %then, label %exit
 133
 134 then:
 135   store volatile i32 0, i32* %p
 136   store volatile i32 0, i32* %p
 137   store volatile i32 0, i32* %p
 138   store volatile i32 0, i32* %p
 139   store volatile i32 0, i32* %p
 140   store volatile i32 0, i32* %p
 141   store volatile i32 0, i32* %p
 142   store volatile i32 0, i32* %p
 143   br label %exit
 144
 145 exit:
 146   ret void
 147 }
 148
 149 ; Identical to @test3_f, but @test3 calls it twice with the expensive
 150 ; predicate (i1 true), so it must not be inlined. If this *does* get inlined,
 151 ; the body shared with @test3_f is not a good test of the different threshold
     ; applied to the last call.
 152 define internal void @test3_g(i1 %b) {
 153 entry:
 154   %p = alloca i32
       ; As in @test3_f, %b decides whether the eight volatile stores run at all.
 155   br i1 %b, label %then, label %exit
 156
 157 then:
 158   store volatile i32 0, i32* %p
 159   store volatile i32 0, i32* %p
 160   store volatile i32 0, i32* %p
 161   store volatile i32 0, i32* %p
 162   store volatile i32 0, i32* %p
 163   store volatile i32 0, i32* %p
 164   store volatile i32 0, i32* %p
 165   store volatile i32 0, i32* %p
 166   br label %exit
 167
 168 exit:
 169   ret void
 170 }
 171
 172 define void @test3() {
 173 ; CHECK-LABEL: define void @test3()
 174 entry:
 175   ; The first call is too expensive to inline unless we update the number of
 176   ; calls after inlining the second.
       ; Today that does not happen, so the check below expects the call to stay.
 177   call void @test3_f(i1 true)
 178 ; FIXME: We should inline this call without iteration.
 179 ; CHECK: call void @test3_f(i1 true)
 180
 181   ; But the second call is trivial to inline due to the argument.
 182   call void @test3_f(i1 false)
 183 ; CHECK-NOT: @test3_f
 184
 185   ; Sanity check that two calls with the hard predicate remain uninlined.
 186   call void @test3_g(i1 true)
 187   call void @test3_g(i1 true)
 188 ; CHECK: call void @test3_g(i1 true)
 189 ; CHECK: call void @test3_g(i1 true)
 190
 191   ret void
 192 }
 193
 194
 195 ; The 'test4_' prefixed functions are similar to the 'test2_' prefixed
 196 ; functions but include unusual constant expressions that make discovering that
 197 ; a function is dead harder.
 198
 199 define internal void @test4_f(i1 %b) {
     ; Same shape as @test2_f: %b guards the eight volatile stores in %then, and a
     ; false %b branches straight to %exit. @test4 calls this twice, and the first
     ; call's argument is a constant expression that itself references @test4_f.
 200 entry:
 201   %p = alloca i32
 202   br i1 %b, label %then, label %exit
 203
 204 then:
 205   store volatile i32 0, i32* %p
 206   store volatile i32 0, i32* %p
 207   store volatile i32 0, i32* %p
 208   store volatile i32 0, i32* %p
 209   store volatile i32 0, i32* %p
 210   store volatile i32 0, i32* %p
 211   store volatile i32 0, i32* %p
 212   store volatile i32 0, i32* %p
 213   br label %exit
 214
 215 exit:
 216   ret void
 217 }
 218
 219 ; Identical to @test4_f but doesn't get inlined because it has more than one
 220 ; use: @test4 calls it once and also references it from a constant expression.
 221 ; If this *does* get inlined, the body shared with @test4_f is not a good test
     ; of the different threshold applied to the last call.
 222 define internal void @test4_g(i1 %b) {
 223 entry:
 224   %p = alloca i32
       ; As in @test4_f, %b decides whether the eight volatile stores run at all.
 225   br i1 %b, label %then, label %exit
 226
 227 then:
 228   store volatile i32 0, i32* %p
 229   store volatile i32 0, i32* %p
 230   store volatile i32 0, i32* %p
 231   store volatile i32 0, i32* %p
 232   store volatile i32 0, i32* %p
 233   store volatile i32 0, i32* %p
 234   store volatile i32 0, i32* %p
 235   store volatile i32 0, i32* %p
 236   br label %exit
 237
 238 exit:
 239   ret void
 240 }
 241
 242 define void @test4() {
 243 ; CHECK-LABEL: define void @test4()
 244 entry:
 245   ; The first call is trivial to inline due to the argument. However this
 246   ; argument also uses the function being called as part of a complex
 247   ; constant expression. Merely inlining and deleting the call isn't enough to
 248   ; drop the use count here, we need to GC the dead constant expression as
 249   ; well.
 250   call void @test4_f(i1 icmp ne (i64 ptrtoint (void (i1)* @test4_f to i64), i64 ptrtoint(void (i1)* @test4_f to i64)))
 251 ; CHECK-NOT: @test4_f
 252
 253   ; The second call is too expensive to inline unless we update the number of
 254   ; calls after inlining the first.
 255   call void @test4_f(i1 true)
 256 ; CHECK-NOT: @test4_f
 257
 258   ; And check that a single call to a function which is used by a complex
 259   ; constant expression cannot be inlined because the constant expression forms
 260   ; a second use. If this part starts failing we need to use more complex
 261   ; constant expressions to reference a particular function with them.
 262   %sink = alloca i1
 263   store volatile i1 icmp ne (i64 ptrtoint (void (i1)* @test4_g to i64), i64 ptrtoint(void (i1)* @test4_g to i64)), i1* %sink
 264   call void @test4_g(i1 true)
 265 ; CHECK: store volatile i1 false
 266 ; CHECK: call void @test4_g(i1 true)
 267
 268   ret void
 269 }