1 ;; Test callsite context graph generation for call graph with two memprof
2 ;; contexts and partial inlining, requiring generation of a new fused node to
3 ;; represent the inlined sequence while matching callsite nodes onto the graph.
4 ;; Also tests graph and IR cloning.
6 ;; Original code looks like:
9 ;; return new char[10];
20 ;; int main(int argc, char **argv) {
31 ;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
32 ;; memory freed after sleep(10) results in cold lifetimes.
34 ;; The code below was created by forcing inlining of baz into foo, and
35 ;; bar into baz. Due to the inlining of bar we will initially have two
36 ;; allocation nodes in the graph. This tests that we correctly match
37 ;; foo (with baz inlined) onto the graph nodes first, and generate a new
38 ;; fused node for it. We should then not match baz (with bar inlined) as that
39 ;; is not reached by the MIB contexts (since all calls from main will look
40 ;; like main -> foo(+baz) -> bar after the inlining reflected in this IR).
42 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
44 ;; -stats requires asserts
47 ; RUN: opt -thinlto-bc %s >%t.o
48 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
49 ; RUN: -supports-hot-cold-new \
50 ; RUN: -r=%t.o,main,plx \
51 ; RUN: -r=%t.o,_ZdaPv, \
52 ; RUN: -r=%t.o,sleep, \
53 ; RUN: -r=%t.o,_Znam, \
54 ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
55 ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
56 ; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
57 ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
58 ; RUN: --check-prefix=STATS --check-prefix=STATS-BE \
59 ; RUN: --check-prefix=STATS-INPROCESS-BE --check-prefix=REMARKS
61 ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
62 ;; We should create clones for foo and bar for the call from main to allocate cold memory.
64 ; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
66 ; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
69 ;; Try again but with distributed ThinLTO
70 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
71 ; RUN: -supports-hot-cold-new \
72 ; RUN: -thinlto-distributed-indexes \
73 ; RUN: -r=%t.o,main,plx \
74 ; RUN: -r=%t.o,_ZdaPv, \
75 ; RUN: -r=%t.o,sleep, \
76 ; RUN: -r=%t.o,_Znam, \
77 ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
78 ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
79 ; RUN: -stats -pass-remarks=memprof-context-disambiguation \
80 ; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
81 ; RUN: --check-prefix=STATS
83 ; RUN: cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
84 ;; The distributed link above writes its dot files under the %t2. prefix, so read those rather than the files left by the first run. We should create clones for foo and bar for the call from main to allocate cold memory.
86 ; RUN: cat %t2.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
88 ;; Run ThinLTO backend
89 ; RUN: opt -passes=memprof-context-disambiguation \
90 ; RUN: -memprof-import-summary=%t.o.thinlto.bc \
91 ; RUN: -stats -pass-remarks=memprof-context-disambiguation \
92 ; RUN: %t.o -S 2>&1 | FileCheck %s --check-prefix=IR \
93 ; RUN: --check-prefix=STATS-BE --check-prefix=STATS-DISTRIB-BE \
94 ; RUN: --check-prefix=REMARKS
96 source_filename = "inlined.ll"
97 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
98 target triple = "x86_64-unknown-linux-gnu"
;; bar: calls the allocator directly. The call carries !memprof !0 and a
;; single-entry !callsite (!5).
100 define internal ptr @_Z3barv() #0 {
102 %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5
106 declare ptr @_Znam(i64)
;; baz with bar inlined: the allocator call carries the same !memprof !0 as
;; the one in bar, but a two-entry !callsite (!6).
108 define internal ptr @_Z3bazv() #0 {
110 %call.i = call ptr @_Znam(i64 0), !memprof !0, !callsite !6
;; foo with baz inlined: calls bar directly, with a two-entry !callsite (!7).
114 define internal ptr @_Z3foov() #0 {
116 %call.i = call ptr @_Z3barv(), !callsite !7
;; main calls foo twice; each call has its own single-entry !callsite
;; (!8 for the first call, !9 for the second).
120 define i32 @main() #0 {
122 %call = call ptr @_Z3foov(), !callsite !8
123 %call1 = call ptr @_Z3foov(), !callsite !9
127 declare void @_ZdaPv()
;; All functions above share attribute group #0.
131 attributes #0 = { noinline optnone }
;; !1 pairs the stack id list !2 with the "notcold" allocation type. The last
;; id in !2 is the one used by !8 (the first call to foo in main).
134 !1 = !{!2, !"notcold"}
135 !2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
;; !4 shares its first three ids with !2 and ends with the id used by !9
;; (the second call to foo in main).
137 !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
;; Callsite stack ids. !5 is the first id of the lists above, !6 is the first
;; two ids, and !7 is the second and third ids.
138 !5 = !{i64 9086428284934609951}
139 !6 = !{i64 9086428284934609951, i64 -5964873800580613432}
140 !7 = !{i64 -5964873800580613432, i64 2732490490862098848}
;; !8 and !9 each hold the final id of !2 and !4 respectively.
141 !8 = !{i64 8632435727821051414}
142 !9 = !{i64 -3421689549917153178}
145 ; DUMP: CCG before cloning:
146 ; DUMP: Callsite Context Graph:
148 ; DUMP: Node [[BAZ:0x[a-z0-9]+]]
149 ; DUMP: Versions: 1 MIB:
150 ; DUMP: AllocType 1 StackIds: 1, 2
151 ; DUMP: AllocType 2 StackIds: 1, 3
153 ; DUMP: AllocTypes: NotColdCold
154 ; DUMP: ContextIds: 1 2
157 ; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
159 ;; This is leftover from the MIB on the alloc inlined into baz. It is not
160 ;; matched with any call, since there is no such node in the IR. Due to the
161 ;; null call it will not participate in any context transformations.
162 ; DUMP: Node [[FOO2]]
164 ; DUMP: AllocTypes: NotColdCold
165 ; DUMP: ContextIds: 1 2
167 ; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 1 2
169 ; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
170 ; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
172 ; DUMP: Node [[MAIN1]]
173 ; DUMP: Callee: 644169328058379925 (_Z3foov) Clones: 0 StackIds: 2 (clone 0)
174 ; DUMP: AllocTypes: NotCold
175 ; DUMP: ContextIds: 1 3
177 ; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
178 ; DUMP: Edge from Callee [[FOO:0x[a-z0-9]+]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
181 ; DUMP: Node [[MAIN2]]
182 ; DUMP: Callee: 644169328058379925 (_Z3foov) Clones: 0 StackIds: 3 (clone 0)
183 ; DUMP: AllocTypes: Cold
184 ; DUMP: ContextIds: 2 4
186 ; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
187 ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
190 ; DUMP: Node [[BAR:0x[a-z0-9]+]]
191 ; DUMP: Versions: 1 MIB:
192 ; DUMP: AllocType 1 StackIds: 0, 1, 2
193 ; DUMP: AllocType 2 StackIds: 0, 1, 3
195 ; DUMP: AllocTypes: NotColdCold
196 ; DUMP: ContextIds: 3 4
199 ; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 3 4
201 ;; This is the node synthesized for the call to bar in foo that was created
202 ;; by inlining baz into foo.
204 ; DUMP: Callee: 10349908617508457487 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0)
205 ; DUMP: AllocTypes: NotColdCold
206 ; DUMP: ContextIds: 3 4
208 ; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 3 4
210 ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
211 ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
213 ; DUMP: CCG after cloning:
214 ; DUMP: Callsite Context Graph:
216 ; DUMP: Versions: 1 MIB:
217 ; DUMP: AllocType 1 StackIds: 1, 2
218 ; DUMP: AllocType 2 StackIds: 1, 3
220 ; DUMP: AllocTypes: NotColdCold
221 ; DUMP: ContextIds: 1 2
224 ; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 1 2
226 ; DUMP: Node [[FOO2]]
228 ; DUMP: AllocTypes: NotColdCold
229 ; DUMP: ContextIds: 1 2
231 ; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 1 2
233 ; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
234 ; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
236 ; DUMP: Node [[MAIN1]]
237 ; DUMP: Callee: 644169328058379925 (_Z3foov) Clones: 0 StackIds: 2 (clone 0)
238 ; DUMP: AllocTypes: NotCold
239 ; DUMP: ContextIds: 1 3
241 ; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
242 ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
245 ; DUMP: Node [[MAIN2]]
246 ; DUMP: Callee: 644169328058379925 (_Z3foov) Clones: 0 StackIds: 3 (clone 0)
247 ; DUMP: AllocTypes: Cold
248 ; DUMP: ContextIds: 2 4
250 ; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
251 ; DUMP: Edge from Callee [[FOO3:0x[a-z0-9]+]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
255 ; DUMP: Versions: 1 MIB:
256 ; DUMP: AllocType 1 StackIds: 0, 1, 2
257 ; DUMP: AllocType 2 StackIds: 0, 1, 3
259 ; DUMP: AllocTypes: NotCold
260 ; DUMP: ContextIds: 3
263 ; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 3
264 ; DUMP: Clones: [[BAR2:0x[a-z0-9]+]]
267 ; DUMP: Callee: 10349908617508457487 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0)
268 ; DUMP: AllocTypes: NotCold
269 ; DUMP: ContextIds: 3
271 ; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 3
273 ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
274 ; DUMP: Clones: [[FOO3]]
276 ; DUMP: Node [[FOO3]]
277 ; DUMP: Callee: 10349908617508457487 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0)
278 ; DUMP: AllocTypes: Cold
279 ; DUMP: ContextIds: 4
281 ; DUMP: Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 4
283 ; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
284 ; DUMP: Clone of [[FOO]]
286 ; DUMP: Node [[BAR2]]
287 ; DUMP: Versions: 1 MIB:
288 ; DUMP: AllocType 1 StackIds: 0, 1, 2
289 ; DUMP: AllocType 2 StackIds: 0, 1, 3
291 ; DUMP: AllocTypes: Cold
292 ; DUMP: ContextIds: 4
295 ; DUMP: Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 4
296 ; DUMP: Clone of [[BAR]]
299 ; REMARKS: created clone _Z3barv.memprof.1
300 ; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
301 ; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
302 ; REMARKS: created clone _Z3foov.memprof.1
303 ; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3barv.memprof.1
304 ; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
307 ; IR: define internal {{.*}} @_Z3barv()
308 ; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
309 ; IR: define internal {{.*}} @_Z3foov()
310 ; IR: call {{.*}} @_Z3barv()
311 ; IR: define {{.*}} @main()
312 ;; The first call to foo does not allocate cold memory. It should call the
313 ;; original functions, which ultimately call the original allocation decorated
314 ;; with a "notcold" attribute.
315 ; IR: call {{.*}} @_Z3foov()
316 ;; The second call to foo allocates cold memory. It should call cloned functions
317 ;; which ultimately call a cloned allocation decorated with a "cold" attribute.
318 ; IR: call {{.*}} @_Z3foov.memprof.1()
319 ; IR: define internal {{.*}} @_Z3barv.memprof.1()
320 ; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
321 ; IR: define internal {{.*}} @_Z3foov.memprof.1()
322 ; IR: call {{.*}} @_Z3barv.memprof.1()
323 ; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
324 ; IR: attributes #[[COLD]] = { "memprof"="cold" }
327 ; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
328 ; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
329 ; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
330 ; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
331 ; STATS-INPROCESS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
332 ;; The distributed backend hasn't yet eliminated the now-dead baz with
333 ;; the allocation from bar inlined, so it has one more allocation.
334 ; STATS-DISTRIB-BE: 3 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
335 ; STATS: 2 memprof-context-disambiguation - Number of function clones created during whole program analysis
336 ; STATS-BE: 2 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
337 ; STATS-BE: 2 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
338 ; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
339 ; STATS-INPROCESS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
340 ;; The distributed backend hasn't yet eliminated the now-dead baz with
341 ;; the allocation from bar inlined, so it has one more allocation.
342 ; STATS-DISTRIB-BE: 2 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
345 ; DOT: digraph "postbuild" {
346 ; DOT: label="postbuild";
347 ; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3bazv -\> alloc}"];
348 ; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
349 ; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
350 ; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
351 ; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
352 ; DOT: Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="brown1"];
353 ; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
354 ; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
355 ; DOT: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 4",fillcolor="cyan"];
356 ; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3barv -\> alloc}"];
357 ; DOT: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
358 ; DOT: Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
362 ; DOTCLONED: digraph "cloned" {
363 ; DOTCLONED: label="cloned";
364 ; DOTCLONED: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3bazv -\> alloc}"];
365 ; DOTCLONED: Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
366 ; DOTCLONED: Node[[FOO2]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
367 ; DOTCLONED: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
368 ; DOTCLONED: Node[[MAIN1]] -> Node[[FOO2]][tooltip="ContextIds: 1",fillcolor="brown1"];
369 ; DOTCLONED: Node[[MAIN1]] -> Node[[FOO:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="brown1"];
370 ; DOTCLONED: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
371 ; DOTCLONED: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"];
372 ; DOTCLONED: Node[[MAIN2]] -> Node[[FOO3:0x[a-z0-9]+]][tooltip="ContextIds: 4",fillcolor="cyan"];
373 ; DOTCLONED: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3barv -\> alloc}"];
374 ; DOTCLONED: Node[[FOO]] [shape=record,tooltip="N[[FOO]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
375 ; DOTCLONED: Node[[FOO]] -> Node[[BAR]][tooltip="ContextIds: 3",fillcolor="brown1"];
376 ; DOTCLONED: Node[[FOO3]] [shape=record,tooltip="N[[FOO3]] ContextIds: 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
377 ; DOTCLONED: Node[[FOO3]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 4",fillcolor="cyan"];
378 ; DOTCLONED: Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];