1 ;; Test callsite context graph generation for call graph with MIBs
2 ;; that have pruned contexts that partially match multiple inlined
3 ;; callsite contexts, requiring duplication of context ids and nodes
4 ;; while matching callsite nodes onto the graph. Also tests graph and IR
7 ;; Original code looks like:
10 ;; return new char[10];
28 ;; int main(int argc, char **argv) {
29 ;; char *x = B(); // cold
30 ;; char *y = E(); // cold
31 ;; char *z = F(); // default
42 ;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
43 ;; memory freed after sleep(10) results in cold lifetimes.
45 ;; The code below was created by forcing inlining of C into both B and E.
46 ;; Since both allocation contexts via C are cold, the matched memprof
47 ;; metadata has the context pruned above C's callsite. This requires
48 ;; matching the stack node for C to callsites where it was inlined (i.e.
49 ;; the callsites in B and E that have callsite metadata that includes C's).
50 ;; It also requires duplication of that node in the graph as well as the
51 ;; duplication of the context ids along that path through the graph,
52 ;; so that we can represent the duplicated (via inlining) C callsite.
54 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
56 ;; -stats requires asserts
59 ; RUN: opt -thinlto-bc %s >%t.o
60 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
61 ; RUN: -supports-hot-cold-new \
62 ; RUN: -r=%t.o,main,plx \
63 ; RUN: -r=%t.o,_ZdaPv, \
64 ; RUN: -r=%t.o,sleep, \
65 ; RUN: -r=%t.o,_Znam, \
66 ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
67 ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
68 ; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
69 ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
70 ; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
72 ; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
73 ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
74 ;; We should clone D once for the cold allocations via C.
75 ; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
77 ; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
80 ;; Try again but with distributed ThinLTO
81 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
82 ; RUN: -supports-hot-cold-new \
83 ; RUN: -thinlto-distributed-indexes \
84 ; RUN: -r=%t.o,main,plx \
85 ; RUN: -r=%t.o,_ZdaPv, \
86 ; RUN: -r=%t.o,sleep, \
87 ; RUN: -r=%t.o,_Znam, \
88 ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
89 ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
90 ; RUN: -stats -pass-remarks=memprof-context-disambiguation \
91 ; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
92 ; RUN: --check-prefix=STATS
;; This run exports its dot files under the %t2. prefix, so check those
;; rather than the files left behind by the earlier non-distributed run.
94 ; RUN: cat %t2.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
95 ; RUN: cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
96 ;; We should clone D once for the cold allocations via C.
97 ; RUN: cat %t2.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
99 ;; Check distributed index
100 ; RUN: llvm-dis %t.o.thinlto.bc -o - | FileCheck %s --check-prefix=DISTRIB
102 ;; Run ThinLTO backend
103 ; RUN: opt -passes=memprof-context-disambiguation \
104 ; RUN: -memprof-import-summary=%t.o.thinlto.bc \
105 ; RUN: -stats -pass-remarks=memprof-context-disambiguation \
106 ; RUN: %t.o -S 2>&1 | FileCheck %s --check-prefix=IR \
107 ; RUN: --check-prefix=STATS-BE --check-prefix=REMARKS
109 source_filename = "duplicate-context-ids.ll"
110 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
111 target triple = "x86_64-unknown-linux-gnu"
;; D holds the profiled allocation. The _Znam call carries !memprof !0 and a
;; !callsite (!5) whose single stack id is the leading frame of both context
;; stacks !2 and !4.
113 define internal ptr @_Z1Dv() #0 {
115 %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5
119 declare ptr @_Znam(i64)
;; F calls D directly. Its !callsite (!6) is the second frame of context
;; stack !4, which !3 tags "notcold".
121 define internal ptr @_Z1Fv() #0 {
123 %call = call ptr @_Z1Dv(), !callsite !6
;; C calls D directly. Its !callsite (!7) is the second frame of context
;; stack !2; the header comment describes the contexts via C as cold.
127 define internal ptr @_Z1Cv() #0 {
129 %call = call ptr @_Z1Dv(), !callsite !7
;; B's call to D comes from C having been inlined into B (see the header
;; comment), so its !callsite (!8) lists C's stack id followed by one more
;; id (presumably the frame of B's original call to C).
133 define internal ptr @_Z1Bv() #0 {
135 %call.i = call ptr @_Z1Dv(), !callsite !8
;; E's call to D comes from C having been inlined into E (see the header
;; comment), so its !callsite (!9) lists C's stack id followed by one more
;; id (presumably the frame of E's original call to C).
139 define internal ptr @_Z1Ev() #0 {
141 %call.i = call ptr @_Z1Dv(), !callsite !9
145 define i32 @main() #0 {
153 declare void @_ZdaPv()
157 attributes #0 = { noinline optnone}
;; Context stack for the allocation reached through C (D's frame, then C's).
;; It stops at C, which is the pruning described in the header comment.
161 !2 = !{i64 6541423618768552252, i64 -6270142974039008131}
;; The allocation reached through F (context stack !4: D's frame, then F's)
;; is tagged "notcold".
162 !3 = !{!4, !"notcold"}
163 !4 = !{i64 6541423618768552252, i64 -4903163940066524832}
;; Callsite stack ids for the calls in D, F and C respectively.
164 !5 = !{i64 6541423618768552252}
165 !6 = !{i64 -4903163940066524832}
166 !7 = !{i64 -6270142974039008131}
;; Callsites in B and E. Each starts with C's stack id and adds one extra id,
;; so both partially match the pruned context stack !2.
167 !8 = !{i64 -6270142974039008131, i64 -184525619819294889}
168 !9 = !{i64 -6270142974039008131, i64 1905834578520680781}
171 ;; After adding only the alloc node memprof metadata, we only have 2 contexts.
173 ; DUMP: CCG before updating call stack chains:
174 ; DUMP: Callsite Context Graph:
175 ; DUMP: Node [[D:0x[a-z0-9]+]]
176 ; DUMP: Versions: 1 MIB:
177 ; DUMP: AllocType 2 StackIds: 0
178 ; DUMP: AllocType 1 StackIds: 1
180 ; DUMP: AllocTypes: NotColdCold
181 ; DUMP: ContextIds: 1 2
184 ; DUMP: Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
185 ; DUMP: Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
189 ; DUMP: AllocTypes: Cold
190 ; DUMP: ContextIds: 1
192 ; DUMP: Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1
197 ; DUMP: AllocTypes: NotCold
198 ; DUMP: ContextIds: 2
200 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
203 ;; After updating for callsite metadata, we should have generated context ids 3 and 4,
204 ;; along with 2 new nodes for those callsites. All have the same allocation type
205 ;; behavior as the original C node.
207 ; DUMP: CCG before cloning:
208 ; DUMP: Callsite Context Graph:
210 ; DUMP: Versions: 1 MIB:
211 ; DUMP: AllocType 2 StackIds: 0
212 ; DUMP: AllocType 1 StackIds: 1
214 ; DUMP: AllocTypes: NotColdCold
215 ; DUMP: ContextIds: 1 2 3 4
218 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
219 ; DUMP: Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
220 ; DUMP: Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
221 ; DUMP: Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
224 ; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 1 (clone 0)
225 ; DUMP: AllocTypes: NotCold
226 ; DUMP: ContextIds: 2
228 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
232 ; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0 (clone 0)
233 ; DUMP: AllocTypes: Cold
234 ; DUMP: ContextIds: 3
236 ; DUMP: Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
240 ; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 2 (clone 0)
241 ; DUMP: AllocTypes: Cold
242 ; DUMP: ContextIds: 4
244 ; DUMP: Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
248 ; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 3 (clone 0)
249 ; DUMP: AllocTypes: Cold
250 ; DUMP: ContextIds: 1
252 ; DUMP: Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
256 ; DUMP: CCG after cloning:
257 ; DUMP: Callsite Context Graph:
259 ; DUMP: Versions: 1 MIB:
260 ; DUMP: AllocType 2 StackIds: 0
261 ; DUMP: AllocType 1 StackIds: 1
263 ; DUMP: AllocTypes: NotCold
264 ; DUMP: ContextIds: 2
267 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
268 ; DUMP: Clones: [[D2:0x[a-z0-9]+]]
271 ; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 1 (clone 0)
272 ; DUMP: AllocTypes: NotCold
273 ; DUMP: ContextIds: 2
275 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
279 ; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0 (clone 0)
280 ; DUMP: AllocTypes: Cold
281 ; DUMP: ContextIds: 3
283 ; DUMP: Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
287 ; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 2 (clone 0)
288 ; DUMP: AllocTypes: Cold
289 ; DUMP: ContextIds: 4
291 ; DUMP: Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
295 ; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 3 (clone 0)
296 ; DUMP: AllocTypes: Cold
297 ; DUMP: ContextIds: 1
299 ; DUMP: Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
303 ; DUMP: Versions: 1 MIB:
304 ; DUMP: AllocType 2 StackIds: 0
305 ; DUMP: AllocType 1 StackIds: 1
307 ; DUMP: AllocTypes: Cold
308 ; DUMP: ContextIds: 1 3 4
311 ; DUMP: Edge from Callee [[D2]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
312 ; DUMP: Edge from Callee [[D2]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
313 ; DUMP: Edge from Callee [[D2]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
314 ; DUMP: Clone of [[D]]
316 ; REMARKS: created clone _Z1Dv.memprof.1
317 ; REMARKS: call in clone _Z1Dv marked with memprof allocation attribute notcold
318 ; REMARKS: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
319 ; REMARKS: call in clone _Z1Bv assigned to call function clone _Z1Dv.memprof.1
320 ; REMARKS: call in clone _Z1Ev assigned to call function clone _Z1Dv.memprof.1
323 ;; The allocation via F does not allocate cold memory. It should call the
324 ;; original D, which ultimately calls the original allocation decorated
325 ;; with a "notcold" attribute.
326 ; IR: define internal {{.*}} @_Z1Dv()
327 ; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
328 ; IR: define internal {{.*}} @_Z1Fv()
329 ; IR: call {{.*}} @_Z1Dv()
330 ;; The allocations via B and E allocate cold memory. They should call the
331 ;; cloned D, which ultimately calls the cloned allocation decorated with a
333 ; IR: define internal {{.*}} @_Z1Bv()
334 ; IR: call {{.*}} @_Z1Dv.memprof.1()
335 ; IR: define internal {{.*}} @_Z1Ev()
336 ; IR: call {{.*}} @_Z1Dv.memprof.1()
337 ; IR: define internal {{.*}} @_Z1Dv.memprof.1()
338 ; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
339 ; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
340 ; IR: attributes #[[COLD]] = { "memprof"="cold" }
343 ; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
344 ; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
345 ; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
346 ; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
347 ; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
348 ; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis
349 ; STATS-BE: 1 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
350 ; STATS-BE: 1 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
351 ; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
352 ; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
355 ; DOTPRE: digraph "prestackupdate" {
356 ; DOTPRE: label="prestackupdate";
357 ; DOTPRE: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
358 ; DOTPRE: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
359 ; DOTPRE: Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
360 ; DOTPRE: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
361 ; DOTPRE: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
365 ; DOTPOST:digraph "postbuild" {
366 ; DOTPOST: label="postbuild";
367 ; DOTPOST: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
368 ; DOTPOST: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
369 ; DOTPOST: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
370 ; DOTPOST: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
371 ; DOTPOST: Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
372 ; DOTPOST: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
373 ; DOTPOST: Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
374 ; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
375 ; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
379 ; DOTCLONED: digraph "cloned" {
380 ; DOTCLONED: label="cloned";
381 ; DOTCLONED: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
382 ; DOTCLONED: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
383 ; DOTCLONED: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
384 ; DOTCLONED: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
385 ; DOTCLONED: Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"];
386 ; DOTCLONED: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
387 ; DOTCLONED: Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"];
388 ; DOTCLONED: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
389 ; DOTCLONED: Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
390 ; DOTCLONED: Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
393 ; DISTRIB: ^[[C:[0-9]+]] = gv: (guid: 1643923691937891493, {{.*}} callsites: ((callee: ^[[D:[0-9]+]], clones: (1)
394 ; DISTRIB: ^[[D]] = gv: (guid: 4881081444663423788, {{.*}} allocs: ((versions: (notcold, cold)
395 ; DISTRIB: ^[[B:[0-9]+]] = gv: (guid: 14590037969532473829, {{.*}} callsites: ((callee: ^[[D]], clones: (1)
396 ; DISTRIB: ^[[F:[0-9]+]] = gv: (guid: 17035303613541779335, {{.*}} callsites: ((callee: ^[[D]], clones: (0)
397 ; DISTRIB: ^[[E:[0-9]+]] = gv: (guid: 17820708772846654376, {{.*}} callsites: ((callee: ^[[D]], clones: (1)