1 ;; Test callsite context graph generation for call graph with MIBs
2 ;; that have pruned contexts that partially match multiple inlined
3 ;; callsite contexts, requiring duplication of context ids and nodes
4 ;; while matching callsite nodes onto the graph. Also tests graph and IR
7 ;; Original code looks like:
10 ;; return new char[10];
28 ;; int main(int argc, char **argv) {
29 ;; char *x = B(); // cold
30 ;; char *y = E(); // cold
31 ;; char *z = F(); // default
42 ;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
43 ;; memory freed after sleep(10) results in cold lifetimes.
45 ;; The code below was created by forcing inlining of C into both B and E.
46 ;; Since both allocation contexts via C are cold, the matched memprof
47 ;; metadata has the context pruned above C's callsite. This requires
48 ;; matching the stack node for C to callsites where it was inlined (i.e.
49 ;; the callsites in B and E that have callsite metadata that includes C's).
50 ;; It also requires duplication of that node in the graph as well as the
51 ;; duplication of the context ids along that path through the graph,
52 ;; so that we can represent the duplicated (via inlining) C callsite.
54 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
56 ;; -stats requires asserts
59 ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
60 ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
61 ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
62 ; RUN: -stats -pass-remarks=memprof-context-disambiguation \
63 ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
64 ; RUN: --check-prefix=STATS --check-prefix=REMARKS
66 ; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
67 ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
68 ;; We should clone D once for the cold allocations via C.
69 ; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
71 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
72 target triple = "x86_64-unknown-linux-gnu"
74 define internal ptr @_Z1Dv() {
76 %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !5
80 declare ptr @_Znam(i64)
82 define internal ptr @_Z1Fv() #0 {
84 %call = call noundef ptr @_Z1Dv(), !callsite !6
88 ; Function Attrs: mustprogress noinline optnone uwtable
89 define internal ptr @_Z1Cv() #1 {
91 %call = call noundef ptr @_Z1Dv(), !callsite !7
95 ; Function Attrs: mustprogress noinline optnone uwtable
96 define internal ptr @_Z1Bv() #1 {
98 %call.i = call noundef ptr @_Z1Dv(), !callsite !8
102 ; Function Attrs: mustprogress noinline optnone uwtable
103 define internal ptr @_Z1Ev() #1 {
105 %call.i = call noundef ptr @_Z1Dv(), !callsite !9
109 ; Function Attrs: noinline
110 declare i32 @main() #2
112 ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
113 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
115 ; Function Attrs: nounwind
116 declare void @_ZdaPv() #4
118 declare i32 @sleep() #5
120 attributes #0 = { "disable-tail-calls"="true" }
121 attributes #1 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
122 attributes #2 = { noinline }
123 attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) }
124 attributes #4 = { nounwind }
125 attributes #5 = { "no-trapping-math"="true" }
126 attributes #6 = { builtin }
130 !2 = !{i64 6541423618768552252, i64 -6270142974039008131} ;; stack ids: same id as !5 (alloc call in @_Z1Dv), then same id as !7 (call in @_Z1Cv)
131 !3 = !{!4, !"notcold"} ;; pairs the stack in !4 with the "notcold" tag
132 !4 = !{i64 6541423618768552252, i64 -4903163940066524832} ;; stack ids: same id as !5 (alloc call in @_Z1Dv), then same id as !6 (call in @_Z1Fv)
133 !5 = !{i64 6541423618768552252} ;; !callsite on the @_Znam call in @_Z1Dv
134 !6 = !{i64 -4903163940066524832} ;; !callsite on the @_Z1Dv call in @_Z1Fv
135 !7 = !{i64 -6270142974039008131} ;; !callsite on the @_Z1Dv call in @_Z1Cv
136 !8 = !{i64 -6270142974039008131, i64 -184525619819294889} ;; !callsite on the call in @_Z1Bv; leads with the id from !7 because C was inlined into B
137 !9 = !{i64 -6270142974039008131, i64 1905834578520680781} ;; !callsite on the call in @_Z1Ev; leads with the id from !7 because C was inlined into E
140 ;; After adding only the alloc node memprof metadata, we only have 2 contexts.
142 ; DUMP: CCG before updating call stack chains:
143 ; DUMP: Callsite Context Graph:
144 ; DUMP: Node [[D:0x[a-z0-9]+]]
145 ; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
146 ; DUMP: AllocTypes: NotColdCold
147 ; DUMP: ContextIds: 1 2
150 ; DUMP: Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
151 ; DUMP: Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
155 ; DUMP: AllocTypes: Cold
156 ; DUMP: ContextIds: 1
158 ; DUMP: Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1
163 ; DUMP: AllocTypes: NotCold
164 ; DUMP: ContextIds: 2
166 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
169 ;; After updating for callsite metadata, we should have generated context ids 3 and 4,
170 ;; along with 2 new nodes for those callsites. All have the same allocation type
171 ;; behavior as the original C node.
173 ; DUMP: CCG before cloning:
174 ; DUMP: Callsite Context Graph:
176 ; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
177 ; DUMP: AllocTypes: NotColdCold
178 ; DUMP: ContextIds: 1 2 3 4
181 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
182 ; DUMP: Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
183 ; DUMP: Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
184 ; DUMP: Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
187 ; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0)
188 ; DUMP: AllocTypes: NotCold
189 ; DUMP: ContextIds: 2
191 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
195 ; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0)
196 ; DUMP: AllocTypes: Cold
197 ; DUMP: ContextIds: 3
199 ; DUMP: Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
203 ; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0)
204 ; DUMP: AllocTypes: Cold
205 ; DUMP: ContextIds: 4
207 ; DUMP: Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
211 ; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0)
212 ; DUMP: AllocTypes: Cold
213 ; DUMP: ContextIds: 1
215 ; DUMP: Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
218 ; DUMP: CCG after cloning:
219 ; DUMP: Callsite Context Graph:
221 ; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
222 ; DUMP: AllocTypes: NotCold
223 ; DUMP: ContextIds: 2
226 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
227 ; DUMP: Clones: [[D2:0x[a-z0-9]+]]
230 ; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0)
231 ; DUMP: AllocTypes: NotCold
232 ; DUMP: ContextIds: 2
234 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
238 ; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0)
239 ; DUMP: AllocTypes: Cold
240 ; DUMP: ContextIds: 3
242 ; DUMP: Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
246 ; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0)
247 ; DUMP: AllocTypes: Cold
248 ; DUMP: ContextIds: 4
250 ; DUMP: Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
254 ; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0)
255 ; DUMP: AllocTypes: Cold
256 ; DUMP: ContextIds: 1
258 ; DUMP: Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
262 ; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
263 ; DUMP: AllocTypes: Cold
264 ; DUMP: ContextIds: 1 3 4
267 ; DUMP: Edge from Callee [[D2]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
268 ; DUMP: Edge from Callee [[D2]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
269 ; DUMP: Edge from Callee [[D2]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
270 ; DUMP: Clone of [[D]]
272 ; REMARKS: created clone _Z1Dv.memprof.1
273 ; REMARKS: call in clone _Z1Ev assigned to call function clone _Z1Dv.memprof.1
274 ; REMARKS: call in clone _Z1Cv assigned to call function clone _Z1Dv.memprof.1
275 ; REMARKS: call in clone _Z1Bv assigned to call function clone _Z1Dv.memprof.1
276 ; REMARKS: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
277 ; REMARKS: call in clone _Z1Fv assigned to call function clone _Z1Dv
278 ; REMARKS: call in clone _Z1Dv marked with memprof allocation attribute notcold
281 ;; The allocation via F does not allocate cold memory. It should call the
282 ;; original D, which ultimately calls the original allocation decorated
283 ;; with a "notcold" attribute.
284 ; IR: define internal {{.*}} @_Z1Dv()
285 ; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
286 ; IR: define internal {{.*}} @_Z1Fv()
287 ; IR: call {{.*}} @_Z1Dv()
288 ;; The allocations via B and E allocate cold memory. They should call the
289 ;; cloned D, which ultimately calls the cloned allocation decorated with a
291 ; IR: define internal {{.*}} @_Z1Bv()
292 ; IR: call {{.*}} @_Z1Dv.memprof.1()
293 ; IR: define internal {{.*}} @_Z1Ev()
294 ; IR: call {{.*}} @_Z1Dv.memprof.1()
295 ; IR: define internal {{.*}} @_Z1Dv.memprof.1()
296 ; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
297 ; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
298 ; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
301 ; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
302 ; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
303 ; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis
306 ; DOTPRE: digraph "prestackupdate" {
307 ; DOTPRE: label="prestackupdate";
308 ; DOTPRE: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
309 ; DOTPRE: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
310 ; DOTPRE: Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
311 ; DOTPRE: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
312 ; DOTPRE: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
316 ; DOTPOST:digraph "postbuild" {
317 ; DOTPOST: label="postbuild";
318 ; DOTPOST: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
319 ; DOTPOST: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
320 ; DOTPOST: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
321 ; DOTPOST: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
322 ; DOTPOST: Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
323 ; DOTPOST: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
324 ; DOTPOST: Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
325 ; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
326 ; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
330 ; DOTCLONED: digraph "cloned" {
331 ; DOTCLONED: label="cloned";
332 ; DOTCLONED: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
333 ; DOTCLONED: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
334 ; DOTCLONED: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
335 ; DOTCLONED: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
336 ; DOTCLONED: Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"];
337 ; DOTCLONED: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
338 ; DOTCLONED: Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"];
339 ; DOTCLONED: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
340 ; DOTCLONED: Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
341 ; DOTCLONED: Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];