1 ;; Test callsite context graph generation for call graph with MIBs
2 ;; that have pruned contexts that partially match multiple inlined
3 ;; callsite contexts, requiring duplication of context ids and nodes
4 ;; while matching callsite nodes onto the graph. This test requires more
5 ;; complex duplication due to multiple contexts for different allocations
6 ;; that share some of the same callsite nodes.
8 ;; Original code looks like:
10 ;; char *D(bool Call1) {
12 ;; return new char[10];
14 ;; return new char[10];
17 ;; char *C(bool Call1) {
21 ;; char *B(bool Call1) {
28 ;; char *A(bool Call1) {
56 ;; int main(int argc, char **argv) {
57 ;; char *a1 = A1(); // cold
58 ;; char *a2 = A2(); // cold
59 ;; char *e = E(); // default
60 ;; char *a3 = A3(); // default
61 ;; char *a4 = A4(); // default
62 ;; char *f = F(); // cold
79 ;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
80 ;; memory freed after sleep(10) results in cold lifetimes.
82 ;; The code below was created by forcing inlining of A into its callers,
83 ;; without any other inlining or optimizations. Since both allocation contexts
84 ;; via A for each allocation in D have the same allocation type (cold via
85 ;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second
86 ;; new in D), the contexts for those respective allocations are pruned above A.
87 ;; The allocations via E and F are to ensure we don't prune above B.
89 ;; The matching onto the inlined A[1234]->A sequences will require duplication
90 ;; of the context id assigned to the context from A for each allocation in D.
91 ;; This test ensures that we do this correctly in the presence of callsites
92 ;; shared by the different duplicated context ids (i.e. callsite in C).
94 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
96 ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
97 ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
98 ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
99 ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
102 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
103 target triple = "x86_64-unknown-linux-gnu"
;; D: contains the two allocation calls under test. Each call to @_Znam has its
;; own !memprof profile and its own single-id !callsite (!5 on %call, !11 on
;; %call1), so the two allocations are distinguishable in the context graph.
105 ; Function Attrs: mustprogress noinline uwtable
106 define ptr @_Z1Db(i1 %Call1) #0 {
;; First allocation (the "%call" alloc node in the DUMP checks).
108 %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
111 if.else: ; No predecessors!
;; Second allocation (the "%call1" alloc node in the DUMP checks).
112 %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !6, !callsite !11
115 return: ; preds = %if.else, %entry
;; Allocation function called by both allocation sites in @_Z1Db.
119 ; Function Attrs: nobuiltin
120 declare ptr @_Znam(i64) #1
;; C: makes a single call to D (callsite !12). The id in !12 is the second stack
;; id in every visible allocation context, i.e. this callsite is shared by the
;; contexts of both allocations in D.
122 define ptr @_Z1Cb(i1 %Call1) {
124 %tobool = trunc i8 0 to i1
125 %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool), !callsite !12
;; B: calls C from two distinct callsites. The `true` call (!13) appears in the
;; contexts of the first allocation in D, and the `false` call (!14) in the
;; contexts of the second allocation.
129 ; Function Attrs: mustprogress noinline uwtable
130 define ptr @_Z1Bb(i1 %Call1) #0 {
132 %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true), !callsite !13
135 if.else: ; No predecessors!
136 %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false), !callsite !14
139 return: ; preds = %if.else, %entry
;; A: the out-of-line copy of A. Its call to B carries the single-id callsite
;; !15; that same id is the leading entry of the two-id callsites in A1..A4.
143 define ptr @_Z1Ab(i1 %tobool) #2 {
145 %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool), !callsite !15
;; A1: the call to B has a two-id !callsite (!16): first the id also used by
;; !15 (the call in @_Z1Ab), then an id specific to this caller. This is the
;; "inlined A" callsite sequence described in the header comment.
149 ; Function Attrs: mustprogress noinline uwtable
150 define ptr @_Z2A1v(i1 %tobool.i) #0 {
152 %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !16
;; A2: the call to B has a two-id !callsite (!17): first the id also used by
;; !15 (the call in @_Z1Ab), then an id specific to this caller.
156 ; Function Attrs: mustprogress noinline uwtable
157 define ptr @_Z2A2v(i1 %tobool.i) #0 {
159 %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !17
;; A3: the call to B has a two-id !callsite (!18): first the id also used by
;; !15 (the call in @_Z1Ab), then an id specific to this caller.
163 ; Function Attrs: mustprogress noinline uwtable
164 define ptr @_Z2A3v(i1 %tobool.i) #0 {
166 %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !18
;; A4: the call to B has a two-id !callsite (!19): first the id also used by
;; !15 (the call in @_Z1Ab), then an id specific to this caller.
170 ; Function Attrs: mustprogress noinline uwtable
171 define ptr @_Z2A4v(i1 %tobool.i) #0 {
173 %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !19
;; E: calls B directly with `true` (callsite !20). The id in !20 is the last
;; stack id of context !2, which is tagged "notcold" by !1.
177 ; Function Attrs: mustprogress noinline uwtable
178 define ptr @_Z1Ev() #0 {
180 %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true), !callsite !20
;; F: calls B directly with `false` (callsite !21). The id in !21 is the last
;; stack id of context !10; the header comment lists the F path as cold.
184 ; Function Attrs: mustprogress noinline uwtable
185 define ptr @_Z1Fv() #0 {
187 %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false), !callsite !21
;; Declarations only: none of the call instructions shown in this test target
;; these symbols. NOTE(review): presumably leftovers of the llvm-reduce step
;; mentioned in the header -- confirm before removing.
191 ; Function Attrs: noinline
192 declare i32 @main() #3
194 ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
195 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
197 declare void @_ZdaPv() #5
199 declare i32 @sleep() #6
201 ; uselistorder directives
202 uselistorder ptr @_Znam, { 1, 0 }
;; Attribute groups. #0 is used by the noinline definitions (D, B, A1..A4, E,
;; F), #1 by the @_Znam declaration, #2 by @_Z1Ab, #3..#6 by the remaining
;; declarations, and #7 by the two @_Znam call sites in D.
204 attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
205 attributes #1 = { nobuiltin }
206 attributes #2 = { "tune-cpu"="generic" }
207 attributes #3 = { noinline }
208 attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
209 attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
210 attributes #6 = { "disable-tail-calls"="true" }
211 attributes #7 = { builtin allocsize(0) }
;; Contexts for the first allocation in D (%call). Stack ids are listed from the
;; allocation outwards: D's alloc callsite (!5), the call in C (!12), the `true`
;; call in B (!13), then the caller of B: E's id (!20) in !2, A's id (!15) in !4.
214 !1 = !{!2, !"notcold"}
215 !2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781}
217 !4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978}
218 !5 = !{i64 4854880825882961848}
;; Contexts for the second allocation in D (%call1): D's alloc callsite (!11),
;; the call in C (!12), the `false` call in B (!14), then the caller of B:
;; A's id (!15) in !8, F's id (!21) in !10.
220 !7 = !{!8, !"notcold"}
221 !8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978}
223 !10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832}
224 !11 = !{i64 -8775068539491628272}
;; Single-id callsites: C's call to D (!12), B's two calls to C (!13, !14), and
;; A's call to B (!15).
225 !12 = !{i64 -904694911315397047}
226 !13 = !{i64 6532298921261778285}
227 !14 = !{i64 7859682663773658275}
228 !15 = !{i64 -6528110295079665978}
;; Two-id callsites on the calls to B in A1..A4: A's id (same as !15) followed
;; by an id unique to each caller.
229 !16 = !{i64 -6528110295079665978, i64 5747919905719679568}
230 !17 = !{i64 -6528110295079665978, i64 -5753238080028016843}
231 !18 = !{i64 -6528110295079665978, i64 1794685869326395337}
232 !19 = !{i64 -6528110295079665978, i64 5462047985461644151}
;; Callsites of the direct calls to B from E (!20) and F (!21).
233 !20 = !{i64 1905834578520680781}
234 !21 = !{i64 -4903163940066524832}
237 ;; After adding only the alloc node memprof metadata, we only have 4 contexts (we only
238 ;; match the interesting parts of the pre-update graph here).
240 ; DUMP: CCG before updating call stack chains:
241 ; DUMP: Callsite Context Graph:
243 ; DUMP: Node [[D1:0x[a-z0-9]+]]
244 ; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
245 ; DUMP: AllocTypes: NotColdCold
246 ; DUMP: ContextIds: 1 2
248 ; DUMP: Node [[C:0x[a-z0-9]+]]
250 ; DUMP: AllocTypes: NotColdCold
251 ; DUMP: ContextIds: 1 2 3 4
253 ; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2
254 ; DUMP: Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4
257 ; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
258 ; DUMP: AllocTypes: NotColdCold
259 ; DUMP: ContextIds: 3 4
262 ;; After updating for callsite metadata, we should have duplicated the context
263 ;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A,
264 ;; and used those on new nodes for those callers. Note that while in reality
265 ;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4,
266 ;; due to the pruning we have lost this information and thus end up duplicating
267 ;; both of A's contexts to all of the new nodes (which could result in some
268 ;; unnecessary cloning).
270 ; DUMP: CCG before cloning:
271 ; DUMP: Callsite Context Graph:
273 ; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
274 ; DUMP: AllocTypes: NotColdCold
275 ; DUMP: ContextIds: 1 2 5 7 9 11
278 ; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
281 ; DUMP: %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool) (clone 0)
282 ; DUMP: AllocTypes: NotColdCold
283 ; DUMP: ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12
285 ; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
286 ; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
288 ; DUMP: Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
289 ; DUMP: Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
292 ; DUMP: %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true) (clone 0)
293 ; DUMP: AllocTypes: NotColdCold
294 ; DUMP: ContextIds: 1 2 5 7 9 11
296 ; DUMP: Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
298 ; DUMP: Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
299 ; DUMP: Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5
300 ; DUMP: Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7
301 ; DUMP: Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9
302 ; DUMP: Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11
303 ; DUMP: Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
306 ; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true) (clone 0)
307 ; DUMP: AllocTypes: NotCold
308 ; DUMP: ContextIds: 1
310 ; DUMP: Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1
314 ; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
315 ; DUMP: AllocTypes: NotColdCold
316 ; DUMP: ContextIds: 3 4 6 8 10 12
319 ; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
322 ; DUMP: %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false) (clone 0)
323 ; DUMP: AllocTypes: NotColdCold
324 ; DUMP: ContextIds: 3 4 6 8 10 12
326 ; DUMP: Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
328 ; DUMP: Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
329 ; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
330 ; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
331 ; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
332 ; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
333 ; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
336 ; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false) (clone 0)
337 ; DUMP: AllocTypes: Cold
338 ; DUMP: ContextIds: 4
340 ; DUMP: Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4
344 ; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool) (clone 0)
345 ; DUMP: AllocTypes: NotColdCold
346 ; DUMP: ContextIds: 5 6
348 ; DUMP: Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5
349 ; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
353 ; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
354 ; DUMP: AllocTypes: NotColdCold
355 ; DUMP: ContextIds: 7 8
357 ; DUMP: Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7
358 ; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
362 ; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
363 ; DUMP: AllocTypes: NotColdCold
364 ; DUMP: ContextIds: 9 10
366 ; DUMP: Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9
367 ; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
371 ; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
372 ; DUMP: AllocTypes: NotColdCold
373 ; DUMP: ContextIds: 11 12
375 ; DUMP: Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11
376 ; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
380 ; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
381 ; DUMP: AllocTypes: NotColdCold
382 ; DUMP: ContextIds: 2 3
384 ; DUMP: Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2
385 ; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3