1 ;; Test callsite context graph generation for call graph with MIBs
2 ;; that have pruned contexts that partially match multiple inlined
3 ;; callsite contexts, requiring duplication of context ids and nodes
4 ;; while matching callsite nodes onto the graph. Also tests graph and IR
7 ;; Original code looks like:
10 ;; return new char[10];
28 ;; int main(int argc, char **argv) {
29 ;; char *x = B(); // cold
30 ;; char *y = E(); // cold
31 ;; char *z = F(); // default
42 ;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
43 ;; memory freed after sleep(10) results in cold lifetimes.
45 ;; The code below was created by forcing inlining of C into both B and E.
46 ;; Since both allocation contexts via C are cold, the matched memprof
47 ;; metadata has the context pruned above C's callsite. This requires
48 ;; matching the stack node for C to callsites where it was inlined (i.e.
49 ;; the callsites in B and E that have callsite metadata that includes C's).
50 ;; It also requires duplication of that node in the graph as well as the
51 ;; duplication of the context ids along that path through the graph,
52 ;; so that we can represent the duplicated (via inlining) C callsite.
54 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
56 ;; -stats requires asserts
59 ; RUN: opt -thinlto-bc %s >%t.o
60 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
61 ; RUN: -supports-hot-cold-new \
62 ; RUN: -r=%t.o,main,plx \
63 ; RUN: -r=%t.o,_ZdaPv, \
64 ; RUN: -r=%t.o,sleep, \
65 ; RUN: -r=%t.o,_Znam, \
66 ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
67 ; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
68 ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
69 ; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
71 ; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
74 ;; Try again but with distributed ThinLTO
75 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
76 ; RUN: -supports-hot-cold-new \
77 ; RUN: -thinlto-distributed-indexes \
78 ; RUN: -r=%t.o,main,plx \
79 ; RUN: -r=%t.o,_ZdaPv, \
80 ; RUN: -r=%t.o,sleep, \
81 ; RUN: -r=%t.o,_Znam, \
82 ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
83 ; RUN: -stats -pass-remarks=memprof-context-disambiguation \
84 ; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
85 ; RUN: --check-prefix=STATS
88 ;; Check distributed index
89 ; RUN: llvm-dis %t.o.thinlto.bc -o - | FileCheck %s --check-prefix=DISTRIB
91 ;; Run ThinLTO backend
92 ; RUN: opt -passes=memprof-context-disambiguation \
93 ; RUN: -memprof-import-summary=%t.o.thinlto.bc \
94 ; RUN: -stats -pass-remarks=memprof-context-disambiguation \
95 ; RUN: %t.o -S 2>&1 | FileCheck %s --check-prefix=IR \
96 ; RUN: --check-prefix=STATS-BE --check-prefix=REMARKS
98 source_filename = "duplicate-context-ids.ll"
99 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
100 target triple = "x86_64-unknown-linux-gnu"
;; D contains the profiled allocation: its call to _Znam (operator new[])
;; carries the !memprof metadata !0 and the callsite stack id !5.
102 define internal ptr @_Z1Dv() #0 {
104 %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5
108 declare ptr @_Znam(i64)
;; F calls D with callsite id !6, which is the second stack id of the
;; "notcold" context !4.
110 define internal ptr @_Z1Fv() #0 {
112 %call = call ptr @_Z1Dv(), !callsite !6
;; C calls D with callsite id !7, which is the second stack id of context !2.
116 define internal ptr @_Z1Cv() #0 {
118 %call = call ptr @_Z1Dv(), !callsite !7
;; B calls D directly because C was inlined into it: callsite !8 starts with
;; C's stack id (the one in !7) and is followed by one more stack id.
122 define internal ptr @_Z1Bv() #0 {
124 %call.i = call ptr @_Z1Dv(), !callsite !8
;; E calls D directly because C was inlined into it: callsite !9 starts with
;; C's stack id (the one in !7) and is followed by one more stack id, which
;; differs from the one used in B's callsite !8.
128 define internal ptr @_Z1Ev() #0 {
130 %call.i = call ptr @_Z1Dv(), !callsite !9
134 define i32 @main() #0 {
142 declare void @_ZdaPv()
146 attributes #0 = { noinline optnone}
150 !2 = !{i64 6541423618768552252, i64 -6270142974039008131}
151 !3 = !{!4, !"notcold"}
152 !4 = !{i64 6541423618768552252, i64 -4903163940066524832}
153 !5 = !{i64 6541423618768552252}
154 !6 = !{i64 -4903163940066524832}
155 !7 = !{i64 -6270142974039008131}
156 !8 = !{i64 -6270142974039008131, i64 -184525619819294889}
157 !9 = !{i64 -6270142974039008131, i64 1905834578520680781}
160 ;; After adding only the alloc node memprof metadata, we only have 2 contexts.
162 ; DUMP: CCG before updating call stack chains:
163 ; DUMP: Callsite Context Graph:
164 ; DUMP: Node [[D:0x[a-z0-9]+]]
165 ; DUMP: Versions: 1 MIB:
166 ; DUMP: AllocType 2 StackIds: 0
167 ; DUMP: AllocType 1 StackIds: 1
169 ; DUMP: AllocTypes: NotColdCold
170 ; DUMP: ContextIds: 1 2
173 ; DUMP: Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
174 ; DUMP: Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
176 ;; After updating for callsite metadata, we should have generated context ids 3 and 4,
177 ;; along with 2 new nodes for those callsites. All have the same allocation type
178 ;; behavior as the original C node.
180 ; DUMP: CCG before cloning:
181 ; DUMP: Callsite Context Graph:
183 ; DUMP: Versions: 1 MIB:
184 ; DUMP: AllocType 2 StackIds: 0
185 ; DUMP: AllocType 1 StackIds: 1
187 ; DUMP: AllocTypes: NotColdCold
188 ; DUMP: ContextIds: 1 2 3 4
191 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
192 ; DUMP: Edge from Callee [[D]] to Caller: [[C1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
193 ; DUMP: Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
194 ; DUMP: Edge from Callee [[D]] to Caller: [[C0:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
196 ; DUMP: CCG after cloning:
197 ; DUMP: Callsite Context Graph:
199 ; DUMP: Versions: 1 MIB:
200 ; DUMP: AllocType 2 StackIds: 0
201 ; DUMP: AllocType 1 StackIds: 1
203 ; DUMP: AllocTypes: NotCold
204 ; DUMP: ContextIds: 2
207 ; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
208 ; DUMP: Clones: [[D2:0x[a-z0-9]+]]
211 ; DUMP: Versions: 1 MIB:
212 ; DUMP: AllocType 2 StackIds: 0
213 ; DUMP: AllocType 1 StackIds: 1
215 ; DUMP: AllocTypes: Cold
216 ; DUMP: ContextIds: 1 3 4
219 ; DUMP: Edge from Callee [[D2]] to Caller: [[C0]] AllocTypes: Cold ContextIds: 1
220 ; DUMP: Edge from Callee [[D2]] to Caller: [[C1]] AllocTypes: Cold ContextIds: 3
221 ; DUMP: Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 4
222 ; DUMP: Clone of [[D]]
224 ; REMARKS: created clone _Z1Dv.memprof.1
225 ; REMARKS: call in clone _Z1Dv marked with memprof allocation attribute notcold
226 ; REMARKS: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
227 ; REMARKS: call in clone _Z1Bv assigned to call function clone _Z1Dv.memprof.1
228 ; REMARKS: call in clone _Z1Ev assigned to call function clone _Z1Dv.memprof.1
231 ;; The allocation via F does not allocate cold memory. It should call the
232 ;; original D, which ultimately calls the original allocation decorated
233 ;; with a "notcold" attribute.
234 ; IR: define internal {{.*}} @_Z1Dv()
235 ; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
236 ; IR: define internal {{.*}} @_Z1Fv()
237 ; IR: call {{.*}} @_Z1Dv()
238 ;; The allocations via B and E allocate cold memory. They should call the
239 ;; cloned D, which ultimately calls the cloned allocation decorated with a
241 ; IR: define internal {{.*}} @_Z1Bv()
242 ; IR: call {{.*}} @_Z1Dv.memprof.1()
243 ; IR: define internal {{.*}} @_Z1Ev()
244 ; IR: call {{.*}} @_Z1Dv.memprof.1()
245 ; IR: define internal {{.*}} @_Z1Dv.memprof.1()
246 ; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
247 ; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
248 ; IR: attributes #[[COLD]] = { "memprof"="cold" }
251 ; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
252 ; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
253 ; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
254 ; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
255 ; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
256 ; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis
257 ; STATS-BE: 1 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
258 ; STATS-BE: 1 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
259 ; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
260 ; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
;; The value id of D is captured once, on the first DISTRIB line. Every later
;; line reuses [[D]] so that each callsite summary is checked to reference that
;; same summary entry, rather than matching an arbitrary value id.
263 ; DISTRIB: ^[[E:[0-9]+]] = gv: (guid: 331966645857188136, {{.*}} callsites: ((callee: ^[[D:[0-9]+]], clones: (1)
264 ; DISTRIB: ^[[D]] = gv: (guid: 11079124245221721799, {{.*}} allocs: ((versions: (notcold, cold)
265 ; DISTRIB: ^[[F:[0-9]+]] = gv: (guid: 11254287701717398916, {{.*}} callsites: ((callee: ^[[D]], clones: (0)
266 ; DISTRIB: ^[[B:[0-9]+]] = gv: (guid: 13579056193435805313, {{.*}} callsites: ((callee: ^[[D]], clones: (1)
267 ; DISTRIB: ^[[C:[0-9]+]] = gv: (guid: 15101436305866936160, {{.*}} callsites: ((callee: ^[[D]], clones: (1)