1 #include "llvm/ADT/DenseMap.h"
2 #include "llvm/ADT/StringExtras.h"
3 #include "llvm/ADT/StringSet.h"
4 #include "llvm/DebugInfo/DIContext.h"
5 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
6 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
7 #include "llvm/Object/ObjectFile.h"
9 #define DEBUG_TYPE "dwarfdump"
11 using namespace object
;
13 /// Holds statistics for one function (or other entity that has a PC range and
14 /// contains variables, such as a compile unit).
15 struct PerFunctionStats
{
16 /// Number of inlined instances of this function.
17 unsigned NumFnInlined
= 0;
18 /// Number of inlined instances that have abstract origins.
19 unsigned NumAbstractOrigins
= 0;
20 /// Number of variables and parameters with location across all inlined
22 unsigned TotalVarWithLoc
= 0;
23 /// Number of constants with location across all inlined instances.
24 unsigned ConstantMembers
= 0;
25 /// List of all Variables and parameters in this function.
26 StringSet
<> VarsInFunction
;
27 /// Compile units also cover a PC range, but have this flag set to false.
28 bool IsFunction
= false;
29 /// Verify function definition has PC addresses (for detecting when
30 /// a function has been inlined everywhere).
31 bool HasPCAddresses
= false;
32 /// Function has source location information.
33 bool HasSourceLocation
= false;
34 /// Number of function parameters.
35 unsigned NumParams
= 0;
36 /// Number of function parameters with source location.
37 unsigned NumParamSourceLocations
= 0;
38 /// Number of function parameters with type.
39 unsigned NumParamTypes
= 0;
40 /// Number of function parameters with a DW_AT_location.
41 unsigned NumParamLocations
= 0;
42 /// Number of variables.
44 /// Number of variables with source location.
45 unsigned NumVarSourceLocations
= 0;
46 /// Number of variables wtih type.
47 unsigned NumVarTypes
= 0;
48 /// Number of variables wtih DW_AT_location.
49 unsigned NumVarLocations
= 0;
/// Holds accumulated global statistics about DIEs.
struct GlobalStats {
  /// Total number of PC range bytes covered by DW_AT_locations.
  /// Kept 64-bit wide: it is accumulated from 64-bit byte counts, so a
  /// narrower counter would silently truncate on large inputs.
  uint64_t ScopeBytesCovered = 0;
  /// Total number of PC range bytes in each variable's enclosing scope,
  /// starting from the first definition of the variable.
  /// Kept 64-bit wide for the same reason as ScopeBytesCovered.
  uint64_t ScopeBytesFromFirstDefinition = 0;
  /// Total number of call site entries (DW_AT_call_file & DW_AT_call_line).
  unsigned CallSiteEntries = 0;
  /// Total number of call site DIEs (DW_TAG_call_site).
  unsigned CallSiteDIEs = 0;
  /// Total number of call site parameter DIEs (DW_TAG_call_site_parameter).
  unsigned CallSiteParamDIEs = 0;
  /// Total byte size of concrete functions. This byte size includes
  /// inline functions contained in the concrete functions.
  uint64_t FunctionSize = 0;
  /// Total byte size of inlined functions. This is the total number of bytes
  /// for the top inline functions within concrete functions. This can help
  /// tune the inline settings when compiling to match user expectations.
  uint64_t InlineFunctionSize = 0;
};
74 /// Extract the low pc from a Die.
75 static uint64_t getLowPC(DWARFDie Die
) {
76 auto RangesOrError
= Die
.getAddressRanges();
77 DWARFAddressRangesVector Ranges
;
79 Ranges
= RangesOrError
.get();
81 llvm::consumeError(RangesOrError
.takeError());
83 return Ranges
[0].LowPC
;
84 return dwarf::toAddress(Die
.find(dwarf::DW_AT_low_pc
), 0);
87 /// Collect debug info quality metrics for one DIE.
88 static void collectStatsForDie(DWARFDie Die
, std::string FnPrefix
,
89 std::string VarPrefix
, uint64_t ScopeLowPC
,
90 uint64_t BytesInScope
, uint32_t InlineDepth
,
91 StringMap
<PerFunctionStats
> &FnStatMap
,
92 GlobalStats
&GlobalStats
) {
94 bool HasSrcLoc
= false;
96 bool IsArtificial
= false;
97 uint64_t BytesCovered
= 0;
98 uint64_t OffsetToFirstDefinition
= 0;
100 if (Die
.getTag() == dwarf::DW_TAG_call_site
||
101 Die
.getTag() == dwarf::DW_TAG_GNU_call_site
) {
102 GlobalStats
.CallSiteDIEs
++;
106 if (Die
.getTag() == dwarf::DW_TAG_call_site_parameter
||
107 Die
.getTag() == dwarf::DW_TAG_GNU_call_site_parameter
) {
108 GlobalStats
.CallSiteParamDIEs
++;
112 if (Die
.getTag() != dwarf::DW_TAG_formal_parameter
&&
113 Die
.getTag() != dwarf::DW_TAG_variable
&&
114 Die
.getTag() != dwarf::DW_TAG_member
) {
115 // Not a variable or constant member.
119 if (Die
.findRecursively(dwarf::DW_AT_decl_file
) &&
120 Die
.findRecursively(dwarf::DW_AT_decl_line
))
123 if (Die
.findRecursively(dwarf::DW_AT_type
))
126 if (Die
.find(dwarf::DW_AT_artificial
))
129 if (Die
.find(dwarf::DW_AT_const_value
)) {
130 // This catches constant members *and* variables.
132 BytesCovered
= BytesInScope
;
134 if (Die
.getTag() == dwarf::DW_TAG_member
) {
138 // Handle variables and function arguments.
139 auto FormValue
= Die
.find(dwarf::DW_AT_location
);
140 HasLoc
= FormValue
.hasValue();
143 if (auto DebugLocOffset
= FormValue
->getAsSectionOffset()) {
144 auto *DebugLoc
= Die
.getDwarfUnit()->getContext().getDebugLoc();
145 if (auto List
= DebugLoc
->getLocationListAtOffset(*DebugLocOffset
)) {
146 for (auto Entry
: List
->Entries
)
147 BytesCovered
+= Entry
.End
- Entry
.Begin
;
148 if (List
->Entries
.size()) {
149 uint64_t FirstDef
= List
->Entries
[0].Begin
;
150 uint64_t UnitOfs
= getLowPC(Die
.getDwarfUnit()->getUnitDIE());
151 // Ranges sometimes start before the lexical scope.
152 if (UnitOfs
+ FirstDef
>= ScopeLowPC
)
153 OffsetToFirstDefinition
= UnitOfs
+ FirstDef
- ScopeLowPC
;
154 // Or even after it. Count that as a failure.
155 if (OffsetToFirstDefinition
> BytesInScope
)
156 OffsetToFirstDefinition
= 0;
159 assert(BytesInScope
);
161 // Assume the entire range is covered by a single location.
162 BytesCovered
= BytesInScope
;
167 // Collect PC range coverage data.
168 auto &FnStats
= FnStatMap
[FnPrefix
];
170 Die
.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin
))
172 // By using the variable name + the path through the lexical block tree, the
173 // keys are consistent across duplicate abstract origins in different CUs.
174 std::string VarName
= StringRef(Die
.getName(DINameKind::ShortName
));
175 FnStats
.VarsInFunction
.insert(VarPrefix
+ VarName
);
177 FnStats
.TotalVarWithLoc
+= (unsigned)HasLoc
;
178 // Adjust for the fact the variables often start their lifetime in the
179 // middle of the scope.
180 BytesInScope
-= OffsetToFirstDefinition
;
181 // Turns out we have a lot of ranges that extend past the lexical scope.
182 GlobalStats
.ScopeBytesCovered
+= std::min(BytesInScope
, BytesCovered
);
183 GlobalStats
.ScopeBytesFromFirstDefinition
+= BytesInScope
;
184 assert(GlobalStats
.ScopeBytesCovered
<=
185 GlobalStats
.ScopeBytesFromFirstDefinition
);
186 } else if (Die
.getTag() == dwarf::DW_TAG_member
) {
187 FnStats
.ConstantMembers
++;
189 FnStats
.TotalVarWithLoc
+= (unsigned)HasLoc
;
192 if (Die
.getTag() == dwarf::DW_TAG_formal_parameter
) {
195 FnStats
.NumParamTypes
++;
197 FnStats
.NumParamSourceLocations
++;
199 FnStats
.NumParamLocations
++;
200 } else if (Die
.getTag() == dwarf::DW_TAG_variable
) {
203 FnStats
.NumVarTypes
++;
205 FnStats
.NumVarSourceLocations
++;
207 FnStats
.NumVarLocations
++;
212 /// Recursively collect debug info quality metrics.
213 static void collectStatsRecursive(DWARFDie Die
, std::string FnPrefix
,
214 std::string VarPrefix
, uint64_t ScopeLowPC
,
215 uint64_t BytesInScope
, uint32_t InlineDepth
,
216 StringMap
<PerFunctionStats
> &FnStatMap
,
217 GlobalStats
&GlobalStats
) {
218 // Handle any kind of lexical scope.
219 const dwarf::Tag Tag
= Die
.getTag();
220 const bool IsFunction
= Tag
== dwarf::DW_TAG_subprogram
;
221 const bool IsBlock
= Tag
== dwarf::DW_TAG_lexical_block
;
222 const bool IsInlinedFunction
= Tag
== dwarf::DW_TAG_inlined_subroutine
;
223 if (IsFunction
|| IsInlinedFunction
|| IsBlock
) {
225 // Reset VarPrefix when entering a new function.
226 if (Die
.getTag() == dwarf::DW_TAG_subprogram
||
227 Die
.getTag() == dwarf::DW_TAG_inlined_subroutine
)
230 // Ignore forward declarations.
231 if (Die
.find(dwarf::DW_AT_declaration
))
234 // Check for call sites.
235 if (Die
.find(dwarf::DW_AT_call_file
) && Die
.find(dwarf::DW_AT_call_line
))
236 GlobalStats
.CallSiteEntries
++;
239 auto RangesOrError
= Die
.getAddressRanges();
240 if (!RangesOrError
) {
241 llvm::consumeError(RangesOrError
.takeError());
245 auto Ranges
= RangesOrError
.get();
246 uint64_t BytesInThisScope
= 0;
247 for (auto Range
: Ranges
)
248 BytesInThisScope
+= Range
.HighPC
- Range
.LowPC
;
249 ScopeLowPC
= getLowPC(Die
);
251 // Count the function.
253 StringRef Name
= Die
.getName(DINameKind::LinkageName
);
255 Name
= Die
.getName(DINameKind::ShortName
);
257 // Skip over abstract origins.
258 if (Die
.find(dwarf::DW_AT_inline
))
260 // We've seen an (inlined) instance of this function.
261 auto &FnStats
= FnStatMap
[Name
];
262 if (IsInlinedFunction
) {
263 FnStats
.NumFnInlined
++;
264 if (Die
.findRecursively(dwarf::DW_AT_abstract_origin
))
265 FnStats
.NumAbstractOrigins
++;
267 FnStats
.IsFunction
= true;
268 if (BytesInThisScope
&& !IsInlinedFunction
)
269 FnStats
.HasPCAddresses
= true;
270 std::string FnName
= StringRef(Die
.getName(DINameKind::ShortName
));
271 if (Die
.findRecursively(dwarf::DW_AT_decl_file
) &&
272 Die
.findRecursively(dwarf::DW_AT_decl_line
))
273 FnStats
.HasSourceLocation
= true;
276 if (BytesInThisScope
) {
277 BytesInScope
= BytesInThisScope
;
279 GlobalStats
.FunctionSize
+= BytesInThisScope
;
280 else if (IsInlinedFunction
&& InlineDepth
== 0)
281 GlobalStats
.InlineFunctionSize
+= BytesInThisScope
;
284 // Not a scope, visit the Die itself. It could be a variable.
285 collectStatsForDie(Die
, FnPrefix
, VarPrefix
, ScopeLowPC
, BytesInScope
,
286 InlineDepth
, FnStatMap
, GlobalStats
);
289 // Set InlineDepth correctly for child recursion
292 else if (IsInlinedFunction
)
295 // Traverse children.
296 unsigned LexicalBlockIndex
= 0;
297 DWARFDie Child
= Die
.getFirstChild();
299 std::string ChildVarPrefix
= VarPrefix
;
300 if (Child
.getTag() == dwarf::DW_TAG_lexical_block
)
301 ChildVarPrefix
+= toHex(LexicalBlockIndex
++) + '.';
303 collectStatsRecursive(Child
, FnPrefix
, ChildVarPrefix
, ScopeLowPC
,
304 BytesInScope
, InlineDepth
, FnStatMap
, GlobalStats
);
305 Child
= Child
.getSibling();
309 /// Print machine-readable output.
310 /// The machine-readable format is single-line JSON output.
312 static void printDatum(raw_ostream
&OS
, const char *Key
, StringRef Value
) {
313 OS
<< ",\"" << Key
<< "\":\"" << Value
<< '"';
314 LLVM_DEBUG(llvm::dbgs() << Key
<< ": " << Value
<< '\n');
316 static void printDatum(raw_ostream
&OS
, const char *Key
, uint64_t Value
) {
317 OS
<< ",\"" << Key
<< "\":" << Value
;
318 LLVM_DEBUG(llvm::dbgs() << Key
<< ": " << Value
<< '\n');
322 /// Collect debug info quality metrics for an entire DIContext.
324 /// Do the impossible and reduce the quality of the debug info down to a few
325 /// numbers. The idea is to condense the data into numbers that can be tracked
326 /// over time to identify trends in newer compiler versions and gauge the effect
327 /// of particular optimizations. The raw numbers themselves are not particularly
328 /// useful, only the delta between compiling the same program with different
330 bool collectStatsForObjectFile(ObjectFile
&Obj
, DWARFContext
&DICtx
,
331 Twine Filename
, raw_ostream
&OS
) {
332 StringRef FormatName
= Obj
.getFileFormatName();
333 GlobalStats GlobalStats
;
334 StringMap
<PerFunctionStats
> Statistics
;
335 for (const auto &CU
: static_cast<DWARFContext
*>(&DICtx
)->compile_units())
336 if (DWARFDie CUDie
= CU
->getNonSkeletonUnitDIE(false))
337 collectStatsRecursive(CUDie
, "/", "g", 0, 0, 0, Statistics
, GlobalStats
);
339 /// The version number should be increased every time the algorithm is changed
340 /// (including bug fixes). New metrics may be added without increasing the
342 unsigned Version
= 3;
343 unsigned VarParamTotal
= 0;
344 unsigned VarParamUnique
= 0;
345 unsigned VarParamWithLoc
= 0;
346 unsigned NumFunctions
= 0;
347 unsigned NumInlinedFunctions
= 0;
348 unsigned NumFuncsWithSrcLoc
= 0;
349 unsigned NumAbstractOrigins
= 0;
350 unsigned ParamTotal
= 0;
351 unsigned ParamWithType
= 0;
352 unsigned ParamWithLoc
= 0;
353 unsigned ParamWithSrcLoc
= 0;
354 unsigned VarTotal
= 0;
355 unsigned VarWithType
= 0;
356 unsigned VarWithSrcLoc
= 0;
357 unsigned VarWithLoc
= 0;
358 for (auto &Entry
: Statistics
) {
359 PerFunctionStats
&Stats
= Entry
.getValue();
360 unsigned TotalVars
= Stats
.VarsInFunction
.size() * Stats
.NumFnInlined
;
361 // Count variables in concrete out-of-line functions and in global scope.
362 if (Stats
.HasPCAddresses
|| !Stats
.IsFunction
)
363 TotalVars
+= Stats
.VarsInFunction
.size();
364 unsigned Constants
= Stats
.ConstantMembers
;
365 VarParamWithLoc
+= Stats
.TotalVarWithLoc
+ Constants
;
366 VarParamTotal
+= TotalVars
;
367 VarParamUnique
+= Stats
.VarsInFunction
.size();
368 LLVM_DEBUG(for (auto &V
369 : Stats
.VarsInFunction
) llvm::dbgs()
370 << Entry
.getKey() << ": " << V
.getKey() << "\n");
371 NumFunctions
+= Stats
.IsFunction
;
372 NumFuncsWithSrcLoc
+= Stats
.HasSourceLocation
;
373 NumInlinedFunctions
+= Stats
.IsFunction
* Stats
.NumFnInlined
;
374 NumAbstractOrigins
+= Stats
.IsFunction
* Stats
.NumAbstractOrigins
;
375 ParamTotal
+= Stats
.NumParams
;
376 ParamWithType
+= Stats
.NumParamTypes
;
377 ParamWithLoc
+= Stats
.NumParamLocations
;
378 ParamWithSrcLoc
+= Stats
.NumParamSourceLocations
;
379 VarTotal
+= Stats
.NumVars
;
380 VarWithType
+= Stats
.NumVarTypes
;
381 VarWithLoc
+= Stats
.NumVarLocations
;
382 VarWithSrcLoc
+= Stats
.NumVarSourceLocations
;
386 OS
.SetBufferSize(1024);
387 OS
<< "{\"version\":" << Version
;
388 LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n";
389 llvm::dbgs() << "---------------------------------\n");
390 printDatum(OS
, "file", Filename
.str());
391 printDatum(OS
, "format", FormatName
);
392 printDatum(OS
, "source functions", NumFunctions
);
393 printDatum(OS
, "source functions with location", NumFuncsWithSrcLoc
);
394 printDatum(OS
, "inlined functions", NumInlinedFunctions
);
395 printDatum(OS
, "inlined funcs with abstract origins", NumAbstractOrigins
);
396 printDatum(OS
, "unique source variables", VarParamUnique
);
397 printDatum(OS
, "source variables", VarParamTotal
);
398 printDatum(OS
, "variables with location", VarParamWithLoc
);
399 printDatum(OS
, "call site entries", GlobalStats
.CallSiteEntries
);
400 printDatum(OS
, "call site DIEs", GlobalStats
.CallSiteDIEs
);
401 printDatum(OS
, "call site parameter DIEs", GlobalStats
.CallSiteParamDIEs
);
402 printDatum(OS
, "scope bytes total",
403 GlobalStats
.ScopeBytesFromFirstDefinition
);
404 printDatum(OS
, "scope bytes covered", GlobalStats
.ScopeBytesCovered
);
405 printDatum(OS
, "total function size", GlobalStats
.FunctionSize
);
406 printDatum(OS
, "total inlined function size", GlobalStats
.InlineFunctionSize
);
407 printDatum(OS
, "total formal params", ParamTotal
);
408 printDatum(OS
, "formal params with source location", ParamWithSrcLoc
);
409 printDatum(OS
, "formal params with type", ParamWithType
);
410 printDatum(OS
, "formal params with binary location", ParamWithLoc
);
411 printDatum(OS
, "total vars", VarTotal
);
412 printDatum(OS
, "vars with source location", VarWithSrcLoc
);
413 printDatum(OS
, "vars with type", VarWithType
);
414 printDatum(OS
, "vars with binary location", VarWithLoc
);
417 llvm::dbgs() << "Total Availability: "
418 << (int)std::round((VarParamWithLoc
* 100.0) / VarParamTotal
)
420 llvm::dbgs() << "PC Ranges covered: "
421 << (int)std::round((GlobalStats
.ScopeBytesCovered
* 100.0) /
422 GlobalStats
.ScopeBytesFromFirstDefinition
)