1 #include "llvm/ADT/DenseMap.h"
2 #include "llvm/ADT/StringExtras.h"
3 #include "llvm/ADT/StringSet.h"
4 #include "llvm/DebugInfo/DIContext.h"
5 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
6 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
7 #include "llvm/Object/ObjectFile.h"
9 #define DEBUG_TYPE "dwarfdump"
11 using namespace object
;
13 /// Holds statistics for one function (or other entity that has a PC range and
14 /// contains variables, such as a compile unit).
15 struct PerFunctionStats
{
16 /// Number of inlined instances of this function.
17 unsigned NumFnInlined
= 0;
18 /// Number of variables with location across all inlined instances.
19 unsigned TotalVarWithLoc
= 0;
20 /// Number of constants with location across all inlined instances.
21 unsigned ConstantMembers
= 0;
22 /// List of all Variables in this function.
23 StringSet
<> VarsInFunction
;
24 /// Compile units also cover a PC range, but have this flag set to false.
25 bool IsFunction
= false;
26 /// Verify function definition has PC addresses (for detecting when
27 /// a function has been inlined everywhere).
28 bool HasPCAddresses
= false;
/// Holds accumulated global statistics about DIEs.
///
/// The byte counters are 64 bits wide: they are incremented with 64-bit PC
/// range sizes, and a 32-bit accumulator would silently wrap on large inputs,
/// which in turn can break the "covered <= total" invariant checked by the
/// code that updates them.
struct GlobalStats {
  /// Total number of PC range bytes covered by DW_AT_locations.
  uint64_t ScopeBytesCovered = 0;

  /// Total number of PC range bytes in each variable's enclosing scope,
  /// starting from the first definition of the variable.
  uint64_t ScopeBytesFromFirstDefinition = 0;

  /// Total number of call site entries (DW_TAG_call_site).
  unsigned CallSiteEntries = 0;

  /// Total byte size of concrete functions. This byte size includes
  /// inline functions contained in the concrete functions.
  uint64_t FunctionSize = 0;

  /// Total byte size of inlined functions. This is the total number of bytes
  /// for the top inline functions within concrete functions. This can help
  /// tune the inline settings when compiling to match user expectations.
  uint64_t InlineFunctionSize = 0;
};
49 /// Extract the low pc from a Die.
50 static uint64_t getLowPC(DWARFDie Die
) {
51 auto RangesOrError
= Die
.getAddressRanges();
52 DWARFAddressRangesVector Ranges
;
54 Ranges
= RangesOrError
.get();
56 llvm::consumeError(RangesOrError
.takeError());
58 return Ranges
[0].LowPC
;
59 return dwarf::toAddress(Die
.find(dwarf::DW_AT_low_pc
), 0);
62 /// Collect debug info quality metrics for one DIE.
///
/// DW_TAG_call_site DIEs bump GlobalStats.CallSiteEntries. For the
/// variable-like tags tested below, the DIE's name is recorded in
/// FnStatMap[FnPrefix].VarsInFunction and byte-coverage totals are added to
/// GlobalStats.
///
/// \param Die          The DIE to inspect.
/// \param FnPrefix     Key used to look up the per-function entry in
///                     \p FnStatMap.
/// \param VarPrefix    Prepended to the DIE's short name to form the key
///                     inserted into VarsInFunction.
/// \param ScopeLowPC   Address the first location-list entry is measured
///                     against when computing OffsetToFirstDefinition.
/// \param BytesInScope Byte size used as the scope size for coverage; the
///                     amount credited as covered never exceeds it.
/// \param FnStatMap    Per-function statistics, updated in place.
/// \param GlobalStats  Global totals, updated in place.
63 static void collectStatsForDie(DWARFDie Die
, std::string FnPrefix
,
64 std::string VarPrefix
, uint64_t ScopeLowPC
,
65 uint64_t BytesInScope
,
67 StringMap
<PerFunctionStats
> &FnStatMap
,
68 GlobalStats
&GlobalStats
) {
// Number of bytes for which this DIE is known to have a location.
70 uint64_t BytesCovered
= 0;
// Distance from ScopeLowPC to the start of the first location-list entry.
71 uint64_t OffsetToFirstDefinition
= 0;
73 if (Die
.getTag() == dwarf::DW_TAG_call_site
) {
74 GlobalStats
.CallSiteEntries
++;
// Only formal parameters, variables and members are of interest from here on.
78 if (Die
.getTag() != dwarf::DW_TAG_formal_parameter
&&
79 Die
.getTag() != dwarf::DW_TAG_variable
&&
80 Die
.getTag() != dwarf::DW_TAG_member
) {
81 // Not a variable or constant member.
// A DW_AT_const_value attribute is credited with covering the whole scope.
85 if (Die
.find(dwarf::DW_AT_const_value
)) {
86 // This catches constant members *and* variables.
88 BytesCovered
= BytesInScope
;
90 if (Die
.getTag() == dwarf::DW_TAG_member
) {
94 // Handle variables and function arguments.
95 auto FormValue
= Die
.find(dwarf::DW_AT_location
);
96 HasLoc
= FormValue
.hasValue();
// A DW_AT_location in section-offset form is looked up as a location list in
// the debug_loc data; the covered bytes are the sum of its entries' sizes.
99 if (auto DebugLocOffset
= FormValue
->getAsSectionOffset()) {
100 auto *DebugLoc
= Die
.getDwarfUnit()->getContext().getDebugLoc();
101 if (auto List
= DebugLoc
->getLocationListAtOffset(*DebugLocOffset
)) {
102 for (auto Entry
: List
->Entries
)
103 BytesCovered
+= Entry
.End
- Entry
.Begin
;
104 if (List
->Entries
.size()) {
// The first entry's Begin is added to the unit DIE's low PC before it is
// compared with ScopeLowPC, i.e. it is treated as an offset from that low PC.
105 uint64_t FirstDef
= List
->Entries
[0].Begin
;
106 uint64_t UnitOfs
= getLowPC(Die
.getDwarfUnit()->getUnitDIE());
107 // Ranges sometimes start before the lexical scope.
108 if (UnitOfs
+ FirstDef
>= ScopeLowPC
)
109 OffsetToFirstDefinition
= UnitOfs
+ FirstDef
- ScopeLowPC
;
110 // Or even after it. Count that as a failure.
111 if (OffsetToFirstDefinition
> BytesInScope
)
112 OffsetToFirstDefinition
= 0;
115 assert(BytesInScope
);
117 // Assume the entire range is covered by a single location.
118 BytesCovered
= BytesInScope
;
123 // Collect PC range coverage data.
124 auto &FnStats
= FnStatMap
[FnPrefix
];
// The DW_AT_abstract_origin reference, if any, is resolved to its DIE here.
126 Die
.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin
))
128 // By using the variable name + the path through the lexical block tree, the
129 // keys are consistent across duplicate abstract origins in different CUs.
130 std::string VarName
= StringRef(Die
.getName(DINameKind::ShortName
));
131 FnStats
.VarsInFunction
.insert(VarPrefix
+VarName
);
// HasLoc contributes 0 or 1 to the count of variables with a location.
133 FnStats
.TotalVarWithLoc
+= (unsigned)HasLoc
;
134 // Adjust for the fact the variables often start their lifetime in the
135 // middle of the scope.
136 BytesInScope
-= OffsetToFirstDefinition
;
137 // Turns out we have a lot of ranges that extend past the lexical scope.
138 GlobalStats
.ScopeBytesCovered
+= std::min(BytesInScope
, BytesCovered
);
139 GlobalStats
.ScopeBytesFromFirstDefinition
+= BytesInScope
;
// Each step adds at most BytesInScope to the covered total, so it must never
// exceed the scope total.
140 assert(GlobalStats
.ScopeBytesCovered
<=
141 GlobalStats
.ScopeBytesFromFirstDefinition
);
142 } else if (Die
.getTag() == dwarf::DW_TAG_member
) {
143 FnStats
.ConstantMembers
++;
145 FnStats
.TotalVarWithLoc
+= (unsigned)HasLoc
;
149 /// Recursively collect debug info quality metrics.
///
/// Scope DIEs (subprogram, inlined subroutine, lexical block) update the
/// per-function entry in \p FnStatMap and the size totals in \p GlobalStats;
/// any other DIE is handed to collectStatsForDie. The DIE's children are then
/// visited with the same function.
///
/// \param Die          The DIE to visit.
/// \param FnPrefix     Passed on to collectStatsForDie and to the children.
/// \param VarPrefix    Prefix for variable keys; each child lexical block gets
///                     its own extended copy.
/// \param ScopeLowPC   Low PC of the enclosing scope; reassigned from
///                     getLowPC(Die) in the scope-handling code below.
/// \param BytesInScope Byte size of the enclosing scope; replaced by this
///                     scope's own size when that is non-zero.
/// \param InlineDepth  Inlining depth; an inlined subroutine is added to
///                     InlineFunctionSize only when this is 0.
/// \param FnStatMap    Per-function statistics, updated in place.
/// \param GlobalStats  Global totals, updated in place.
150 static void collectStatsRecursive(DWARFDie Die
, std::string FnPrefix
,
151 std::string VarPrefix
, uint64_t ScopeLowPC
,
152 uint64_t BytesInScope
,
153 uint32_t InlineDepth
,
154 StringMap
<PerFunctionStats
> &FnStatMap
,
155 GlobalStats
&GlobalStats
) {
156 // Handle any kind of lexical scope.
157 const dwarf::Tag Tag
= Die
.getTag();
158 const bool IsFunction
= Tag
== dwarf::DW_TAG_subprogram
;
159 const bool IsBlock
= Tag
== dwarf::DW_TAG_lexical_block
;
160 const bool IsInlinedFunction
= Tag
== dwarf::DW_TAG_inlined_subroutine
;
161 if (IsFunction
|| IsInlinedFunction
|| IsBlock
) {
163 // Reset VarPrefix when entering a new function.
164 if (Die
.getTag() == dwarf::DW_TAG_subprogram
||
165 Die
.getTag() == dwarf::DW_TAG_inlined_subroutine
)
168 // Ignore forward declarations.
169 if (Die
.find(dwarf::DW_AT_declaration
))
// Read the scope's address ranges; a failure to read them is consumed rather
// than propagated.
173 auto RangesOrError
= Die
.getAddressRanges();
174 if (!RangesOrError
) {
175 llvm::consumeError(RangesOrError
.takeError());
179 auto Ranges
= RangesOrError
.get();
// The size of this scope is the sum of the sizes of all its address ranges.
180 uint64_t BytesInThisScope
= 0;
181 for (auto Range
: Ranges
)
182 BytesInThisScope
+= Range
.HighPC
- Range
.LowPC
;
183 ScopeLowPC
= getLowPC(Die
);
185 // Count the function.
// Both the linkage name and the short name are queried; the resulting Name is
// the key into FnStatMap below.
187 StringRef Name
= Die
.getName(DINameKind::LinkageName
);
189 Name
= Die
.getName(DINameKind::ShortName
);
191 // Skip over abstract origins.
192 if (Die
.find(dwarf::DW_AT_inline
))
194 // We've seen an (inlined) instance of this function.
195 auto &FnStats
= FnStatMap
[Name
];
196 if (IsInlinedFunction
)
197 FnStats
.NumFnInlined
++;
198 FnStats
.IsFunction
= true;
// Only a non-inlined instance with a non-zero size marks the function as
// having PC addresses.
199 if (BytesInThisScope
&& !IsInlinedFunction
)
200 FnStats
.HasPCAddresses
= true;
// A scope with a non-zero size overrides the BytesInScope inherited from the
// caller.
203 if (BytesInThisScope
) {
204 BytesInScope
= BytesInThisScope
;
206 GlobalStats
.FunctionSize
+= BytesInThisScope
;
// Inlined subroutines are only counted at InlineDepth 0.
207 else if (IsInlinedFunction
&& InlineDepth
== 0)
208 GlobalStats
.InlineFunctionSize
+= BytesInThisScope
;
211 // Not a scope, visit the Die itself. It could be a variable.
212 collectStatsForDie(Die
, FnPrefix
, VarPrefix
, ScopeLowPC
, BytesInScope
,
213 InlineDepth
, FnStatMap
, GlobalStats
);
216 // Set InlineDepth correctly for child recursion
219 else if (IsInlinedFunction
)
222 // Traverse children.
// LexicalBlockIndex advances once per child that is a lexical block.
223 unsigned LexicalBlockIndex
= 0;
224 DWARFDie Child
= Die
.getFirstChild();
// Each child starts from this DIE's VarPrefix; a lexical-block child appends
// its index in hex followed by '.'.
226 std::string ChildVarPrefix
= VarPrefix
;
227 if (Child
.getTag() == dwarf::DW_TAG_lexical_block
)
228 ChildVarPrefix
+= toHex(LexicalBlockIndex
++) + '.';
230 collectStatsRecursive(Child
, FnPrefix
, ChildVarPrefix
, ScopeLowPC
,
231 BytesInScope
, InlineDepth
, FnStatMap
, GlobalStats
);
232 Child
= Child
.getSibling();
236 /// Print machine-readable output.
237 /// The machine-readable format is single-line JSON output.
239 static void printDatum(raw_ostream
&OS
, const char *Key
, StringRef Value
) {
240 OS
<< ",\"" << Key
<< "\":\"" << Value
<< '"';
241 LLVM_DEBUG(llvm::dbgs() << Key
<< ": " << Value
<< '\n');
243 static void printDatum(raw_ostream
&OS
, const char *Key
, uint64_t Value
) {
244 OS
<< ",\"" << Key
<< "\":" << Value
;
245 LLVM_DEBUG(llvm::dbgs() << Key
<< ": " << Value
<< '\n');
249 /// Collect debug info quality metrics for an entire DIContext.
251 /// Do the impossible and reduce the quality of the debug info down to a few
252 /// numbers. The idea is to condense the data into numbers that can be tracked
253 /// over time to identify trends in newer compiler versions and gauge the effect
254 /// of particular optimizations. The raw numbers themselves are not particularly
255 /// useful, only the delta between compiling the same program with different
257 bool collectStatsForObjectFile(ObjectFile
&Obj
, DWARFContext
&DICtx
,
258 Twine Filename
, raw_ostream
&OS
) {
259 StringRef FormatName
= Obj
.getFileFormatName();
260 GlobalStats GlobalStats
;
261 StringMap
<PerFunctionStats
> Statistics
;
262 for (const auto &CU
: static_cast<DWARFContext
*>(&DICtx
)->compile_units())
263 if (DWARFDie CUDie
= CU
->getUnitDIE(false))
264 collectStatsRecursive(CUDie
, "/", "g", 0, 0, 0, Statistics
, GlobalStats
);
266 /// The version number should be increased every time the algorithm is changed
267 /// (including bug fixes). New metrics may be added without increasing the
269 unsigned Version
= 2;
270 unsigned VarTotal
= 0;
271 unsigned VarUnique
= 0;
272 unsigned VarWithLoc
= 0;
273 unsigned NumFunctions
= 0;
274 unsigned NumInlinedFunctions
= 0;
275 for (auto &Entry
: Statistics
) {
276 PerFunctionStats
&Stats
= Entry
.getValue();
277 unsigned TotalVars
= Stats
.VarsInFunction
.size() * Stats
.NumFnInlined
;
278 // Count variables in concrete out-of-line functions and in global scope.
279 if (Stats
.HasPCAddresses
|| !Stats
.IsFunction
)
280 TotalVars
+= Stats
.VarsInFunction
.size();
281 unsigned Constants
= Stats
.ConstantMembers
;
282 VarWithLoc
+= Stats
.TotalVarWithLoc
+ Constants
;
283 VarTotal
+= TotalVars
;
284 VarUnique
+= Stats
.VarsInFunction
.size();
285 LLVM_DEBUG(for (auto &V
: Stats
.VarsInFunction
) llvm::dbgs()
286 << Entry
.getKey() << ": " << V
.getKey() << "\n");
287 NumFunctions
+= Stats
.IsFunction
;
288 NumInlinedFunctions
+= Stats
.IsFunction
* Stats
.NumFnInlined
;
292 OS
.SetBufferSize(1024);
293 OS
<< "{\"version\":" << Version
;
294 LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n";
295 llvm::dbgs() << "---------------------------------\n");
296 printDatum(OS
, "file", Filename
.str());
297 printDatum(OS
, "format", FormatName
);
298 printDatum(OS
, "source functions", NumFunctions
);
299 printDatum(OS
, "inlined functions", NumInlinedFunctions
);
300 printDatum(OS
, "unique source variables", VarUnique
);
301 printDatum(OS
, "source variables", VarTotal
);
302 printDatum(OS
, "variables with location", VarWithLoc
);
303 printDatum(OS
, "call site entries", GlobalStats
.CallSiteEntries
);
304 printDatum(OS
, "scope bytes total",
305 GlobalStats
.ScopeBytesFromFirstDefinition
);
306 printDatum(OS
, "scope bytes covered", GlobalStats
.ScopeBytesCovered
);
307 printDatum(OS
, "total function size", GlobalStats
.FunctionSize
);
308 printDatum(OS
, "total inlined function size", GlobalStats
.InlineFunctionSize
);
311 llvm::dbgs() << "Total Availability: "
312 << (int)std::round((VarWithLoc
* 100.0) / VarTotal
) << "%\n";
313 llvm::dbgs() << "PC Ranges covered: "
314 << (int)std::round((GlobalStats
.ScopeBytesCovered
* 100.0) /
315 GlobalStats
.ScopeBytesFromFirstDefinition
)