1 #include "llvm/ADT/DenseMap.h"
2 #include "llvm/ADT/StringExtras.h"
3 #include "llvm/ADT/StringSet.h"
4 #include "llvm/DebugInfo/DIContext.h"
5 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
6 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
7 #include "llvm/Object/ObjectFile.h"
9 #define DEBUG_TYPE "dwarfdump"
11 using namespace object
;
13 /// Holds statistics for one function (or other entity that has a PC range and
14 /// contains variables, such as a compile unit).
15 struct PerFunctionStats
{
16 /// Number of inlined instances of this function.
17 unsigned NumFnInlined
= 0;
18 /// Number of variables with location across all inlined instances.
19 unsigned TotalVarWithLoc
= 0;
20 /// Number of constants with location across all inlined instances.
21 unsigned ConstantMembers
= 0;
22 /// List of all Variables in this function.
23 StringSet
<> VarsInFunction
;
24 /// Compile units also cover a PC range, but have this flag set to false.
25 bool IsFunction
= false;
28 /// Holds accumulated global statistics about DIEs.
30 /// Total number of PC range bytes covered by DW_AT_locations.
31 unsigned ScopeBytesCovered
= 0;
32 /// Total number of PC range bytes in each variable's enclosing scope,
33 /// starting from the first definition of the variable.
34 unsigned ScopeBytesFromFirstDefinition
= 0;
35 /// Total number of call site entries (DW_TAG_call_site).
36 unsigned CallSiteEntries
= 0;
37 /// Total byte size of concrete functions. This byte size includes
38 /// inline functions contained in the concrete functions.
39 uint64_t FunctionSize
= 0;
40 /// Total byte size of inlined functions. This is the total number of bytes
41 /// for the top inline functions within concrete functions. This can help
42 /// tune the inline settings when compiling to match user expectations.
43 uint64_t InlineFunctionSize
= 0;
46 /// Extract the low pc from a Die.
47 static uint64_t getLowPC(DWARFDie Die
) {
48 auto RangesOrError
= Die
.getAddressRanges();
49 DWARFAddressRangesVector Ranges
;
51 Ranges
= RangesOrError
.get();
53 llvm::consumeError(RangesOrError
.takeError());
55 return Ranges
[0].LowPC
;
56 return dwarf::toAddress(Die
.find(dwarf::DW_AT_low_pc
), 0);
59 /// Collect debug info quality metrics for one DIE.
60 static void collectStatsForDie(DWARFDie Die
, std::string FnPrefix
,
61 std::string VarPrefix
, uint64_t ScopeLowPC
,
62 uint64_t BytesInScope
,
64 StringMap
<PerFunctionStats
> &FnStatMap
,
65 GlobalStats
&GlobalStats
) {
67 uint64_t BytesCovered
= 0;
68 uint64_t OffsetToFirstDefinition
= 0;
70 if (Die
.getTag() == dwarf::DW_TAG_call_site
) {
71 GlobalStats
.CallSiteEntries
++;
75 if (Die
.getTag() != dwarf::DW_TAG_formal_parameter
&&
76 Die
.getTag() != dwarf::DW_TAG_variable
&&
77 Die
.getTag() != dwarf::DW_TAG_member
) {
78 // Not a variable or constant member.
82 if (Die
.find(dwarf::DW_AT_const_value
)) {
83 // This catches constant members *and* variables.
85 BytesCovered
= BytesInScope
;
87 if (Die
.getTag() == dwarf::DW_TAG_member
) {
91 // Handle variables and function arguments.
92 auto FormValue
= Die
.find(dwarf::DW_AT_location
);
93 HasLoc
= FormValue
.hasValue();
96 if (auto DebugLocOffset
= FormValue
->getAsSectionOffset()) {
97 auto *DebugLoc
= Die
.getDwarfUnit()->getContext().getDebugLoc();
98 if (auto List
= DebugLoc
->getLocationListAtOffset(*DebugLocOffset
)) {
99 for (auto Entry
: List
->Entries
)
100 BytesCovered
+= Entry
.End
- Entry
.Begin
;
101 if (List
->Entries
.size()) {
102 uint64_t FirstDef
= List
->Entries
[0].Begin
;
103 uint64_t UnitOfs
= getLowPC(Die
.getDwarfUnit()->getUnitDIE());
104 // Ranges sometimes start before the lexical scope.
105 if (UnitOfs
+ FirstDef
>= ScopeLowPC
)
106 OffsetToFirstDefinition
= UnitOfs
+ FirstDef
- ScopeLowPC
;
107 // Or even after it. Count that as a failure.
108 if (OffsetToFirstDefinition
> BytesInScope
)
109 OffsetToFirstDefinition
= 0;
112 assert(BytesInScope
);
114 // Assume the entire range is covered by a single location.
115 BytesCovered
= BytesInScope
;
120 // Collect PC range coverage data.
121 auto &FnStats
= FnStatMap
[FnPrefix
];
123 Die
.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin
))
125 // By using the variable name + the path through the lexical block tree, the
126 // keys are consistent across duplicate abstract origins in different CUs.
127 std::string VarName
= StringRef(Die
.getName(DINameKind::ShortName
));
128 FnStats
.VarsInFunction
.insert(VarPrefix
+VarName
);
130 FnStats
.TotalVarWithLoc
+= (unsigned)HasLoc
;
131 // Adjust for the fact the variables often start their lifetime in the
132 // middle of the scope.
133 BytesInScope
-= OffsetToFirstDefinition
;
134 // Turns out we have a lot of ranges that extend past the lexical scope.
135 GlobalStats
.ScopeBytesCovered
+= std::min(BytesInScope
, BytesCovered
);
136 GlobalStats
.ScopeBytesFromFirstDefinition
+= BytesInScope
;
137 assert(GlobalStats
.ScopeBytesCovered
<=
138 GlobalStats
.ScopeBytesFromFirstDefinition
);
140 FnStats
.ConstantMembers
++;
144 /// Recursively collect debug info quality metrics.
145 static void collectStatsRecursive(DWARFDie Die
, std::string FnPrefix
,
146 std::string VarPrefix
, uint64_t ScopeLowPC
,
147 uint64_t BytesInScope
,
148 uint32_t InlineDepth
,
149 StringMap
<PerFunctionStats
> &FnStatMap
,
150 GlobalStats
&GlobalStats
) {
151 // Handle any kind of lexical scope.
152 const dwarf::Tag Tag
= Die
.getTag();
153 const bool IsFunction
= Tag
== dwarf::DW_TAG_subprogram
;
154 const bool IsBlock
= Tag
== dwarf::DW_TAG_lexical_block
;
155 const bool IsInlinedFunction
= Tag
== dwarf::DW_TAG_inlined_subroutine
;
156 if (IsFunction
|| IsInlinedFunction
|| IsBlock
) {
158 // Reset VarPrefix when entering a new function.
159 if (Die
.getTag() == dwarf::DW_TAG_subprogram
||
160 Die
.getTag() == dwarf::DW_TAG_inlined_subroutine
)
163 // Ignore forward declarations.
164 if (Die
.find(dwarf::DW_AT_declaration
))
167 // Count the function.
169 StringRef Name
= Die
.getName(DINameKind::LinkageName
);
171 Name
= Die
.getName(DINameKind::ShortName
);
173 // Skip over abstract origins.
174 if (Die
.find(dwarf::DW_AT_inline
))
176 // We've seen an (inlined) instance of this function.
177 auto &FnStats
= FnStatMap
[Name
];
178 FnStats
.NumFnInlined
++;
179 FnStats
.IsFunction
= true;
183 auto RangesOrError
= Die
.getAddressRanges();
184 if (!RangesOrError
) {
185 llvm::consumeError(RangesOrError
.takeError());
189 auto Ranges
= RangesOrError
.get();
190 uint64_t BytesInThisScope
= 0;
191 for (auto Range
: Ranges
)
192 BytesInThisScope
+= Range
.HighPC
- Range
.LowPC
;
193 ScopeLowPC
= getLowPC(Die
);
195 if (BytesInThisScope
) {
196 BytesInScope
= BytesInThisScope
;
198 GlobalStats
.FunctionSize
+= BytesInThisScope
;
199 else if (IsInlinedFunction
&& InlineDepth
== 0)
200 GlobalStats
.InlineFunctionSize
+= BytesInThisScope
;
203 // Not a scope, visit the Die itself. It could be a variable.
204 collectStatsForDie(Die
, FnPrefix
, VarPrefix
, ScopeLowPC
, BytesInScope
,
205 InlineDepth
, FnStatMap
, GlobalStats
);
208 // Set InlineDepth correctly for child recursion
211 else if (IsInlinedFunction
)
214 // Traverse children.
215 unsigned LexicalBlockIndex
= 0;
216 DWARFDie Child
= Die
.getFirstChild();
218 std::string ChildVarPrefix
= VarPrefix
;
219 if (Child
.getTag() == dwarf::DW_TAG_lexical_block
)
220 ChildVarPrefix
+= toHex(LexicalBlockIndex
++) + '.';
222 collectStatsRecursive(Child
, FnPrefix
, ChildVarPrefix
, ScopeLowPC
,
223 BytesInScope
, InlineDepth
, FnStatMap
, GlobalStats
);
224 Child
= Child
.getSibling();
228 /// Print machine-readable output.
229 /// The machine-readable format is single-line JSON output.
231 static void printDatum(raw_ostream
&OS
, const char *Key
, StringRef Value
) {
232 OS
<< ",\"" << Key
<< "\":\"" << Value
<< '"';
233 LLVM_DEBUG(llvm::dbgs() << Key
<< ": " << Value
<< '\n');
235 static void printDatum(raw_ostream
&OS
, const char *Key
, uint64_t Value
) {
236 OS
<< ",\"" << Key
<< "\":" << Value
;
237 LLVM_DEBUG(llvm::dbgs() << Key
<< ": " << Value
<< '\n');
241 /// Collect debug info quality metrics for an entire DIContext.
243 /// Do the impossible and reduce the quality of the debug info down to a few
244 /// numbers. The idea is to condense the data into numbers that can be tracked
245 /// over time to identify trends in newer compiler versions and gauge the effect
246 /// of particular optimizations. The raw numbers themselves are not particularly
247 /// useful, only the delta between compiling the same program with different
249 bool collectStatsForObjectFile(ObjectFile
&Obj
, DWARFContext
&DICtx
,
250 Twine Filename
, raw_ostream
&OS
) {
251 StringRef FormatName
= Obj
.getFileFormatName();
252 GlobalStats GlobalStats
;
253 StringMap
<PerFunctionStats
> Statistics
;
254 for (const auto &CU
: static_cast<DWARFContext
*>(&DICtx
)->compile_units())
255 if (DWARFDie CUDie
= CU
->getUnitDIE(false))
256 collectStatsRecursive(CUDie
, "/", "g", 0, 0, 0, Statistics
, GlobalStats
);
258 /// The version number should be increased every time the algorithm is changed
259 /// (including bug fixes). New metrics may be added without increasing the
261 unsigned Version
= 1;
262 unsigned VarTotal
= 0;
263 unsigned VarUnique
= 0;
264 unsigned VarWithLoc
= 0;
265 unsigned NumFunctions
= 0;
266 unsigned NumInlinedFunctions
= 0;
267 for (auto &Entry
: Statistics
) {
268 PerFunctionStats
&Stats
= Entry
.getValue();
269 unsigned TotalVars
= Stats
.VarsInFunction
.size() * Stats
.NumFnInlined
;
270 unsigned Constants
= Stats
.ConstantMembers
;
271 VarWithLoc
+= Stats
.TotalVarWithLoc
+ Constants
;
272 VarTotal
+= TotalVars
+ Constants
;
273 VarUnique
+= Stats
.VarsInFunction
.size();
274 LLVM_DEBUG(for (auto &V
: Stats
.VarsInFunction
) llvm::dbgs()
275 << Entry
.getKey() << ": " << V
.getKey() << "\n");
276 NumFunctions
+= Stats
.IsFunction
;
277 NumInlinedFunctions
+= Stats
.IsFunction
* Stats
.NumFnInlined
;
281 OS
.SetBufferSize(1024);
282 OS
<< "{\"version\":" << Version
;
283 LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n";
284 llvm::dbgs() << "---------------------------------\n");
285 printDatum(OS
, "file", Filename
.str());
286 printDatum(OS
, "format", FormatName
);
287 printDatum(OS
, "source functions", NumFunctions
);
288 printDatum(OS
, "inlined functions", NumInlinedFunctions
);
289 printDatum(OS
, "unique source variables", VarUnique
);
290 printDatum(OS
, "source variables", VarTotal
);
291 printDatum(OS
, "variables with location", VarWithLoc
);
292 printDatum(OS
, "call site entries", GlobalStats
.CallSiteEntries
);
293 printDatum(OS
, "scope bytes total",
294 GlobalStats
.ScopeBytesFromFirstDefinition
);
295 printDatum(OS
, "scope bytes covered", GlobalStats
.ScopeBytesCovered
);
296 printDatum(OS
, "total function size", GlobalStats
.FunctionSize
);
297 printDatum(OS
, "total inlined function size", GlobalStats
.InlineFunctionSize
);
300 llvm::dbgs() << "Total Availability: "
301 << (int)std::round((VarWithLoc
* 100.0) / VarTotal
) << "%\n";
302 llvm::dbgs() << "PC Ranges covered: "
303 << (int)std::round((GlobalStats
.ScopeBytesCovered
* 100.0) /
304 GlobalStats
.ScopeBytesFromFirstDefinition
)