1 //===-- llvm-bcanalyzer.cpp - Bitcode Analyzer --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This tool may be invoked in the following manner:
11 // llvm-bcanalyzer [options] - Read LLVM bitcode from stdin
12 // llvm-bcanalyzer [options] x.bc - Read LLVM bitcode from the x.bc file
15 // --help - Output information about command line switches
16 // --dump - Dump low-level bitcode structure in readable format
// This tool provides analytical information about a bitcode file. It is
// intended as an aid to developers of bitcode reading and writing software. It
// writes to standard output a summary of the bitcode file that shows various
// statistics about the contents of the file. By default this information is
// detailed and contains information about individual bitcode blocks and the
// functions in the module.
// The tool is also able to print a bitcode file in a straightforward text
// format that shows the containment and relationships of the information in
// the bitcode file (-dump option).
28 //===----------------------------------------------------------------------===//
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
#include <climits>
#include <map>
#include <system_error>
using namespace llvm;
49 static cl::opt
<std::string
>
50 InputFilename(cl::Positional
, cl::desc("<input bitcode>"), cl::init("-"));
52 static cl::opt
<bool> Dump("dump", cl::desc("Dump low level bitcode trace"));
54 //===----------------------------------------------------------------------===//
55 // Bitcode specific analysis.
56 //===----------------------------------------------------------------------===//
58 static cl::opt
<bool> NoHistogram("disable-histogram",
59 cl::desc("Do not print per-code histogram"));
62 NonSymbolic("non-symbolic",
63 cl::desc("Emit numeric info in dump even if"
64 " symbolic info is available"));
66 static cl::opt
<std::string
>
67 BlockInfoFilename("block-info",
68 cl::desc("Use the BLOCK_INFO from the given file"));
71 ShowBinaryBlobs("show-binary-blobs",
72 cl::desc("Print binary blobs using hex escapes"));
74 static cl::opt
<std::string
> CheckHash(
76 cl::desc("Check module hash using the argument as a string table"));
namespace {

/// CurStreamTypeType - A type for CurStreamType: the kind of bitstream that
/// was recognized from the stream signature.
enum CurStreamTypeType {
  /// The signature did not match any known stream type.
  UnknownBitstream,
  /// The signature matched the LLVM IR bitcode signature.
  LLVMIRBitstream
};

} // end anonymous namespace
88 /// GetBlockName - Return a symbolic block name if known, otherwise return
90 static const char *GetBlockName(unsigned BlockID
,
91 const BitstreamBlockInfo
&BlockInfo
,
92 CurStreamTypeType CurStreamType
) {
93 // Standard blocks for all bitcode files.
94 if (BlockID
< bitc::FIRST_APPLICATION_BLOCKID
) {
95 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
)
96 return "BLOCKINFO_BLOCK";
100 // Check to see if we have a blockinfo record for this block, with a name.
101 if (const BitstreamBlockInfo::BlockInfo
*Info
=
102 BlockInfo
.getBlockInfo(BlockID
)) {
103 if (!Info
->Name
.empty())
104 return Info
->Name
.c_str();
108 if (CurStreamType
!= LLVMIRBitstream
) return nullptr;
111 default: return nullptr;
112 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID
: return "OPERAND_BUNDLE_TAGS_BLOCK";
113 case bitc::MODULE_BLOCK_ID
: return "MODULE_BLOCK";
114 case bitc::PARAMATTR_BLOCK_ID
: return "PARAMATTR_BLOCK";
115 case bitc::PARAMATTR_GROUP_BLOCK_ID
: return "PARAMATTR_GROUP_BLOCK_ID";
116 case bitc::TYPE_BLOCK_ID_NEW
: return "TYPE_BLOCK_ID";
117 case bitc::CONSTANTS_BLOCK_ID
: return "CONSTANTS_BLOCK";
118 case bitc::FUNCTION_BLOCK_ID
: return "FUNCTION_BLOCK";
119 case bitc::IDENTIFICATION_BLOCK_ID
:
120 return "IDENTIFICATION_BLOCK_ID";
121 case bitc::VALUE_SYMTAB_BLOCK_ID
: return "VALUE_SYMTAB";
122 case bitc::METADATA_BLOCK_ID
: return "METADATA_BLOCK";
123 case bitc::METADATA_KIND_BLOCK_ID
: return "METADATA_KIND_BLOCK";
124 case bitc::METADATA_ATTACHMENT_ID
: return "METADATA_ATTACHMENT_BLOCK";
125 case bitc::USELIST_BLOCK_ID
: return "USELIST_BLOCK_ID";
126 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID
:
127 return "GLOBALVAL_SUMMARY_BLOCK";
128 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID
:
129 return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
130 case bitc::MODULE_STRTAB_BLOCK_ID
: return "MODULE_STRTAB_BLOCK";
131 case bitc::STRTAB_BLOCK_ID
: return "STRTAB_BLOCK";
132 case bitc::SYMTAB_BLOCK_ID
: return "SYMTAB_BLOCK";
136 /// GetCodeName - Return a symbolic code name if known, otherwise return
138 static const char *GetCodeName(unsigned CodeID
, unsigned BlockID
,
139 const BitstreamBlockInfo
&BlockInfo
,
140 CurStreamTypeType CurStreamType
) {
141 // Standard blocks for all bitcode files.
142 if (BlockID
< bitc::FIRST_APPLICATION_BLOCKID
) {
143 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
) {
145 default: return nullptr;
146 case bitc::BLOCKINFO_CODE_SETBID
: return "SETBID";
147 case bitc::BLOCKINFO_CODE_BLOCKNAME
: return "BLOCKNAME";
148 case bitc::BLOCKINFO_CODE_SETRECORDNAME
: return "SETRECORDNAME";
154 // Check to see if we have a blockinfo record for this record, with a name.
155 if (const BitstreamBlockInfo::BlockInfo
*Info
=
156 BlockInfo
.getBlockInfo(BlockID
)) {
157 for (unsigned i
= 0, e
= Info
->RecordNames
.size(); i
!= e
; ++i
)
158 if (Info
->RecordNames
[i
].first
== CodeID
)
159 return Info
->RecordNames
[i
].second
.c_str();
163 if (CurStreamType
!= LLVMIRBitstream
) return nullptr;
165 #define STRINGIFY_CODE(PREFIX, CODE) \
166 case bitc::PREFIX##_##CODE: \
169 default: return nullptr;
170 case bitc::MODULE_BLOCK_ID
:
172 default: return nullptr;
173 STRINGIFY_CODE(MODULE_CODE
, VERSION
)
174 STRINGIFY_CODE(MODULE_CODE
, TRIPLE
)
175 STRINGIFY_CODE(MODULE_CODE
, DATALAYOUT
)
176 STRINGIFY_CODE(MODULE_CODE
, ASM
)
177 STRINGIFY_CODE(MODULE_CODE
, SECTIONNAME
)
178 STRINGIFY_CODE(MODULE_CODE
, DEPLIB
) // FIXME: Remove in 4.0
179 STRINGIFY_CODE(MODULE_CODE
, GLOBALVAR
)
180 STRINGIFY_CODE(MODULE_CODE
, FUNCTION
)
181 STRINGIFY_CODE(MODULE_CODE
, ALIAS
)
182 STRINGIFY_CODE(MODULE_CODE
, GCNAME
)
183 STRINGIFY_CODE(MODULE_CODE
, VSTOFFSET
)
184 STRINGIFY_CODE(MODULE_CODE
, METADATA_VALUES_UNUSED
)
185 STRINGIFY_CODE(MODULE_CODE
, SOURCE_FILENAME
)
186 STRINGIFY_CODE(MODULE_CODE
, HASH
)
188 case bitc::IDENTIFICATION_BLOCK_ID
:
192 STRINGIFY_CODE(IDENTIFICATION_CODE
, STRING
)
193 STRINGIFY_CODE(IDENTIFICATION_CODE
, EPOCH
)
195 case bitc::PARAMATTR_BLOCK_ID
:
197 default: return nullptr;
198 // FIXME: Should these be different?
199 case bitc::PARAMATTR_CODE_ENTRY_OLD
: return "ENTRY";
200 case bitc::PARAMATTR_CODE_ENTRY
: return "ENTRY";
202 case bitc::PARAMATTR_GROUP_BLOCK_ID
:
204 default: return nullptr;
205 case bitc::PARAMATTR_GRP_CODE_ENTRY
: return "ENTRY";
207 case bitc::TYPE_BLOCK_ID_NEW
:
209 default: return nullptr;
210 STRINGIFY_CODE(TYPE_CODE
, NUMENTRY
)
211 STRINGIFY_CODE(TYPE_CODE
, VOID
)
212 STRINGIFY_CODE(TYPE_CODE
, FLOAT
)
213 STRINGIFY_CODE(TYPE_CODE
, DOUBLE
)
214 STRINGIFY_CODE(TYPE_CODE
, LABEL
)
215 STRINGIFY_CODE(TYPE_CODE
, OPAQUE
)
216 STRINGIFY_CODE(TYPE_CODE
, INTEGER
)
217 STRINGIFY_CODE(TYPE_CODE
, POINTER
)
218 STRINGIFY_CODE(TYPE_CODE
, ARRAY
)
219 STRINGIFY_CODE(TYPE_CODE
, VECTOR
)
220 STRINGIFY_CODE(TYPE_CODE
, X86_FP80
)
221 STRINGIFY_CODE(TYPE_CODE
, FP128
)
222 STRINGIFY_CODE(TYPE_CODE
, PPC_FP128
)
223 STRINGIFY_CODE(TYPE_CODE
, METADATA
)
224 STRINGIFY_CODE(TYPE_CODE
, STRUCT_ANON
)
225 STRINGIFY_CODE(TYPE_CODE
, STRUCT_NAME
)
226 STRINGIFY_CODE(TYPE_CODE
, STRUCT_NAMED
)
227 STRINGIFY_CODE(TYPE_CODE
, FUNCTION
)
230 case bitc::CONSTANTS_BLOCK_ID
:
232 default: return nullptr;
233 STRINGIFY_CODE(CST_CODE
, SETTYPE
)
234 STRINGIFY_CODE(CST_CODE
, NULL
)
235 STRINGIFY_CODE(CST_CODE
, UNDEF
)
236 STRINGIFY_CODE(CST_CODE
, INTEGER
)
237 STRINGIFY_CODE(CST_CODE
, WIDE_INTEGER
)
238 STRINGIFY_CODE(CST_CODE
, FLOAT
)
239 STRINGIFY_CODE(CST_CODE
, AGGREGATE
)
240 STRINGIFY_CODE(CST_CODE
, STRING
)
241 STRINGIFY_CODE(CST_CODE
, CSTRING
)
242 STRINGIFY_CODE(CST_CODE
, CE_BINOP
)
243 STRINGIFY_CODE(CST_CODE
, CE_CAST
)
244 STRINGIFY_CODE(CST_CODE
, CE_GEP
)
245 STRINGIFY_CODE(CST_CODE
, CE_INBOUNDS_GEP
)
246 STRINGIFY_CODE(CST_CODE
, CE_SELECT
)
247 STRINGIFY_CODE(CST_CODE
, CE_EXTRACTELT
)
248 STRINGIFY_CODE(CST_CODE
, CE_INSERTELT
)
249 STRINGIFY_CODE(CST_CODE
, CE_SHUFFLEVEC
)
250 STRINGIFY_CODE(CST_CODE
, CE_CMP
)
251 STRINGIFY_CODE(CST_CODE
, INLINEASM
)
252 STRINGIFY_CODE(CST_CODE
, CE_SHUFVEC_EX
)
253 case bitc::CST_CODE_BLOCKADDRESS
: return "CST_CODE_BLOCKADDRESS";
254 STRINGIFY_CODE(CST_CODE
, DATA
)
256 case bitc::FUNCTION_BLOCK_ID
:
258 default: return nullptr;
259 STRINGIFY_CODE(FUNC_CODE
, DECLAREBLOCKS
)
260 STRINGIFY_CODE(FUNC_CODE
, INST_BINOP
)
261 STRINGIFY_CODE(FUNC_CODE
, INST_CAST
)
262 STRINGIFY_CODE(FUNC_CODE
, INST_GEP_OLD
)
263 STRINGIFY_CODE(FUNC_CODE
, INST_INBOUNDS_GEP_OLD
)
264 STRINGIFY_CODE(FUNC_CODE
, INST_SELECT
)
265 STRINGIFY_CODE(FUNC_CODE
, INST_EXTRACTELT
)
266 STRINGIFY_CODE(FUNC_CODE
, INST_INSERTELT
)
267 STRINGIFY_CODE(FUNC_CODE
, INST_SHUFFLEVEC
)
268 STRINGIFY_CODE(FUNC_CODE
, INST_CMP
)
269 STRINGIFY_CODE(FUNC_CODE
, INST_RET
)
270 STRINGIFY_CODE(FUNC_CODE
, INST_BR
)
271 STRINGIFY_CODE(FUNC_CODE
, INST_SWITCH
)
272 STRINGIFY_CODE(FUNC_CODE
, INST_INVOKE
)
273 STRINGIFY_CODE(FUNC_CODE
, INST_UNREACHABLE
)
274 STRINGIFY_CODE(FUNC_CODE
, INST_CLEANUPRET
)
275 STRINGIFY_CODE(FUNC_CODE
, INST_CATCHRET
)
276 STRINGIFY_CODE(FUNC_CODE
, INST_CATCHPAD
)
277 STRINGIFY_CODE(FUNC_CODE
, INST_PHI
)
278 STRINGIFY_CODE(FUNC_CODE
, INST_ALLOCA
)
279 STRINGIFY_CODE(FUNC_CODE
, INST_LOAD
)
280 STRINGIFY_CODE(FUNC_CODE
, INST_VAARG
)
281 STRINGIFY_CODE(FUNC_CODE
, INST_STORE
)
282 STRINGIFY_CODE(FUNC_CODE
, INST_EXTRACTVAL
)
283 STRINGIFY_CODE(FUNC_CODE
, INST_INSERTVAL
)
284 STRINGIFY_CODE(FUNC_CODE
, INST_CMP2
)
285 STRINGIFY_CODE(FUNC_CODE
, INST_VSELECT
)
286 STRINGIFY_CODE(FUNC_CODE
, DEBUG_LOC_AGAIN
)
287 STRINGIFY_CODE(FUNC_CODE
, INST_CALL
)
288 STRINGIFY_CODE(FUNC_CODE
, DEBUG_LOC
)
289 STRINGIFY_CODE(FUNC_CODE
, INST_GEP
)
290 STRINGIFY_CODE(FUNC_CODE
, OPERAND_BUNDLE
)
292 case bitc::VALUE_SYMTAB_BLOCK_ID
:
294 default: return nullptr;
295 STRINGIFY_CODE(VST_CODE
, ENTRY
)
296 STRINGIFY_CODE(VST_CODE
, BBENTRY
)
297 STRINGIFY_CODE(VST_CODE
, FNENTRY
)
298 STRINGIFY_CODE(VST_CODE
, COMBINED_ENTRY
)
300 case bitc::MODULE_STRTAB_BLOCK_ID
:
304 STRINGIFY_CODE(MST_CODE
, ENTRY
)
305 STRINGIFY_CODE(MST_CODE
, HASH
)
307 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID
:
308 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID
:
312 STRINGIFY_CODE(FS
, PERMODULE
)
313 STRINGIFY_CODE(FS
, PERMODULE_PROFILE
)
314 STRINGIFY_CODE(FS
, PERMODULE_GLOBALVAR_INIT_REFS
)
315 STRINGIFY_CODE(FS
, COMBINED
)
316 STRINGIFY_CODE(FS
, COMBINED_PROFILE
)
317 STRINGIFY_CODE(FS
, COMBINED_GLOBALVAR_INIT_REFS
)
318 STRINGIFY_CODE(FS
, ALIAS
)
319 STRINGIFY_CODE(FS
, COMBINED_ALIAS
)
320 STRINGIFY_CODE(FS
, COMBINED_ORIGINAL_NAME
)
321 STRINGIFY_CODE(FS
, VERSION
)
322 STRINGIFY_CODE(FS
, TYPE_TESTS
)
323 STRINGIFY_CODE(FS
, TYPE_TEST_ASSUME_VCALLS
)
324 STRINGIFY_CODE(FS
, TYPE_CHECKED_LOAD_VCALLS
)
325 STRINGIFY_CODE(FS
, TYPE_TEST_ASSUME_CONST_VCALL
)
326 STRINGIFY_CODE(FS
, TYPE_CHECKED_LOAD_CONST_VCALL
)
327 STRINGIFY_CODE(FS
, VALUE_GUID
)
328 STRINGIFY_CODE(FS
, CFI_FUNCTION_DEFS
)
329 STRINGIFY_CODE(FS
, CFI_FUNCTION_DECLS
)
331 case bitc::METADATA_ATTACHMENT_ID
:
333 default:return nullptr;
334 STRINGIFY_CODE(METADATA
, ATTACHMENT
)
336 case bitc::METADATA_BLOCK_ID
:
338 default:return nullptr;
339 STRINGIFY_CODE(METADATA
, STRING_OLD
)
340 STRINGIFY_CODE(METADATA
, VALUE
)
341 STRINGIFY_CODE(METADATA
, NODE
)
342 STRINGIFY_CODE(METADATA
, NAME
)
343 STRINGIFY_CODE(METADATA
, DISTINCT_NODE
)
344 STRINGIFY_CODE(METADATA
, KIND
) // Older bitcode has it in a MODULE_BLOCK
345 STRINGIFY_CODE(METADATA
, LOCATION
)
346 STRINGIFY_CODE(METADATA
, OLD_NODE
)
347 STRINGIFY_CODE(METADATA
, OLD_FN_NODE
)
348 STRINGIFY_CODE(METADATA
, NAMED_NODE
)
349 STRINGIFY_CODE(METADATA
, GENERIC_DEBUG
)
350 STRINGIFY_CODE(METADATA
, SUBRANGE
)
351 STRINGIFY_CODE(METADATA
, ENUMERATOR
)
352 STRINGIFY_CODE(METADATA
, BASIC_TYPE
)
353 STRINGIFY_CODE(METADATA
, FILE)
354 STRINGIFY_CODE(METADATA
, DERIVED_TYPE
)
355 STRINGIFY_CODE(METADATA
, COMPOSITE_TYPE
)
356 STRINGIFY_CODE(METADATA
, SUBROUTINE_TYPE
)
357 STRINGIFY_CODE(METADATA
, COMPILE_UNIT
)
358 STRINGIFY_CODE(METADATA
, SUBPROGRAM
)
359 STRINGIFY_CODE(METADATA
, LEXICAL_BLOCK
)
360 STRINGIFY_CODE(METADATA
, LEXICAL_BLOCK_FILE
)
361 STRINGIFY_CODE(METADATA
, NAMESPACE
)
362 STRINGIFY_CODE(METADATA
, TEMPLATE_TYPE
)
363 STRINGIFY_CODE(METADATA
, TEMPLATE_VALUE
)
364 STRINGIFY_CODE(METADATA
, GLOBAL_VAR
)
365 STRINGIFY_CODE(METADATA
, LOCAL_VAR
)
366 STRINGIFY_CODE(METADATA
, EXPRESSION
)
367 STRINGIFY_CODE(METADATA
, OBJC_PROPERTY
)
368 STRINGIFY_CODE(METADATA
, IMPORTED_ENTITY
)
369 STRINGIFY_CODE(METADATA
, MODULE
)
370 STRINGIFY_CODE(METADATA
, MACRO
)
371 STRINGIFY_CODE(METADATA
, MACRO_FILE
)
372 STRINGIFY_CODE(METADATA
, STRINGS
)
373 STRINGIFY_CODE(METADATA
, GLOBAL_DECL_ATTACHMENT
)
374 STRINGIFY_CODE(METADATA
, GLOBAL_VAR_EXPR
)
375 STRINGIFY_CODE(METADATA
, INDEX_OFFSET
)
376 STRINGIFY_CODE(METADATA
, INDEX
)
378 case bitc::METADATA_KIND_BLOCK_ID
:
382 STRINGIFY_CODE(METADATA
, KIND
)
384 case bitc::USELIST_BLOCK_ID
:
386 default:return nullptr;
387 case bitc::USELIST_CODE_DEFAULT
: return "USELIST_CODE_DEFAULT";
388 case bitc::USELIST_CODE_BB
: return "USELIST_CODE_BB";
391 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID
:
393 default: return nullptr;
394 case bitc::OPERAND_BUNDLE_TAG
: return "OPERAND_BUNDLE_TAG";
396 case bitc::STRTAB_BLOCK_ID
:
398 default: return nullptr;
399 case bitc::STRTAB_BLOB
: return "BLOB";
401 case bitc::SYMTAB_BLOCK_ID
:
403 default: return nullptr;
404 case bitc::SYMTAB_BLOB
: return "BLOB";
407 #undef STRINGIFY_CODE
namespace {

/// Counters kept for one record code within one block id.
struct PerRecordStats {
  /// Number of records seen with this code.
  unsigned NumInstances = 0;
  /// Number of those records that were not emitted as UNABBREV_RECORD.
  unsigned NumAbbrev = 0;
  /// Bits consumed by all records with this code.
  uint64_t TotalBits = 0;
};

} // end anonymous namespace
418 struct PerBlockIDStats
{
419 /// NumInstances - This the number of times this block ID has been seen.
420 unsigned NumInstances
;
422 /// NumBits - The total size in bits of all of these blocks.
425 /// NumSubBlocks - The total number of blocks these blocks contain.
426 unsigned NumSubBlocks
;
428 /// NumAbbrevs - The total number of abbreviations.
431 /// NumRecords - The total number of records these blocks contain, and the
432 /// number that are abbreviated.
433 unsigned NumRecords
, NumAbbreviatedRecords
;
435 /// CodeFreq - Keep track of the number of times we see each code.
436 std::vector
<PerRecordStats
> CodeFreq
;
439 : NumInstances(0), NumBits(0),
440 NumSubBlocks(0), NumAbbrevs(0), NumRecords(0), NumAbbreviatedRecords(0) {}
443 static std::map
<unsigned, PerBlockIDStats
> BlockIDStats
;
447 /// ReportError - All bitcode analysis errors go through this function, making this a
448 /// good place to breakpoint if debugging.
449 static bool ReportError(const Twine
&Err
) {
450 errs() << Err
<< "\n";
454 static bool decodeMetadataStringsBlob(StringRef Indent
,
455 ArrayRef
<uint64_t> Record
,
460 if (Record
.size() != 2)
463 unsigned NumStrings
= Record
[0];
464 unsigned StringsOffset
= Record
[1];
465 outs() << " num-strings = " << NumStrings
<< " {\n";
467 StringRef Lengths
= Blob
.slice(0, StringsOffset
);
468 SimpleBitstreamCursor
R(Lengths
);
469 StringRef Strings
= Blob
.drop_front(StringsOffset
);
471 if (R
.AtEndOfStream())
472 return ReportError("bad length");
474 unsigned Size
= R
.ReadVBR(6);
475 if (Strings
.size() < Size
)
476 return ReportError("truncated chars");
478 outs() << Indent
<< " '";
479 outs().write_escaped(Strings
.slice(0, Size
), /*hex=*/true);
481 Strings
= Strings
.drop_front(Size
);
482 } while (--NumStrings
);
484 outs() << Indent
<< " }";
488 static bool decodeBlob(unsigned Code
, unsigned BlockID
, StringRef Indent
,
489 ArrayRef
<uint64_t> Record
, StringRef Blob
) {
490 if (BlockID
!= bitc::METADATA_BLOCK_ID
)
492 if (Code
!= bitc::METADATA_STRINGS
)
495 return decodeMetadataStringsBlob(Indent
, Record
, Blob
);
498 /// ParseBlock - Read a block, updating statistics, etc.
499 static bool ParseBlock(BitstreamCursor
&Stream
, BitstreamBlockInfo
&BlockInfo
,
500 unsigned BlockID
, unsigned IndentLevel
,
501 CurStreamTypeType CurStreamType
) {
502 std::string
Indent(IndentLevel
*2, ' ');
503 uint64_t BlockBitStart
= Stream
.GetCurrentBitNo();
505 // Get the statistics for this BlockID.
506 PerBlockIDStats
&BlockStats
= BlockIDStats
[BlockID
];
508 BlockStats
.NumInstances
++;
510 // BLOCKINFO is a special part of the stream.
511 bool DumpRecords
= Dump
;
512 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
) {
513 if (Dump
) outs() << Indent
<< "<BLOCKINFO_BLOCK/>\n";
514 Optional
<BitstreamBlockInfo
> NewBlockInfo
=
515 Stream
.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
517 return ReportError("Malformed BlockInfoBlock");
518 BlockInfo
= std::move(*NewBlockInfo
);
519 Stream
.JumpToBit(BlockBitStart
);
520 // It's not really interesting to dump the contents of the blockinfo block.
524 unsigned NumWords
= 0;
525 if (Stream
.EnterSubBlock(BlockID
, &NumWords
))
526 return ReportError("Malformed block record");
528 // Keep it for later, when we see a MODULE_HASH record
529 uint64_t BlockEntryPos
= Stream
.getCurrentByteNo();
531 const char *BlockName
= nullptr;
533 outs() << Indent
<< "<";
534 if ((BlockName
= GetBlockName(BlockID
, BlockInfo
, CurStreamType
)))
537 outs() << "UnknownBlock" << BlockID
;
539 if (NonSymbolic
&& BlockName
)
540 outs() << " BlockID=" << BlockID
;
542 outs() << " NumWords=" << NumWords
543 << " BlockCodeSize=" << Stream
.getAbbrevIDWidth() << ">\n";
546 SmallVector
<uint64_t, 64> Record
;
548 // Keep the offset to the metadata index if seen.
549 uint64_t MetadataIndexOffset
= 0;
551 // Read all the records for this block.
553 if (Stream
.AtEndOfStream())
554 return ReportError("Premature end of bitstream");
556 uint64_t RecordStartBit
= Stream
.GetCurrentBitNo();
558 BitstreamEntry Entry
=
559 Stream
.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs
);
561 switch (Entry
.Kind
) {
562 case BitstreamEntry::Error
:
563 return ReportError("malformed bitcode file");
564 case BitstreamEntry::EndBlock
: {
565 uint64_t BlockBitEnd
= Stream
.GetCurrentBitNo();
566 BlockStats
.NumBits
+= BlockBitEnd
-BlockBitStart
;
568 outs() << Indent
<< "</";
570 outs() << BlockName
<< ">\n";
572 outs() << "UnknownBlock" << BlockID
<< ">\n";
577 case BitstreamEntry::SubBlock
: {
578 uint64_t SubBlockBitStart
= Stream
.GetCurrentBitNo();
579 if (ParseBlock(Stream
, BlockInfo
, Entry
.ID
, IndentLevel
+ 1,
582 ++BlockStats
.NumSubBlocks
;
583 uint64_t SubBlockBitEnd
= Stream
.GetCurrentBitNo();
585 // Don't include subblock sizes in the size of this block.
586 BlockBitStart
+= SubBlockBitEnd
-SubBlockBitStart
;
589 case BitstreamEntry::Record
:
590 // The interesting case.
594 if (Entry
.ID
== bitc::DEFINE_ABBREV
) {
595 Stream
.ReadAbbrevRecord();
596 ++BlockStats
.NumAbbrevs
;
602 ++BlockStats
.NumRecords
;
605 unsigned CurrentRecordPos
= Stream
.GetCurrentBitNo();
606 unsigned Code
= Stream
.readRecord(Entry
.ID
, Record
, &Blob
);
608 // Increment the # occurrences of this code.
609 if (BlockStats
.CodeFreq
.size() <= Code
)
610 BlockStats
.CodeFreq
.resize(Code
+1);
611 BlockStats
.CodeFreq
[Code
].NumInstances
++;
612 BlockStats
.CodeFreq
[Code
].TotalBits
+=
613 Stream
.GetCurrentBitNo()-RecordStartBit
;
614 if (Entry
.ID
!= bitc::UNABBREV_RECORD
) {
615 BlockStats
.CodeFreq
[Code
].NumAbbrev
++;
616 ++BlockStats
.NumAbbreviatedRecords
;
620 outs() << Indent
<< " <";
621 if (const char *CodeName
=
622 GetCodeName(Code
, BlockID
, BlockInfo
, CurStreamType
))
625 outs() << "UnknownCode" << Code
;
626 if (NonSymbolic
&& GetCodeName(Code
, BlockID
, BlockInfo
, CurStreamType
))
627 outs() << " codeid=" << Code
;
628 const BitCodeAbbrev
*Abbv
= nullptr;
629 if (Entry
.ID
!= bitc::UNABBREV_RECORD
) {
630 Abbv
= Stream
.getAbbrev(Entry
.ID
);
631 outs() << " abbrevid=" << Entry
.ID
;
634 for (unsigned i
= 0, e
= Record
.size(); i
!= e
; ++i
)
635 outs() << " op" << i
<< "=" << (int64_t)Record
[i
];
637 // If we found a metadata index, let's verify that we had an offset before
638 // and validate its forward reference offset was correct!
639 if (BlockID
== bitc::METADATA_BLOCK_ID
) {
640 if (Code
== bitc::METADATA_INDEX_OFFSET
) {
641 if (Record
.size() != 2)
642 outs() << "(Invalid record)";
644 auto Offset
= Record
[0] + (Record
[1] << 32);
645 MetadataIndexOffset
= Stream
.GetCurrentBitNo() + Offset
;
648 if (Code
== bitc::METADATA_INDEX
) {
649 outs() << " (offset ";
650 if (MetadataIndexOffset
== RecordStartBit
)
653 outs() << "mismatch: " << MetadataIndexOffset
<< " vs "
654 << RecordStartBit
<< ")";
658 // If we found a module hash, let's verify that it matches!
659 if (BlockID
== bitc::MODULE_BLOCK_ID
&& Code
== bitc::MODULE_CODE_HASH
&&
660 !CheckHash
.empty()) {
661 if (Record
.size() != 5)
662 outs() << " (invalid)";
664 // Recompute the hash and compare it to the one in the bitcode
667 Hasher
.update(CheckHash
);
669 int BlockSize
= (CurrentRecordPos
/ 8) - BlockEntryPos
;
670 auto Ptr
= Stream
.getPointerToByte(BlockEntryPos
, BlockSize
);
671 Hasher
.update(ArrayRef
<uint8_t>(Ptr
, BlockSize
));
672 Hash
= Hasher
.result();
674 SmallString
<20> RecordedHash
;
675 RecordedHash
.resize(20);
677 for (auto &Val
: Record
) {
678 assert(!(Val
>> 32) && "Unexpected high bits set");
679 RecordedHash
[Pos
++] = (Val
>> 24) & 0xFF;
680 RecordedHash
[Pos
++] = (Val
>> 16) & 0xFF;
681 RecordedHash
[Pos
++] = (Val
>> 8) & 0xFF;
682 RecordedHash
[Pos
++] = (Val
>> 0) & 0xFF;
684 if (Hash
== RecordedHash
)
685 outs() << " (match)";
687 outs() << " (!mismatch!)";
694 for (unsigned i
= 1, e
= Abbv
->getNumOperandInfos(); i
!= e
; ++i
) {
695 const BitCodeAbbrevOp
&Op
= Abbv
->getOperandInfo(i
);
696 if (!Op
.isEncoding() || Op
.getEncoding() != BitCodeAbbrevOp::Array
)
698 assert(i
+ 2 == e
&& "Array op not second to last");
700 bool ArrayIsPrintable
= true;
701 for (unsigned j
= i
- 1, je
= Record
.size(); j
!= je
; ++j
) {
702 if (!isprint(static_cast<unsigned char>(Record
[j
]))) {
703 ArrayIsPrintable
= false;
706 Str
+= (char)Record
[j
];
708 if (ArrayIsPrintable
)
709 outs() << " record string = '" << Str
<< "'";
714 if (Blob
.data() && decodeBlob(Code
, BlockID
, Indent
, Record
, Blob
)) {
715 outs() << " blob data = ";
716 if (ShowBinaryBlobs
) {
718 outs().write_escaped(Blob
, /*hex=*/true) << "'";
720 bool BlobIsPrintable
= true;
721 for (unsigned i
= 0, e
= Blob
.size(); i
!= e
; ++i
)
722 if (!isprint(static_cast<unsigned char>(Blob
[i
]))) {
723 BlobIsPrintable
= false;
728 outs() << "'" << Blob
<< "'";
730 outs() << "unprintable, " << Blob
.size() << " bytes.";
737 // Make sure that we can skip the current record.
738 Stream
.JumpToBit(CurrentRecordPos
);
739 Stream
.skipRecord(Entry
.ID
);
743 static void PrintSize(double Bits
) {
744 outs() << format("%.2f/%.2fB/%luW", Bits
, Bits
/8,(unsigned long)(Bits
/32));
746 static void PrintSize(uint64_t Bits
) {
747 outs() << format("%lub/%.2fB/%luW", (unsigned long)Bits
,
748 (double)Bits
/8, (unsigned long)(Bits
/32));
751 static bool openBitcodeFile(StringRef Path
,
752 std::unique_ptr
<MemoryBuffer
> &MemBuf
,
753 BitstreamCursor
&Stream
,
754 CurStreamTypeType
&CurStreamType
) {
755 // Read the input file.
756 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MemBufOrErr
=
757 MemoryBuffer::getFileOrSTDIN(Path
);
758 if (std::error_code EC
= MemBufOrErr
.getError())
759 return ReportError(Twine("ReportError reading '") + Path
+ "': " + EC
.message());
760 MemBuf
= std::move(MemBufOrErr
.get());
762 if (MemBuf
->getBufferSize() & 3)
763 return ReportError("Bitcode stream should be a multiple of 4 bytes in length");
765 const unsigned char *BufPtr
= (const unsigned char *)MemBuf
->getBufferStart();
766 const unsigned char *EndBufPtr
= BufPtr
+ MemBuf
->getBufferSize();
768 // If we have a wrapper header, parse it and ignore the non-bc file contents.
769 // The magic number is 0x0B17C0DE stored in little endian.
770 if (isBitcodeWrapper(BufPtr
, EndBufPtr
)) {
771 if (MemBuf
->getBufferSize() < BWH_HeaderSize
)
772 return ReportError("Invalid bitcode wrapper header");
775 unsigned Magic
= support::endian::read32le(&BufPtr
[BWH_MagicField
]);
776 unsigned Version
= support::endian::read32le(&BufPtr
[BWH_VersionField
]);
777 unsigned Offset
= support::endian::read32le(&BufPtr
[BWH_OffsetField
]);
778 unsigned Size
= support::endian::read32le(&BufPtr
[BWH_SizeField
]);
779 unsigned CPUType
= support::endian::read32le(&BufPtr
[BWH_CPUTypeField
]);
781 outs() << "<BITCODE_WRAPPER_HEADER"
782 << " Magic=" << format_hex(Magic
, 10)
783 << " Version=" << format_hex(Version
, 10)
784 << " Offset=" << format_hex(Offset
, 10)
785 << " Size=" << format_hex(Size
, 10)
786 << " CPUType=" << format_hex(CPUType
, 10) << "/>\n";
789 if (SkipBitcodeWrapperHeader(BufPtr
, EndBufPtr
, true))
790 return ReportError("Invalid bitcode wrapper header");
793 Stream
= BitstreamCursor(ArrayRef
<uint8_t>(BufPtr
, EndBufPtr
));
795 // Read the stream signature.
797 Signature
[0] = Stream
.Read(8);
798 Signature
[1] = Stream
.Read(8);
799 Signature
[2] = Stream
.Read(4);
800 Signature
[3] = Stream
.Read(4);
801 Signature
[4] = Stream
.Read(4);
802 Signature
[5] = Stream
.Read(4);
804 // Autodetect the file contents, if it is one we know.
805 CurStreamType
= UnknownBitstream
;
806 if (Signature
[0] == 'B' && Signature
[1] == 'C' &&
807 Signature
[2] == 0x0 && Signature
[3] == 0xC &&
808 Signature
[4] == 0xE && Signature
[5] == 0xD)
809 CurStreamType
= LLVMIRBitstream
;
814 /// AnalyzeBitcode - Analyze the bitcode file specified by InputFilename.
815 static int AnalyzeBitcode() {
816 std::unique_ptr
<MemoryBuffer
> StreamBuffer
;
817 BitstreamCursor Stream
;
818 BitstreamBlockInfo BlockInfo
;
819 CurStreamTypeType CurStreamType
;
820 if (openBitcodeFile(InputFilename
, StreamBuffer
, Stream
, CurStreamType
))
822 Stream
.setBlockInfo(&BlockInfo
);
824 // Read block info from BlockInfoFilename, if specified.
825 // The block info must be a top-level block.
826 if (!BlockInfoFilename
.empty()) {
827 std::unique_ptr
<MemoryBuffer
> BlockInfoBuffer
;
828 BitstreamCursor BlockInfoCursor
;
829 CurStreamTypeType BlockInfoStreamType
;
830 if (openBitcodeFile(BlockInfoFilename
, BlockInfoBuffer
, BlockInfoCursor
,
831 BlockInfoStreamType
))
834 while (!BlockInfoCursor
.AtEndOfStream()) {
835 unsigned Code
= BlockInfoCursor
.ReadCode();
836 if (Code
!= bitc::ENTER_SUBBLOCK
)
837 return ReportError("Invalid record at top-level in block info file");
839 unsigned BlockID
= BlockInfoCursor
.ReadSubBlockID();
840 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
) {
841 Optional
<BitstreamBlockInfo
> NewBlockInfo
=
842 BlockInfoCursor
.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
844 return ReportError("Malformed BlockInfoBlock in block info file");
845 BlockInfo
= std::move(*NewBlockInfo
);
849 BlockInfoCursor
.SkipBlock();
853 unsigned NumTopBlocks
= 0;
855 // Parse the top-level structure. We only allow blocks at the top-level.
856 while (!Stream
.AtEndOfStream()) {
857 unsigned Code
= Stream
.ReadCode();
858 if (Code
!= bitc::ENTER_SUBBLOCK
)
859 return ReportError("Invalid record at top-level");
861 unsigned BlockID
= Stream
.ReadSubBlockID();
863 if (ParseBlock(Stream
, BlockInfo
, BlockID
, 0, CurStreamType
))
868 if (Dump
) outs() << "\n\n";
870 uint64_t BufferSizeBits
= Stream
.getBitcodeBytes().size() * CHAR_BIT
;
871 // Print a summary of the read file.
872 outs() << "Summary of " << InputFilename
<< ":\n";
873 outs() << " Total size: ";
874 PrintSize(BufferSizeBits
);
876 outs() << " Stream type: ";
877 switch (CurStreamType
) {
878 case UnknownBitstream
: outs() << "unknown\n"; break;
879 case LLVMIRBitstream
: outs() << "LLVM IR\n"; break;
881 outs() << " # Toplevel Blocks: " << NumTopBlocks
<< "\n";
884 // Emit per-block stats.
885 outs() << "Per-block Summary:\n";
886 for (std::map
<unsigned, PerBlockIDStats
>::iterator I
= BlockIDStats
.begin(),
887 E
= BlockIDStats
.end(); I
!= E
; ++I
) {
888 outs() << " Block ID #" << I
->first
;
889 if (const char *BlockName
=
890 GetBlockName(I
->first
, BlockInfo
, CurStreamType
))
891 outs() << " (" << BlockName
<< ")";
894 const PerBlockIDStats
&Stats
= I
->second
;
895 outs() << " Num Instances: " << Stats
.NumInstances
<< "\n";
896 outs() << " Total Size: ";
897 PrintSize(Stats
.NumBits
);
899 double pct
= (Stats
.NumBits
* 100.0) / BufferSizeBits
;
900 outs() << " Percent of file: " << format("%2.4f%%", pct
) << "\n";
901 if (Stats
.NumInstances
> 1) {
902 outs() << " Average Size: ";
903 PrintSize(Stats
.NumBits
/(double)Stats
.NumInstances
);
905 outs() << " Tot/Avg SubBlocks: " << Stats
.NumSubBlocks
<< "/"
906 << Stats
.NumSubBlocks
/(double)Stats
.NumInstances
<< "\n";
907 outs() << " Tot/Avg Abbrevs: " << Stats
.NumAbbrevs
<< "/"
908 << Stats
.NumAbbrevs
/(double)Stats
.NumInstances
<< "\n";
909 outs() << " Tot/Avg Records: " << Stats
.NumRecords
<< "/"
910 << Stats
.NumRecords
/(double)Stats
.NumInstances
<< "\n";
912 outs() << " Num SubBlocks: " << Stats
.NumSubBlocks
<< "\n";
913 outs() << " Num Abbrevs: " << Stats
.NumAbbrevs
<< "\n";
914 outs() << " Num Records: " << Stats
.NumRecords
<< "\n";
916 if (Stats
.NumRecords
) {
917 double pct
= (Stats
.NumAbbreviatedRecords
* 100.0) / Stats
.NumRecords
;
918 outs() << " Percent Abbrevs: " << format("%2.4f%%", pct
) << "\n";
922 // Print a histogram of the codes we see.
923 if (!NoHistogram
&& !Stats
.CodeFreq
.empty()) {
924 std::vector
<std::pair
<unsigned, unsigned> > FreqPairs
; // <freq,code>
925 for (unsigned i
= 0, e
= Stats
.CodeFreq
.size(); i
!= e
; ++i
)
926 if (unsigned Freq
= Stats
.CodeFreq
[i
].NumInstances
)
927 FreqPairs
.push_back(std::make_pair(Freq
, i
));
928 std::stable_sort(FreqPairs
.begin(), FreqPairs
.end());
929 std::reverse(FreqPairs
.begin(), FreqPairs
.end());
931 outs() << "\tRecord Histogram:\n";
932 outs() << "\t\t Count # Bits b/Rec % Abv Record Kind\n";
933 for (unsigned i
= 0, e
= FreqPairs
.size(); i
!= e
; ++i
) {
934 const PerRecordStats
&RecStats
= Stats
.CodeFreq
[FreqPairs
[i
].second
];
936 outs() << format("\t\t%7d %9lu",
937 RecStats
.NumInstances
,
938 (unsigned long)RecStats
.TotalBits
);
940 if (RecStats
.NumInstances
> 1)
941 outs() << format(" %9.1f",
942 (double)RecStats
.TotalBits
/RecStats
.NumInstances
);
946 if (RecStats
.NumAbbrev
)
949 (double)RecStats
.NumAbbrev
/RecStats
.NumInstances
*100);
954 if (const char *CodeName
= GetCodeName(FreqPairs
[i
].second
, I
->first
,
955 BlockInfo
, CurStreamType
))
956 outs() << CodeName
<< "\n";
958 outs() << "UnknownCode" << FreqPairs
[i
].second
<< "\n";
968 int main(int argc
, char **argv
) {
969 // Print a stack trace if we signal out.
970 sys::PrintStackTraceOnErrorSignal(argv
[0]);
971 PrettyStackTraceProgram
X(argc
, argv
);
972 llvm_shutdown_obj Y
; // Call llvm_shutdown() on exit.
973 cl::ParseCommandLineOptions(argc
, argv
, "llvm-bcanalyzer file analyzer\n");
975 return AnalyzeBitcode();