1 //===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Bitcode/BitcodeAnalyzer.h"
10 #include "llvm/Bitcode/BitcodeReader.h"
11 #include "llvm/Bitcode/LLVMBitCodes.h"
12 #include "llvm/Bitstream/BitCodes.h"
13 #include "llvm/Bitstream/BitstreamReader.h"
14 #include "llvm/Support/Format.h"
15 #include "llvm/Support/SHA1.h"
19 static Error
reportError(StringRef Message
) {
20 return createStringError(std::errc::illegal_byte_sequence
, Message
.data());
23 /// Return a symbolic block name if known, otherwise return null.
24 static Optional
<const char *> GetBlockName(unsigned BlockID
,
25 const BitstreamBlockInfo
&BlockInfo
,
26 CurStreamTypeType CurStreamType
) {
27 // Standard blocks for all bitcode files.
28 if (BlockID
< bitc::FIRST_APPLICATION_BLOCKID
) {
29 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
)
30 return "BLOCKINFO_BLOCK";
34 // Check to see if we have a blockinfo record for this block, with a name.
35 if (const BitstreamBlockInfo::BlockInfo
*Info
=
36 BlockInfo
.getBlockInfo(BlockID
)) {
37 if (!Info
->Name
.empty())
38 return Info
->Name
.c_str();
41 if (CurStreamType
!= LLVMIRBitstream
)
47 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID
:
48 return "OPERAND_BUNDLE_TAGS_BLOCK";
49 case bitc::MODULE_BLOCK_ID
:
50 return "MODULE_BLOCK";
51 case bitc::PARAMATTR_BLOCK_ID
:
52 return "PARAMATTR_BLOCK";
53 case bitc::PARAMATTR_GROUP_BLOCK_ID
:
54 return "PARAMATTR_GROUP_BLOCK_ID";
55 case bitc::TYPE_BLOCK_ID_NEW
:
56 return "TYPE_BLOCK_ID";
57 case bitc::CONSTANTS_BLOCK_ID
:
58 return "CONSTANTS_BLOCK";
59 case bitc::FUNCTION_BLOCK_ID
:
60 return "FUNCTION_BLOCK";
61 case bitc::IDENTIFICATION_BLOCK_ID
:
62 return "IDENTIFICATION_BLOCK_ID";
63 case bitc::VALUE_SYMTAB_BLOCK_ID
:
64 return "VALUE_SYMTAB";
65 case bitc::METADATA_BLOCK_ID
:
66 return "METADATA_BLOCK";
67 case bitc::METADATA_KIND_BLOCK_ID
:
68 return "METADATA_KIND_BLOCK";
69 case bitc::METADATA_ATTACHMENT_ID
:
70 return "METADATA_ATTACHMENT_BLOCK";
71 case bitc::USELIST_BLOCK_ID
:
72 return "USELIST_BLOCK_ID";
73 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID
:
74 return "GLOBALVAL_SUMMARY_BLOCK";
75 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID
:
76 return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
77 case bitc::MODULE_STRTAB_BLOCK_ID
:
78 return "MODULE_STRTAB_BLOCK";
79 case bitc::STRTAB_BLOCK_ID
:
80 return "STRTAB_BLOCK";
81 case bitc::SYMTAB_BLOCK_ID
:
82 return "SYMTAB_BLOCK";
86 /// Return a symbolic code name if known, otherwise return null.
///
/// \param CodeID        record code to name.
/// \param BlockID       ID of the block the record was found in.
/// \param BlockInfo     block-info table; the RecordNames list of the entry
///                      for BlockID is searched for a pair whose first
///                      element equals CodeID.
/// \param CurStreamType kind of stream being analyzed; it is compared against
///                      LLVMIRBitstream before the built-in tables are used.
///
/// NOTE(review): several lines of this function (switch headers, some return
/// statements and the tail of the STRINGIFY_CODE macro) are not present in
/// this copy of the file, so the exact strings returned for some codes cannot
/// be confirmed from here.
87 static Optional
<const char *> GetCodeName(unsigned CodeID
, unsigned BlockID
,
88 const BitstreamBlockInfo
&BlockInfo
,
89 CurStreamTypeType CurStreamType
) {
90 // Standard blocks for all bitcode files.
91 if (BlockID
< bitc::FIRST_APPLICATION_BLOCKID
) {
92 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
) {
96 case bitc::BLOCKINFO_CODE_SETBID
:
98 case bitc::BLOCKINFO_CODE_BLOCKNAME
:
100 case bitc::BLOCKINFO_CODE_SETRECORDNAME
:
101 return "SETRECORDNAME";
107 // Check to see if we have a blockinfo record for this record, with a name.
// The RecordNames entries are scanned in order and the first pair whose
// code matches CodeID supplies the name.
108 if (const BitstreamBlockInfo::BlockInfo
*Info
=
109 BlockInfo
.getBlockInfo(BlockID
)) {
110 for (unsigned i
= 0, e
= Info
->RecordNames
.size(); i
!= e
; ++i
)
111 if (Info
->RecordNames
[i
].first
== CodeID
)
112 return Info
->RecordNames
[i
].second
.c_str();
// NOTE(review): the statement guarded by the stream-type check below is not
// visible in this extract; presumably it returns early for non-LLVM-IR
// streams -- confirm against the full file.
115 if (CurStreamType
!= LLVMIRBitstream
)
// STRINGIFY_CODE(PREFIX, CODE) starts with a `case bitc::PREFIX_CODE:` label;
// the remainder of the macro body is not visible in this extract.
118 #define STRINGIFY_CODE(PREFIX, CODE) \
119 case bitc::PREFIX##_##CODE: \
124 case bitc::MODULE_BLOCK_ID
:
128 STRINGIFY_CODE(MODULE_CODE
, VERSION
)
129 STRINGIFY_CODE(MODULE_CODE
, TRIPLE
)
130 STRINGIFY_CODE(MODULE_CODE
, DATALAYOUT
)
131 STRINGIFY_CODE(MODULE_CODE
, ASM
)
132 STRINGIFY_CODE(MODULE_CODE
, SECTIONNAME
)
133 STRINGIFY_CODE(MODULE_CODE
, DEPLIB
) // Deprecated, present in old bitcode
134 STRINGIFY_CODE(MODULE_CODE
, GLOBALVAR
)
135 STRINGIFY_CODE(MODULE_CODE
, FUNCTION
)
136 STRINGIFY_CODE(MODULE_CODE
, ALIAS
)
137 STRINGIFY_CODE(MODULE_CODE
, GCNAME
)
138 STRINGIFY_CODE(MODULE_CODE
, COMDAT
)
139 STRINGIFY_CODE(MODULE_CODE
, VSTOFFSET
)
140 STRINGIFY_CODE(MODULE_CODE
, METADATA_VALUES_UNUSED
)
141 STRINGIFY_CODE(MODULE_CODE
, SOURCE_FILENAME
)
142 STRINGIFY_CODE(MODULE_CODE
, HASH
)
144 case bitc::IDENTIFICATION_BLOCK_ID
:
148 STRINGIFY_CODE(IDENTIFICATION_CODE
, STRING
)
149 STRINGIFY_CODE(IDENTIFICATION_CODE
, EPOCH
)
151 case bitc::PARAMATTR_BLOCK_ID
:
155 // FIXME: Should these be different?
156 case bitc::PARAMATTR_CODE_ENTRY_OLD
:
158 case bitc::PARAMATTR_CODE_ENTRY
:
161 case bitc::PARAMATTR_GROUP_BLOCK_ID
:
165 case bitc::PARAMATTR_GRP_CODE_ENTRY
:
168 case bitc::TYPE_BLOCK_ID_NEW
:
172 STRINGIFY_CODE(TYPE_CODE
, NUMENTRY
)
173 STRINGIFY_CODE(TYPE_CODE
, VOID
)
174 STRINGIFY_CODE(TYPE_CODE
, FLOAT
)
175 STRINGIFY_CODE(TYPE_CODE
, DOUBLE
)
176 STRINGIFY_CODE(TYPE_CODE
, LABEL
)
177 STRINGIFY_CODE(TYPE_CODE
, OPAQUE
)
178 STRINGIFY_CODE(TYPE_CODE
, INTEGER
)
179 STRINGIFY_CODE(TYPE_CODE
, POINTER
)
180 STRINGIFY_CODE(TYPE_CODE
, HALF
)
181 STRINGIFY_CODE(TYPE_CODE
, ARRAY
)
182 STRINGIFY_CODE(TYPE_CODE
, VECTOR
)
183 STRINGIFY_CODE(TYPE_CODE
, X86_FP80
)
184 STRINGIFY_CODE(TYPE_CODE
, FP128
)
185 STRINGIFY_CODE(TYPE_CODE
, PPC_FP128
)
186 STRINGIFY_CODE(TYPE_CODE
, METADATA
)
187 STRINGIFY_CODE(TYPE_CODE
, X86_MMX
)
188 STRINGIFY_CODE(TYPE_CODE
, STRUCT_ANON
)
189 STRINGIFY_CODE(TYPE_CODE
, STRUCT_NAME
)
190 STRINGIFY_CODE(TYPE_CODE
, STRUCT_NAMED
)
191 STRINGIFY_CODE(TYPE_CODE
, FUNCTION
)
192 STRINGIFY_CODE(TYPE_CODE
, TOKEN
)
193 STRINGIFY_CODE(TYPE_CODE
, BFLOAT
)
196 case bitc::CONSTANTS_BLOCK_ID
:
200 STRINGIFY_CODE(CST_CODE
, SETTYPE
)
201 STRINGIFY_CODE(CST_CODE
, NULL
)
202 STRINGIFY_CODE(CST_CODE
, UNDEF
)
203 STRINGIFY_CODE(CST_CODE
, INTEGER
)
204 STRINGIFY_CODE(CST_CODE
, WIDE_INTEGER
)
205 STRINGIFY_CODE(CST_CODE
, FLOAT
)
206 STRINGIFY_CODE(CST_CODE
, AGGREGATE
)
207 STRINGIFY_CODE(CST_CODE
, STRING
)
208 STRINGIFY_CODE(CST_CODE
, CSTRING
)
209 STRINGIFY_CODE(CST_CODE
, CE_BINOP
)
210 STRINGIFY_CODE(CST_CODE
, CE_CAST
)
211 STRINGIFY_CODE(CST_CODE
, CE_GEP
)
212 STRINGIFY_CODE(CST_CODE
, CE_INBOUNDS_GEP
)
213 STRINGIFY_CODE(CST_CODE
, CE_SELECT
)
214 STRINGIFY_CODE(CST_CODE
, CE_EXTRACTELT
)
215 STRINGIFY_CODE(CST_CODE
, CE_INSERTELT
)
216 STRINGIFY_CODE(CST_CODE
, CE_SHUFFLEVEC
)
217 STRINGIFY_CODE(CST_CODE
, CE_CMP
)
218 STRINGIFY_CODE(CST_CODE
, INLINEASM
)
219 STRINGIFY_CODE(CST_CODE
, CE_SHUFVEC_EX
)
220 STRINGIFY_CODE(CST_CODE
, CE_UNOP
)
221 STRINGIFY_CODE(CST_CODE
, DSO_LOCAL_EQUIVALENT
)
// BLOCKADDRESS is spelled out by hand and returns the full enumerator name
// rather than going through STRINGIFY_CODE.
222 case bitc::CST_CODE_BLOCKADDRESS
:
223 return "CST_CODE_BLOCKADDRESS";
224 STRINGIFY_CODE(CST_CODE
, DATA
)
226 case bitc::FUNCTION_BLOCK_ID
:
230 STRINGIFY_CODE(FUNC_CODE
, DECLAREBLOCKS
)
231 STRINGIFY_CODE(FUNC_CODE
, INST_BINOP
)
232 STRINGIFY_CODE(FUNC_CODE
, INST_CAST
)
233 STRINGIFY_CODE(FUNC_CODE
, INST_GEP_OLD
)
234 STRINGIFY_CODE(FUNC_CODE
, INST_INBOUNDS_GEP_OLD
)
235 STRINGIFY_CODE(FUNC_CODE
, INST_SELECT
)
236 STRINGIFY_CODE(FUNC_CODE
, INST_EXTRACTELT
)
237 STRINGIFY_CODE(FUNC_CODE
, INST_INSERTELT
)
238 STRINGIFY_CODE(FUNC_CODE
, INST_SHUFFLEVEC
)
239 STRINGIFY_CODE(FUNC_CODE
, INST_CMP
)
240 STRINGIFY_CODE(FUNC_CODE
, INST_RET
)
241 STRINGIFY_CODE(FUNC_CODE
, INST_BR
)
242 STRINGIFY_CODE(FUNC_CODE
, INST_SWITCH
)
243 STRINGIFY_CODE(FUNC_CODE
, INST_INVOKE
)
244 STRINGIFY_CODE(FUNC_CODE
, INST_UNOP
)
245 STRINGIFY_CODE(FUNC_CODE
, INST_UNREACHABLE
)
246 STRINGIFY_CODE(FUNC_CODE
, INST_CLEANUPRET
)
247 STRINGIFY_CODE(FUNC_CODE
, INST_CATCHRET
)
248 STRINGIFY_CODE(FUNC_CODE
, INST_CATCHPAD
)
249 STRINGIFY_CODE(FUNC_CODE
, INST_PHI
)
250 STRINGIFY_CODE(FUNC_CODE
, INST_ALLOCA
)
251 STRINGIFY_CODE(FUNC_CODE
, INST_LOAD
)
252 STRINGIFY_CODE(FUNC_CODE
, INST_VAARG
)
253 STRINGIFY_CODE(FUNC_CODE
, INST_STORE
)
254 STRINGIFY_CODE(FUNC_CODE
, INST_EXTRACTVAL
)
255 STRINGIFY_CODE(FUNC_CODE
, INST_INSERTVAL
)
256 STRINGIFY_CODE(FUNC_CODE
, INST_CMP2
)
257 STRINGIFY_CODE(FUNC_CODE
, INST_VSELECT
)
258 STRINGIFY_CODE(FUNC_CODE
, DEBUG_LOC_AGAIN
)
259 STRINGIFY_CODE(FUNC_CODE
, INST_CALL
)
260 STRINGIFY_CODE(FUNC_CODE
, DEBUG_LOC
)
261 STRINGIFY_CODE(FUNC_CODE
, INST_GEP
)
262 STRINGIFY_CODE(FUNC_CODE
, OPERAND_BUNDLE
)
263 STRINGIFY_CODE(FUNC_CODE
, INST_FENCE
)
264 STRINGIFY_CODE(FUNC_CODE
, INST_ATOMICRMW
)
265 STRINGIFY_CODE(FUNC_CODE
, INST_LOADATOMIC
)
266 STRINGIFY_CODE(FUNC_CODE
, INST_STOREATOMIC
)
267 STRINGIFY_CODE(FUNC_CODE
, INST_CMPXCHG
)
268 STRINGIFY_CODE(FUNC_CODE
, INST_CALLBR
)
270 case bitc::VALUE_SYMTAB_BLOCK_ID
:
274 STRINGIFY_CODE(VST_CODE
, ENTRY
)
275 STRINGIFY_CODE(VST_CODE
, BBENTRY
)
276 STRINGIFY_CODE(VST_CODE
, FNENTRY
)
277 STRINGIFY_CODE(VST_CODE
, COMBINED_ENTRY
)
279 case bitc::MODULE_STRTAB_BLOCK_ID
:
283 STRINGIFY_CODE(MST_CODE
, ENTRY
)
284 STRINGIFY_CODE(MST_CODE
, HASH
)
// The two summary block IDs below are adjacent case labels and share the
// FS_* record table that follows.
286 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID
:
287 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID
:
291 STRINGIFY_CODE(FS
, PERMODULE
)
292 STRINGIFY_CODE(FS
, PERMODULE_PROFILE
)
293 STRINGIFY_CODE(FS
, PERMODULE_RELBF
)
294 STRINGIFY_CODE(FS
, PERMODULE_GLOBALVAR_INIT_REFS
)
295 STRINGIFY_CODE(FS
, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS
)
296 STRINGIFY_CODE(FS
, COMBINED
)
297 STRINGIFY_CODE(FS
, COMBINED_PROFILE
)
298 STRINGIFY_CODE(FS
, COMBINED_GLOBALVAR_INIT_REFS
)
299 STRINGIFY_CODE(FS
, ALIAS
)
300 STRINGIFY_CODE(FS
, COMBINED_ALIAS
)
301 STRINGIFY_CODE(FS
, COMBINED_ORIGINAL_NAME
)
302 STRINGIFY_CODE(FS
, VERSION
)
303 STRINGIFY_CODE(FS
, FLAGS
)
304 STRINGIFY_CODE(FS
, TYPE_TESTS
)
305 STRINGIFY_CODE(FS
, TYPE_TEST_ASSUME_VCALLS
)
306 STRINGIFY_CODE(FS
, TYPE_CHECKED_LOAD_VCALLS
)
307 STRINGIFY_CODE(FS
, TYPE_TEST_ASSUME_CONST_VCALL
)
308 STRINGIFY_CODE(FS
, TYPE_CHECKED_LOAD_CONST_VCALL
)
309 STRINGIFY_CODE(FS
, VALUE_GUID
)
310 STRINGIFY_CODE(FS
, CFI_FUNCTION_DEFS
)
311 STRINGIFY_CODE(FS
, CFI_FUNCTION_DECLS
)
312 STRINGIFY_CODE(FS
, TYPE_ID
)
313 STRINGIFY_CODE(FS
, TYPE_ID_METADATA
)
314 STRINGIFY_CODE(FS
, BLOCK_COUNT
)
315 STRINGIFY_CODE(FS
, PARAM_ACCESS
)
317 case bitc::METADATA_ATTACHMENT_ID
:
321 STRINGIFY_CODE(METADATA
, ATTACHMENT
)
323 case bitc::METADATA_BLOCK_ID
:
327 STRINGIFY_CODE(METADATA
, STRING_OLD
)
328 STRINGIFY_CODE(METADATA
, VALUE
)
329 STRINGIFY_CODE(METADATA
, NODE
)
330 STRINGIFY_CODE(METADATA
, NAME
)
331 STRINGIFY_CODE(METADATA
, DISTINCT_NODE
)
332 STRINGIFY_CODE(METADATA
, KIND
) // Older bitcode has it in a MODULE_BLOCK
333 STRINGIFY_CODE(METADATA
, LOCATION
)
334 STRINGIFY_CODE(METADATA
, OLD_NODE
)
335 STRINGIFY_CODE(METADATA
, OLD_FN_NODE
)
336 STRINGIFY_CODE(METADATA
, NAMED_NODE
)
337 STRINGIFY_CODE(METADATA
, GENERIC_DEBUG
)
338 STRINGIFY_CODE(METADATA
, SUBRANGE
)
339 STRINGIFY_CODE(METADATA
, ENUMERATOR
)
340 STRINGIFY_CODE(METADATA
, BASIC_TYPE
)
341 STRINGIFY_CODE(METADATA
, FILE)
342 STRINGIFY_CODE(METADATA
, DERIVED_TYPE
)
343 STRINGIFY_CODE(METADATA
, COMPOSITE_TYPE
)
344 STRINGIFY_CODE(METADATA
, SUBROUTINE_TYPE
)
345 STRINGIFY_CODE(METADATA
, COMPILE_UNIT
)
346 STRINGIFY_CODE(METADATA
, SUBPROGRAM
)
347 STRINGIFY_CODE(METADATA
, LEXICAL_BLOCK
)
348 STRINGIFY_CODE(METADATA
, LEXICAL_BLOCK_FILE
)
349 STRINGIFY_CODE(METADATA
, NAMESPACE
)
350 STRINGIFY_CODE(METADATA
, TEMPLATE_TYPE
)
351 STRINGIFY_CODE(METADATA
, TEMPLATE_VALUE
)
352 STRINGIFY_CODE(METADATA
, GLOBAL_VAR
)
353 STRINGIFY_CODE(METADATA
, LOCAL_VAR
)
354 STRINGIFY_CODE(METADATA
, EXPRESSION
)
355 STRINGIFY_CODE(METADATA
, OBJC_PROPERTY
)
356 STRINGIFY_CODE(METADATA
, IMPORTED_ENTITY
)
357 STRINGIFY_CODE(METADATA
, MODULE
)
358 STRINGIFY_CODE(METADATA
, MACRO
)
359 STRINGIFY_CODE(METADATA
, MACRO_FILE
)
360 STRINGIFY_CODE(METADATA
, STRINGS
)
361 STRINGIFY_CODE(METADATA
, GLOBAL_DECL_ATTACHMENT
)
362 STRINGIFY_CODE(METADATA
, GLOBAL_VAR_EXPR
)
363 STRINGIFY_CODE(METADATA
, INDEX_OFFSET
)
364 STRINGIFY_CODE(METADATA
, INDEX
)
365 STRINGIFY_CODE(METADATA
, ARG_LIST
)
367 case bitc::METADATA_KIND_BLOCK_ID
:
371 STRINGIFY_CODE(METADATA
, KIND
)
373 case bitc::USELIST_BLOCK_ID
:
377 case bitc::USELIST_CODE_DEFAULT
:
378 return "USELIST_CODE_DEFAULT";
379 case bitc::USELIST_CODE_BB
:
380 return "USELIST_CODE_BB";
383 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID
:
387 case bitc::OPERAND_BUNDLE_TAG
:
388 return "OPERAND_BUNDLE_TAG";
390 case bitc::STRTAB_BLOCK_ID
:
394 case bitc::STRTAB_BLOB
:
397 case bitc::SYMTAB_BLOCK_ID
:
401 case bitc::SYMTAB_BLOB
:
405 #undef STRINGIFY_CODE
408 static void printSize(raw_ostream
&OS
, double Bits
) {
409 OS
<< format("%.2f/%.2fB/%luW", Bits
, Bits
/ 8, (unsigned long)(Bits
/ 32));
411 static void printSize(raw_ostream
&OS
, uint64_t Bits
) {
412 OS
<< format("%lub/%.2fB/%luW", (unsigned long)Bits
, (double)Bits
/ 8,
413 (unsigned long)(Bits
/ 32));
416 static Expected
<CurStreamTypeType
> ReadSignature(BitstreamCursor
&Stream
) {
417 auto tryRead
= [&Stream
](char &Dest
, size_t size
) -> Error
{
418 if (Expected
<SimpleBitstreamCursor::word_t
> MaybeWord
= Stream
.Read(size
))
419 Dest
= MaybeWord
.get();
421 return MaybeWord
.takeError();
422 return Error::success();
426 if (Error Err
= tryRead(Signature
[0], 8))
427 return std::move(Err
);
428 if (Error Err
= tryRead(Signature
[1], 8))
429 return std::move(Err
);
431 // Autodetect the file contents, if it is one we know.
432 if (Signature
[0] == 'C' && Signature
[1] == 'P') {
433 if (Error Err
= tryRead(Signature
[2], 8))
434 return std::move(Err
);
435 if (Error Err
= tryRead(Signature
[3], 8))
436 return std::move(Err
);
437 if (Signature
[2] == 'C' && Signature
[3] == 'H')
438 return ClangSerializedASTBitstream
;
439 } else if (Signature
[0] == 'D' && Signature
[1] == 'I') {
440 if (Error Err
= tryRead(Signature
[2], 8))
441 return std::move(Err
);
442 if (Error Err
= tryRead(Signature
[3], 8))
443 return std::move(Err
);
444 if (Signature
[2] == 'A' && Signature
[3] == 'G')
445 return ClangSerializedDiagnosticsBitstream
;
446 } else if (Signature
[0] == 'R' && Signature
[1] == 'M') {
447 if (Error Err
= tryRead(Signature
[2], 8))
448 return std::move(Err
);
449 if (Error Err
= tryRead(Signature
[3], 8))
450 return std::move(Err
);
451 if (Signature
[2] == 'R' && Signature
[3] == 'K')
452 return LLVMBitstreamRemarks
;
454 if (Error Err
= tryRead(Signature
[2], 4))
455 return std::move(Err
);
456 if (Error Err
= tryRead(Signature
[3], 4))
457 return std::move(Err
);
458 if (Error Err
= tryRead(Signature
[4], 4))
459 return std::move(Err
);
460 if (Error Err
= tryRead(Signature
[5], 4))
461 return std::move(Err
);
462 if (Signature
[0] == 'B' && Signature
[1] == 'C' && Signature
[2] == 0x0 &&
463 Signature
[3] == 0xC && Signature
[4] == 0xE && Signature
[5] == 0xD)
464 return LLVMIRBitstream
;
466 return UnknownBitstream
;
469 static Expected
<CurStreamTypeType
> analyzeHeader(Optional
<BCDumpOptions
> O
,
470 BitstreamCursor
&Stream
) {
471 ArrayRef
<uint8_t> Bytes
= Stream
.getBitcodeBytes();
472 const unsigned char *BufPtr
= (const unsigned char *)Bytes
.data();
473 const unsigned char *EndBufPtr
= BufPtr
+ Bytes
.size();
475 // If we have a wrapper header, parse it and ignore the non-bc file
476 // contents. The magic number is 0x0B17C0DE stored in little endian.
477 if (isBitcodeWrapper(BufPtr
, EndBufPtr
)) {
478 if (Bytes
.size() < BWH_HeaderSize
)
479 return reportError("Invalid bitcode wrapper header");
482 unsigned Magic
= support::endian::read32le(&BufPtr
[BWH_MagicField
]);
483 unsigned Version
= support::endian::read32le(&BufPtr
[BWH_VersionField
]);
484 unsigned Offset
= support::endian::read32le(&BufPtr
[BWH_OffsetField
]);
485 unsigned Size
= support::endian::read32le(&BufPtr
[BWH_SizeField
]);
486 unsigned CPUType
= support::endian::read32le(&BufPtr
[BWH_CPUTypeField
]);
488 O
->OS
<< "<BITCODE_WRAPPER_HEADER"
489 << " Magic=" << format_hex(Magic
, 10)
490 << " Version=" << format_hex(Version
, 10)
491 << " Offset=" << format_hex(Offset
, 10)
492 << " Size=" << format_hex(Size
, 10)
493 << " CPUType=" << format_hex(CPUType
, 10) << "/>\n";
496 if (SkipBitcodeWrapperHeader(BufPtr
, EndBufPtr
, true))
497 return reportError("Invalid bitcode wrapper header");
500 // Use the cursor modified by skipping the wrapper header.
501 Stream
= BitstreamCursor(ArrayRef
<uint8_t>(BufPtr
, EndBufPtr
));
503 return ReadSignature(Stream
);
506 static bool canDecodeBlob(unsigned Code
, unsigned BlockID
) {
507 return BlockID
== bitc::METADATA_BLOCK_ID
&& Code
== bitc::METADATA_STRINGS
;
// Decode and print the string table carried by a metadata-strings blob.
//
// Record[0] is read as the number of strings and Record[1] as the offset
// inside Blob at which the character data begins; the bytes before that
// offset are read as a sequence of VBR6-encoded string lengths. Each string
// is written to OS escaped (hex form) and prefixed by Indent.
//
// Returns an Error when the blob is empty, when Record does not hold exactly
// two entries, when the length stream ends early ("bad length"), or when
// fewer characters remain than a decoded length requires ("truncated chars").
//
// NOTE(review): part of the signature, the loop header and a few statements
// are not present in this copy of the file.
510 Error
BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent
,
511 ArrayRef
<uint64_t> Record
,
515 return reportError("Cannot decode empty blob.");
517 if (Record
.size() != 2)
519 "Decoding metadata strings blob needs two record entries.");
521 unsigned NumStrings
= Record
[0];
522 unsigned StringsOffset
= Record
[1];
523 OS
<< " num-strings = " << NumStrings
<< " {\n";
// Split the blob: lengths live in [0, StringsOffset), characters after it.
525 StringRef Lengths
= Blob
.slice(0, StringsOffset
);
526 SimpleBitstreamCursor
R(Lengths
);
527 StringRef Strings
= Blob
.drop_front(StringsOffset
);
529 if (R
.AtEndOfStream())
530 return reportError("bad length");
532 Expected
<uint32_t> MaybeSize
= R
.ReadVBR(6);
534 return MaybeSize
.takeError();
535 uint32_t Size
= MaybeSize
.get();
536 if (Strings
.size() < Size
)
537 return reportError("truncated chars");
539 OS
<< Indent
<< " '";
540 OS
.write_escaped(Strings
.slice(0, Size
), /*hex=*/true);
// Advance past the characters that were just printed.
542 Strings
= Strings
.drop_front(Size
);
// NOTE(review): the condition pre-decrements the unsigned NumStrings; if
// Record[0] were 0 the counter would wrap around -- confirm whether a zero
// count can reach this loop.
543 } while (--NumStrings
);
545 OS
<< Indent
<< " }";
546 return Error::success();
// Construct an analyzer over Buffer. BlockInfoStream is emplaced from
// *BlockInfoBuffer.
// NOTE(review): the member-initializer list and the condition guarding the
// emplace are not visible in this copy; presumably the emplace only happens
// when BlockInfoBuffer holds a value -- confirm against the full file.
549 BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer
,
550 Optional
<StringRef
> BlockInfoBuffer
)
553 BlockInfoStream
.emplace(*BlockInfoBuffer
);
// Analyze the whole bitstream.
//
// Steps visible here:
//   1. analyzeHeader() determines CurStreamType for the main Stream.
//   2. If BlockInfoStream is set, its header is analyzed too and its
//      top-level blocks are walked: BLOCKINFO blocks are read (with names)
//      into BlockInfo, and SkipBlock() is also called in that loop.
//   3. Every top-level entry of Stream must be ENTER_SUBBLOCK; each block is
//      handed to parseBlock() at indent level 0 together with O and CheckHash.
//
// Returns Error::success() on completion; otherwise an Error from the reader
// or one of the reportError() messages below.
//
// NOTE(review): several guard/return lines are missing from this copy of the
// file, so the exact conditions around some returns cannot be confirmed here.
556 Error
BitcodeAnalyzer::analyze(Optional
<BCDumpOptions
> O
,
557 Optional
<StringRef
> CheckHash
) {
558 Expected
<CurStreamTypeType
> MaybeType
= analyzeHeader(O
, Stream
);
560 return MaybeType
.takeError();
562 CurStreamType
= *MaybeType
;
564 Stream
.setBlockInfo(&BlockInfo
);
566 // Read block info from BlockInfoStream, if specified.
567 // The block info must be a top-level block.
568 if (BlockInfoStream
) {
569 BitstreamCursor
BlockInfoCursor(*BlockInfoStream
);
570 Expected
<CurStreamTypeType
> H
= analyzeHeader(O
, BlockInfoCursor
);
572 return H
.takeError();
574 while (!BlockInfoCursor
.AtEndOfStream()) {
575 Expected
<unsigned> MaybeCode
= BlockInfoCursor
.ReadCode();
577 return MaybeCode
.takeError();
// Only sub-blocks are accepted at the top level of the block info file.
578 if (MaybeCode
.get() != bitc::ENTER_SUBBLOCK
)
579 return reportError("Invalid record at top-level in block info file");
581 Expected
<unsigned> MaybeBlockID
= BlockInfoCursor
.ReadSubBlockID();
583 return MaybeBlockID
.takeError();
584 if (MaybeBlockID
.get() == bitc::BLOCKINFO_BLOCK_ID
) {
585 Expected
<Optional
<BitstreamBlockInfo
>> MaybeNewBlockInfo
=
586 BlockInfoCursor
.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
587 if (!MaybeNewBlockInfo
)
588 return MaybeNewBlockInfo
.takeError();
589 Optional
<BitstreamBlockInfo
> NewBlockInfo
=
590 std::move(MaybeNewBlockInfo
.get());
592 return reportError("Malformed BlockInfoBlock in block info file");
// Replace the analyzer's block info with the freshly read one.
593 BlockInfo
= std::move(*NewBlockInfo
);
597 if (Error Err
= BlockInfoCursor
.SkipBlock())
602 // Parse the top-level structure. We only allow blocks at the top-level.
603 while (!Stream
.AtEndOfStream()) {
604 Expected
<unsigned> MaybeCode
= Stream
.ReadCode();
606 return MaybeCode
.takeError();
607 if (MaybeCode
.get() != bitc::ENTER_SUBBLOCK
)
608 return reportError("Invalid record at top-level");
610 Expected
<unsigned> MaybeBlockID
= Stream
.ReadSubBlockID();
612 return MaybeBlockID
.takeError();
614 if (Error E
= parseBlock(MaybeBlockID
.get(), 0, O
, CheckHash
))
619 return Error::success();
// Print a human-readable summary of the analyzed stream to O.OS.
//
// Output visible here, in order: the total size of the bitcode bytes, the
// stream type, the number of top-level blocks, then one section per entry of
// BlockIDStats (instances, sizes, sub-block/abbrev/record counts, percentage
// of abbreviated records). When O.Histogram is set and a block recorded any
// code frequencies, a per-record histogram is printed for that block.
//
// Filename, when used, is printed via Filename->data().
//
// NOTE(review): several lines (some output statements, loop/else headers and
// closing braces) are missing from this copy of the file.
622 void BitcodeAnalyzer::printStats(BCDumpOptions O
,
623 Optional
<StringRef
> Filename
) {
624 uint64_t BufferSizeBits
= Stream
.getBitcodeBytes().size() * CHAR_BIT
;
625 // Print a summary of the read file.
628 O
.OS
<< "of " << Filename
->data() << ":\n";
629 O
.OS
<< " Total size: ";
630 printSize(O
.OS
, BufferSizeBits
);
632 O
.OS
<< " Stream type: ";
633 switch (CurStreamType
) {
634 case UnknownBitstream
:
637 case LLVMIRBitstream
:
640 case ClangSerializedASTBitstream
:
641 O
.OS
<< "Clang Serialized AST\n";
643 case ClangSerializedDiagnosticsBitstream
:
644 O
.OS
<< "Clang Serialized Diagnostics\n";
646 case LLVMBitstreamRemarks
:
647 O
.OS
<< "LLVM Remarks\n";
650 O
.OS
<< " # Toplevel Blocks: " << NumTopBlocks
<< "\n";
653 // Emit per-block stats.
654 O
.OS
<< "Per-block Summary:\n";
655 for (std::map
<unsigned, PerBlockIDStats
>::iterator I
= BlockIDStats
.begin(),
656 E
= BlockIDStats
.end();
658 O
.OS
<< " Block ID #" << I
->first
;
// Append the symbolic block name in parentheses when one is known.
659 if (Optional
<const char *> BlockName
=
660 GetBlockName(I
->first
, BlockInfo
, CurStreamType
))
661 O
.OS
<< " (" << *BlockName
<< ")";
664 const PerBlockIDStats
&Stats
= I
->second
;
665 O
.OS
<< " Num Instances: " << Stats
.NumInstances
<< "\n";
666 O
.OS
<< " Total Size: ";
667 printSize(O
.OS
, Stats
.NumBits
);
// Share of the whole buffer (in bits) taken by this block ID.
669 double pct
= (Stats
.NumBits
* 100.0) / BufferSizeBits
;
670 O
.OS
<< " Percent of file: " << format("%2.4f%%", pct
) << "\n";
// With more than one instance, print totals alongside per-instance averages.
671 if (Stats
.NumInstances
> 1) {
672 O
.OS
<< " Average Size: ";
673 printSize(O
.OS
, Stats
.NumBits
/ (double)Stats
.NumInstances
);
675 O
.OS
<< " Tot/Avg SubBlocks: " << Stats
.NumSubBlocks
<< "/"
676 << Stats
.NumSubBlocks
/ (double)Stats
.NumInstances
<< "\n";
677 O
.OS
<< " Tot/Avg Abbrevs: " << Stats
.NumAbbrevs
<< "/"
678 << Stats
.NumAbbrevs
/ (double)Stats
.NumInstances
<< "\n";
679 O
.OS
<< " Tot/Avg Records: " << Stats
.NumRecords
<< "/"
680 << Stats
.NumRecords
/ (double)Stats
.NumInstances
<< "\n";
682 O
.OS
<< " Num SubBlocks: " << Stats
.NumSubBlocks
<< "\n";
683 O
.OS
<< " Num Abbrevs: " << Stats
.NumAbbrevs
<< "\n";
684 O
.OS
<< " Num Records: " << Stats
.NumRecords
<< "\n";
// The abbreviation percentage is only computed when NumRecords is non-zero.
686 if (Stats
.NumRecords
) {
687 double pct
= (Stats
.NumAbbreviatedRecords
* 100.0) / Stats
.NumRecords
;
688 O
.OS
<< " Percent Abbrevs: " << format("%2.4f%%", pct
) << "\n";
692 // Print a histogram of the codes we see.
693 if (O
.Histogram
&& !Stats
.CodeFreq
.empty()) {
694 std::vector
<std::pair
<unsigned, unsigned>> FreqPairs
; // <freq,code>
// Collect only the codes that occurred at least once.
695 for (unsigned i
= 0, e
= Stats
.CodeFreq
.size(); i
!= e
; ++i
)
696 if (unsigned Freq
= Stats
.CodeFreq
[i
].NumInstances
)
697 FreqPairs
.push_back(std::make_pair(Freq
, i
));
// Sort ascending by (frequency, code), then reverse so the most frequent
// codes are listed first.
698 llvm::stable_sort(FreqPairs
);
699 std::reverse(FreqPairs
.begin(), FreqPairs
.end());
701 O
.OS
<< "\tRecord Histogram:\n";
702 O
.OS
<< "\t\t Count # Bits b/Rec % Abv Record Kind\n";
703 for (unsigned i
= 0, e
= FreqPairs
.size(); i
!= e
; ++i
) {
704 const PerRecordStats
&RecStats
= Stats
.CodeFreq
[FreqPairs
[i
].second
];
706 O
.OS
<< format("\t\t%7d %9lu", RecStats
.NumInstances
,
707 (unsigned long)RecStats
.TotalBits
);
// Average bits per record is printed when there is more than one instance.
709 if (RecStats
.NumInstances
> 1)
710 O
.OS
<< format(" %9.1f",
711 (double)RecStats
.TotalBits
/ RecStats
.NumInstances
);
// Percentage of this record's instances that used an abbreviation.
715 if (RecStats
.NumAbbrev
)
716 O
.OS
<< format(" %7.2f", (double)RecStats
.NumAbbrev
/
717 RecStats
.NumInstances
* 100);
// Print the symbolic record name; the "UnknownCode<N>" form further down
// is the alternative output.
722 if (Optional
<const char *> CodeName
= GetCodeName(
723 FreqPairs
[i
].second
, I
->first
, BlockInfo
, CurStreamType
))
724 O
.OS
<< *CodeName
<< "\n";
726 O
.OS
<< "UnknownCode" << FreqPairs
[i
].second
<< "\n";
733 Error
BitcodeAnalyzer::parseBlock(unsigned BlockID
, unsigned IndentLevel
,
734 Optional
<BCDumpOptions
> O
,
735 Optional
<StringRef
> CheckHash
) {
736 std::string
Indent(IndentLevel
* 2, ' ');
737 uint64_t BlockBitStart
= Stream
.GetCurrentBitNo();
739 // Get the statistics for this BlockID.
740 PerBlockIDStats
&BlockStats
= BlockIDStats
[BlockID
];
742 BlockStats
.NumInstances
++;
744 // BLOCKINFO is a special part of the stream.
745 bool DumpRecords
= O
.hasValue();
746 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
) {
748 O
->OS
<< Indent
<< "<BLOCKINFO_BLOCK/>\n";
749 Expected
<Optional
<BitstreamBlockInfo
>> MaybeNewBlockInfo
=
750 Stream
.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
751 if (!MaybeNewBlockInfo
)
752 return MaybeNewBlockInfo
.takeError();
753 Optional
<BitstreamBlockInfo
> NewBlockInfo
=
754 std::move(MaybeNewBlockInfo
.get());
756 return reportError("Malformed BlockInfoBlock");
757 BlockInfo
= std::move(*NewBlockInfo
);
758 if (Error Err
= Stream
.JumpToBit(BlockBitStart
))
760 // It's not really interesting to dump the contents of the blockinfo
765 unsigned NumWords
= 0;
766 if (Error Err
= Stream
.EnterSubBlock(BlockID
, &NumWords
))
769 // Keep it for later, when we see a MODULE_HASH record
770 uint64_t BlockEntryPos
= Stream
.getCurrentByteNo();
772 Optional
<const char *> BlockName
= None
;
774 O
->OS
<< Indent
<< "<";
775 if ((BlockName
= GetBlockName(BlockID
, BlockInfo
, CurStreamType
)))
778 O
->OS
<< "UnknownBlock" << BlockID
;
780 if (!O
->Symbolic
&& BlockName
)
781 O
->OS
<< " BlockID=" << BlockID
;
783 O
->OS
<< " NumWords=" << NumWords
784 << " BlockCodeSize=" << Stream
.getAbbrevIDWidth() << ">\n";
787 SmallVector
<uint64_t, 64> Record
;
789 // Keep the offset to the metadata index if seen.
790 uint64_t MetadataIndexOffset
= 0;
792 // Read all the records for this block.
794 if (Stream
.AtEndOfStream())
795 return reportError("Premature end of bitstream");
797 uint64_t RecordStartBit
= Stream
.GetCurrentBitNo();
799 Expected
<BitstreamEntry
> MaybeEntry
=
800 Stream
.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs
);
802 return MaybeEntry
.takeError();
803 BitstreamEntry Entry
= MaybeEntry
.get();
805 switch (Entry
.Kind
) {
806 case BitstreamEntry::Error
:
807 return reportError("malformed bitcode file");
808 case BitstreamEntry::EndBlock
: {
809 uint64_t BlockBitEnd
= Stream
.GetCurrentBitNo();
810 BlockStats
.NumBits
+= BlockBitEnd
- BlockBitStart
;
812 O
->OS
<< Indent
<< "</";
814 O
->OS
<< *BlockName
<< ">\n";
816 O
->OS
<< "UnknownBlock" << BlockID
<< ">\n";
818 return Error::success();
821 case BitstreamEntry::SubBlock
: {
822 uint64_t SubBlockBitStart
= Stream
.GetCurrentBitNo();
823 if (Error E
= parseBlock(Entry
.ID
, IndentLevel
+ 1, O
, CheckHash
))
825 ++BlockStats
.NumSubBlocks
;
826 uint64_t SubBlockBitEnd
= Stream
.GetCurrentBitNo();
828 // Don't include subblock sizes in the size of this block.
829 BlockBitStart
+= SubBlockBitEnd
- SubBlockBitStart
;
832 case BitstreamEntry::Record
:
833 // The interesting case.
837 if (Entry
.ID
== bitc::DEFINE_ABBREV
) {
838 if (Error Err
= Stream
.ReadAbbrevRecord())
840 ++BlockStats
.NumAbbrevs
;
846 ++BlockStats
.NumRecords
;
849 uint64_t CurrentRecordPos
= Stream
.GetCurrentBitNo();
850 Expected
<unsigned> MaybeCode
= Stream
.readRecord(Entry
.ID
, Record
, &Blob
);
852 return MaybeCode
.takeError();
853 unsigned Code
= MaybeCode
.get();
855 // Increment the # occurrences of this code.
856 if (BlockStats
.CodeFreq
.size() <= Code
)
857 BlockStats
.CodeFreq
.resize(Code
+ 1);
858 BlockStats
.CodeFreq
[Code
].NumInstances
++;
859 BlockStats
.CodeFreq
[Code
].TotalBits
+=
860 Stream
.GetCurrentBitNo() - RecordStartBit
;
861 if (Entry
.ID
!= bitc::UNABBREV_RECORD
) {
862 BlockStats
.CodeFreq
[Code
].NumAbbrev
++;
863 ++BlockStats
.NumAbbreviatedRecords
;
867 O
->OS
<< Indent
<< " <";
868 Optional
<const char *> CodeName
=
869 GetCodeName(Code
, BlockID
, BlockInfo
, CurStreamType
);
873 O
->OS
<< "UnknownCode" << Code
;
874 if (!O
->Symbolic
&& CodeName
)
875 O
->OS
<< " codeid=" << Code
;
876 const BitCodeAbbrev
*Abbv
= nullptr;
877 if (Entry
.ID
!= bitc::UNABBREV_RECORD
) {
878 Abbv
= Stream
.getAbbrev(Entry
.ID
);
879 O
->OS
<< " abbrevid=" << Entry
.ID
;
882 for (unsigned i
= 0, e
= Record
.size(); i
!= e
; ++i
)
883 O
->OS
<< " op" << i
<< "=" << (int64_t)Record
[i
];
885 // If we found a metadata index, let's verify that we had an offset
886 // before and validate its forward reference offset was correct!
887 if (BlockID
== bitc::METADATA_BLOCK_ID
) {
888 if (Code
== bitc::METADATA_INDEX_OFFSET
) {
889 if (Record
.size() != 2)
890 O
->OS
<< "(Invalid record)";
892 auto Offset
= Record
[0] + (Record
[1] << 32);
893 MetadataIndexOffset
= Stream
.GetCurrentBitNo() + Offset
;
896 if (Code
== bitc::METADATA_INDEX
) {
897 O
->OS
<< " (offset ";
898 if (MetadataIndexOffset
== RecordStartBit
)
901 O
->OS
<< "mismatch: " << MetadataIndexOffset
<< " vs "
902 << RecordStartBit
<< ")";
906 // If we found a module hash, let's verify that it matches!
907 if (BlockID
== bitc::MODULE_BLOCK_ID
&& Code
== bitc::MODULE_CODE_HASH
&&
908 CheckHash
.hasValue()) {
909 if (Record
.size() != 5)
910 O
->OS
<< " (invalid)";
912 // Recompute the hash and compare it to the one in the bitcode
915 Hasher
.update(*CheckHash
);
917 int BlockSize
= (CurrentRecordPos
/ 8) - BlockEntryPos
;
918 auto Ptr
= Stream
.getPointerToByte(BlockEntryPos
, BlockSize
);
919 Hasher
.update(ArrayRef
<uint8_t>(Ptr
, BlockSize
));
920 Hash
= Hasher
.result();
922 std::array
<char, 20> RecordedHash
;
924 for (auto &Val
: Record
) {
925 assert(!(Val
>> 32) && "Unexpected high bits set");
926 support::endian::write32be(&RecordedHash
[Pos
], Val
);
929 if (Hash
== StringRef(RecordedHash
.data(), RecordedHash
.size()))
932 O
->OS
<< " (!mismatch!)";
939 for (unsigned i
= 1, e
= Abbv
->getNumOperandInfos(); i
!= e
; ++i
) {
940 const BitCodeAbbrevOp
&Op
= Abbv
->getOperandInfo(i
);
941 if (!Op
.isEncoding() || Op
.getEncoding() != BitCodeAbbrevOp::Array
)
943 assert(i
+ 2 == e
&& "Array op not second to last");
945 bool ArrayIsPrintable
= true;
946 for (unsigned j
= i
- 1, je
= Record
.size(); j
!= je
; ++j
) {
947 if (!isPrint(static_cast<unsigned char>(Record
[j
]))) {
948 ArrayIsPrintable
= false;
951 Str
+= (char)Record
[j
];
953 if (ArrayIsPrintable
)
954 O
->OS
<< " record string = '" << Str
<< "'";
960 if (canDecodeBlob(Code
, BlockID
)) {
961 if (Error E
= decodeMetadataStringsBlob(Indent
, Record
, Blob
, O
->OS
))
964 O
->OS
<< " blob data = ";
965 if (O
->ShowBinaryBlobs
) {
967 O
->OS
.write_escaped(Blob
, /*hex=*/true) << "'";
969 bool BlobIsPrintable
= true;
970 for (unsigned i
= 0, e
= Blob
.size(); i
!= e
; ++i
)
971 if (!isPrint(static_cast<unsigned char>(Blob
[i
]))) {
972 BlobIsPrintable
= false;
977 O
->OS
<< "'" << Blob
<< "'";
979 O
->OS
<< "unprintable, " << Blob
.size() << " bytes.";
987 // Make sure that we can skip the current record.
988 if (Error Err
= Stream
.JumpToBit(CurrentRecordPos
))
990 if (Expected
<unsigned> Skipped
= Stream
.skipRecord(Entry
.ID
))
993 return Skipped
.takeError();