1 //===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Bitcode/BitcodeAnalyzer.h"
10 #include "llvm/Bitcode/BitcodeReader.h"
11 #include "llvm/Bitcode/LLVMBitCodes.h"
12 #include "llvm/Bitstream/BitCodes.h"
13 #include "llvm/Bitstream/BitstreamReader.h"
14 #include "llvm/Support/Format.h"
15 #include "llvm/Support/SHA1.h"
20 static Error
reportError(StringRef Message
) {
21 return createStringError(std::errc::illegal_byte_sequence
, Message
.data());
// Maps a block ID to a symbolic block name.
// Lookup order visible below:
//   1. IDs below bitc::FIRST_APPLICATION_BLOCKID: only BLOCKINFO_BLOCK_ID is
//      given a name ("BLOCKINFO_BLOCK").
//   2. A non-empty Name recorded for this block in BlockInfo.
//   3. A built-in list of LLVM IR block names, sitting behind a check of
//      CurStreamType against LLVMIRBitstream.
// NOTE(review): the declared return type is std::optional<const char *>, so
// "null" in the comment below presumably means an empty optional -- confirm.
24 /// Return a symbolic block name if known, otherwise return null.
25 static std::optional
<const char *>
26 GetBlockName(unsigned BlockID
, const BitstreamBlockInfo
&BlockInfo
,
27 CurStreamTypeType CurStreamType
) {
28 // Standard blocks for all bitcode files.
29 if (BlockID
< bitc::FIRST_APPLICATION_BLOCKID
) {
30 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
)
31 return "BLOCKINFO_BLOCK";
35 // Check to see if we have a blockinfo record for this block, with a name.
// The returned pointer is Info->Name.c_str(), i.e. it points into storage
// owned by BlockInfo rather than into a string literal.
36 if (const BitstreamBlockInfo::BlockInfo
*Info
=
37 BlockInfo
.getBlockInfo(BlockID
)) {
38 if (!Info
->Name
.empty())
39 return Info
->Name
.c_str();
// The names listed from here on are specific to LLVM IR bitcode.
42 if (CurStreamType
!= LLVMIRBitstream
)
48 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID
:
49 return "OPERAND_BUNDLE_TAGS_BLOCK";
50 case bitc::MODULE_BLOCK_ID
:
51 return "MODULE_BLOCK";
52 case bitc::PARAMATTR_BLOCK_ID
:
53 return "PARAMATTR_BLOCK";
54 case bitc::PARAMATTR_GROUP_BLOCK_ID
:
55 return "PARAMATTR_GROUP_BLOCK_ID";
56 case bitc::TYPE_BLOCK_ID_NEW
:
57 return "TYPE_BLOCK_ID";
58 case bitc::CONSTANTS_BLOCK_ID
:
59 return "CONSTANTS_BLOCK";
60 case bitc::FUNCTION_BLOCK_ID
:
61 return "FUNCTION_BLOCK";
62 case bitc::IDENTIFICATION_BLOCK_ID
:
63 return "IDENTIFICATION_BLOCK_ID";
64 case bitc::VALUE_SYMTAB_BLOCK_ID
:
65 return "VALUE_SYMTAB";
66 case bitc::METADATA_BLOCK_ID
:
67 return "METADATA_BLOCK";
68 case bitc::METADATA_KIND_BLOCK_ID
:
69 return "METADATA_KIND_BLOCK";
70 case bitc::METADATA_ATTACHMENT_ID
:
71 return "METADATA_ATTACHMENT_BLOCK";
72 case bitc::USELIST_BLOCK_ID
:
73 return "USELIST_BLOCK_ID";
74 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID
:
75 return "GLOBALVAL_SUMMARY_BLOCK";
76 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID
:
77 return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
78 case bitc::MODULE_STRTAB_BLOCK_ID
:
79 return "MODULE_STRTAB_BLOCK";
80 case bitc::STRTAB_BLOCK_ID
:
81 return "STRTAB_BLOCK";
82 case bitc::SYMTAB_BLOCK_ID
:
83 return "SYMTAB_BLOCK";
// Maps a record code (CodeID) inside the block identified by BlockID to a
// symbolic record name.
// Lookup order visible below:
//   1. Block IDs below bitc::FIRST_APPLICATION_BLOCKID: only the BLOCKINFO
//      block's own record codes are handled.
//   2. Record names registered for this block in BlockInfo (first entry whose
//      code equals CodeID).
//   3. Built-in per-block tables for LLVM IR bitcode, sitting behind a check
//      of CurStreamType against LLVMIRBitstream.
// NOTE(review): the declared return type is std::optional<const char *>, so
// "null" in the comment below presumably means an empty optional -- confirm.
87 /// Return a symbolic code name if known, otherwise return null.
88 static std::optional
<const char *>
89 GetCodeName(unsigned CodeID
, unsigned BlockID
,
90 const BitstreamBlockInfo
&BlockInfo
,
91 CurStreamTypeType CurStreamType
) {
92 // Standard blocks for all bitcode files.
93 if (BlockID
< bitc::FIRST_APPLICATION_BLOCKID
) {
94 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
) {
98 case bitc::BLOCKINFO_CODE_SETBID
:
100 case bitc::BLOCKINFO_CODE_BLOCKNAME
:
102 case bitc::BLOCKINFO_CODE_SETRECORDNAME
:
103 return "SETRECORDNAME";
109 // Check to see if we have a blockinfo record for this record, with a name.
// The returned pointer is RN.second.c_str(), i.e. it points into storage
// owned by BlockInfo rather than into a string literal.
110 if (const BitstreamBlockInfo::BlockInfo
*Info
=
111 BlockInfo
.getBlockInfo(BlockID
)) {
112 for (const std::pair
<unsigned, std::string
> &RN
: Info
->RecordNames
)
113 if (RN
.first
== CodeID
)
114 return RN
.second
.c_str();
// The tables from here on are specific to LLVM IR bitcode.
117 if (CurStreamType
!= LLVMIRBitstream
)
// STRINGIFY_CODE(PREFIX, CODE) expands to a `case bitc::PREFIX_CODE:` label;
// the macro is #undef'd again at the end of this function.
120 #define STRINGIFY_CODE(PREFIX, CODE) \
121 case bitc::PREFIX##_##CODE: \
126 case bitc::MODULE_BLOCK_ID
:
130 STRINGIFY_CODE(MODULE_CODE
, VERSION
)
131 STRINGIFY_CODE(MODULE_CODE
, TRIPLE
)
132 STRINGIFY_CODE(MODULE_CODE
, DATALAYOUT
)
133 STRINGIFY_CODE(MODULE_CODE
, ASM
)
134 STRINGIFY_CODE(MODULE_CODE
, SECTIONNAME
)
135 STRINGIFY_CODE(MODULE_CODE
, DEPLIB
) // Deprecated, present in old bitcode
136 STRINGIFY_CODE(MODULE_CODE
, GLOBALVAR
)
137 STRINGIFY_CODE(MODULE_CODE
, FUNCTION
)
138 STRINGIFY_CODE(MODULE_CODE
, ALIAS
)
139 STRINGIFY_CODE(MODULE_CODE
, GCNAME
)
140 STRINGIFY_CODE(MODULE_CODE
, COMDAT
)
141 STRINGIFY_CODE(MODULE_CODE
, VSTOFFSET
)
142 STRINGIFY_CODE(MODULE_CODE
, METADATA_VALUES_UNUSED
)
143 STRINGIFY_CODE(MODULE_CODE
, SOURCE_FILENAME
)
144 STRINGIFY_CODE(MODULE_CODE
, HASH
)
146 case bitc::IDENTIFICATION_BLOCK_ID
:
150 STRINGIFY_CODE(IDENTIFICATION_CODE
, STRING
)
151 STRINGIFY_CODE(IDENTIFICATION_CODE
, EPOCH
)
153 case bitc::PARAMATTR_BLOCK_ID
:
157 // FIXME: Should these be different?
158 case bitc::PARAMATTR_CODE_ENTRY_OLD
:
160 case bitc::PARAMATTR_CODE_ENTRY
:
163 case bitc::PARAMATTR_GROUP_BLOCK_ID
:
167 case bitc::PARAMATTR_GRP_CODE_ENTRY
:
170 case bitc::TYPE_BLOCK_ID_NEW
:
174 STRINGIFY_CODE(TYPE_CODE
, NUMENTRY
)
175 STRINGIFY_CODE(TYPE_CODE
, VOID
)
176 STRINGIFY_CODE(TYPE_CODE
, FLOAT
)
177 STRINGIFY_CODE(TYPE_CODE
, DOUBLE
)
178 STRINGIFY_CODE(TYPE_CODE
, LABEL
)
179 STRINGIFY_CODE(TYPE_CODE
, OPAQUE
)
180 STRINGIFY_CODE(TYPE_CODE
, INTEGER
)
181 STRINGIFY_CODE(TYPE_CODE
, POINTER
)
182 STRINGIFY_CODE(TYPE_CODE
, HALF
)
183 STRINGIFY_CODE(TYPE_CODE
, ARRAY
)
184 STRINGIFY_CODE(TYPE_CODE
, VECTOR
)
185 STRINGIFY_CODE(TYPE_CODE
, X86_FP80
)
186 STRINGIFY_CODE(TYPE_CODE
, FP128
)
187 STRINGIFY_CODE(TYPE_CODE
, PPC_FP128
)
188 STRINGIFY_CODE(TYPE_CODE
, METADATA
)
189 STRINGIFY_CODE(TYPE_CODE
, X86_MMX
)
190 STRINGIFY_CODE(TYPE_CODE
, STRUCT_ANON
)
191 STRINGIFY_CODE(TYPE_CODE
, STRUCT_NAME
)
192 STRINGIFY_CODE(TYPE_CODE
, STRUCT_NAMED
)
193 STRINGIFY_CODE(TYPE_CODE
, FUNCTION
)
194 STRINGIFY_CODE(TYPE_CODE
, TOKEN
)
195 STRINGIFY_CODE(TYPE_CODE
, BFLOAT
)
198 case bitc::CONSTANTS_BLOCK_ID
:
202 STRINGIFY_CODE(CST_CODE
, SETTYPE
)
203 STRINGIFY_CODE(CST_CODE
, NULL
)
204 STRINGIFY_CODE(CST_CODE
, UNDEF
)
205 STRINGIFY_CODE(CST_CODE
, INTEGER
)
206 STRINGIFY_CODE(CST_CODE
, WIDE_INTEGER
)
207 STRINGIFY_CODE(CST_CODE
, FLOAT
)
208 STRINGIFY_CODE(CST_CODE
, AGGREGATE
)
209 STRINGIFY_CODE(CST_CODE
, STRING
)
210 STRINGIFY_CODE(CST_CODE
, CSTRING
)
211 STRINGIFY_CODE(CST_CODE
, CE_BINOP
)
212 STRINGIFY_CODE(CST_CODE
, CE_CAST
)
213 STRINGIFY_CODE(CST_CODE
, CE_GEP
)
214 STRINGIFY_CODE(CST_CODE
, CE_INBOUNDS_GEP
)
215 STRINGIFY_CODE(CST_CODE
, CE_SELECT
)
216 STRINGIFY_CODE(CST_CODE
, CE_EXTRACTELT
)
217 STRINGIFY_CODE(CST_CODE
, CE_INSERTELT
)
218 STRINGIFY_CODE(CST_CODE
, CE_SHUFFLEVEC
)
219 STRINGIFY_CODE(CST_CODE
, CE_CMP
)
220 STRINGIFY_CODE(CST_CODE
, INLINEASM
)
221 STRINGIFY_CODE(CST_CODE
, CE_SHUFVEC_EX
)
222 STRINGIFY_CODE(CST_CODE
, CE_UNOP
)
223 STRINGIFY_CODE(CST_CODE
, DSO_LOCAL_EQUIVALENT
)
224 STRINGIFY_CODE(CST_CODE
, NO_CFI_VALUE
)
// BLOCKADDRESS is spelled out by hand and keeps its full CST_CODE_ prefix in
// the returned name.
225 case bitc::CST_CODE_BLOCKADDRESS
:
226 return "CST_CODE_BLOCKADDRESS";
227 STRINGIFY_CODE(CST_CODE
, DATA
)
229 case bitc::FUNCTION_BLOCK_ID
:
233 STRINGIFY_CODE(FUNC_CODE
, DECLAREBLOCKS
)
234 STRINGIFY_CODE(FUNC_CODE
, INST_BINOP
)
235 STRINGIFY_CODE(FUNC_CODE
, INST_CAST
)
236 STRINGIFY_CODE(FUNC_CODE
, INST_GEP_OLD
)
237 STRINGIFY_CODE(FUNC_CODE
, INST_INBOUNDS_GEP_OLD
)
238 STRINGIFY_CODE(FUNC_CODE
, INST_SELECT
)
239 STRINGIFY_CODE(FUNC_CODE
, INST_EXTRACTELT
)
240 STRINGIFY_CODE(FUNC_CODE
, INST_INSERTELT
)
241 STRINGIFY_CODE(FUNC_CODE
, INST_SHUFFLEVEC
)
242 STRINGIFY_CODE(FUNC_CODE
, INST_CMP
)
243 STRINGIFY_CODE(FUNC_CODE
, INST_RET
)
244 STRINGIFY_CODE(FUNC_CODE
, INST_BR
)
245 STRINGIFY_CODE(FUNC_CODE
, INST_SWITCH
)
246 STRINGIFY_CODE(FUNC_CODE
, INST_INVOKE
)
247 STRINGIFY_CODE(FUNC_CODE
, INST_UNOP
)
248 STRINGIFY_CODE(FUNC_CODE
, INST_UNREACHABLE
)
249 STRINGIFY_CODE(FUNC_CODE
, INST_CLEANUPRET
)
250 STRINGIFY_CODE(FUNC_CODE
, INST_CATCHRET
)
251 STRINGIFY_CODE(FUNC_CODE
, INST_CATCHPAD
)
252 STRINGIFY_CODE(FUNC_CODE
, INST_PHI
)
253 STRINGIFY_CODE(FUNC_CODE
, INST_ALLOCA
)
254 STRINGIFY_CODE(FUNC_CODE
, INST_LOAD
)
255 STRINGIFY_CODE(FUNC_CODE
, INST_VAARG
)
256 STRINGIFY_CODE(FUNC_CODE
, INST_STORE
)
257 STRINGIFY_CODE(FUNC_CODE
, INST_EXTRACTVAL
)
258 STRINGIFY_CODE(FUNC_CODE
, INST_INSERTVAL
)
259 STRINGIFY_CODE(FUNC_CODE
, INST_CMP2
)
260 STRINGIFY_CODE(FUNC_CODE
, INST_VSELECT
)
261 STRINGIFY_CODE(FUNC_CODE
, DEBUG_LOC_AGAIN
)
262 STRINGIFY_CODE(FUNC_CODE
, INST_CALL
)
263 STRINGIFY_CODE(FUNC_CODE
, DEBUG_LOC
)
264 STRINGIFY_CODE(FUNC_CODE
, INST_GEP
)
265 STRINGIFY_CODE(FUNC_CODE
, OPERAND_BUNDLE
)
266 STRINGIFY_CODE(FUNC_CODE
, INST_FENCE
)
267 STRINGIFY_CODE(FUNC_CODE
, INST_ATOMICRMW
)
268 STRINGIFY_CODE(FUNC_CODE
, INST_LOADATOMIC
)
269 STRINGIFY_CODE(FUNC_CODE
, INST_STOREATOMIC
)
270 STRINGIFY_CODE(FUNC_CODE
, INST_CMPXCHG
)
271 STRINGIFY_CODE(FUNC_CODE
, INST_CALLBR
)
272 STRINGIFY_CODE(FUNC_CODE
, BLOCKADDR_USERS
)
274 case bitc::VALUE_SYMTAB_BLOCK_ID
:
278 STRINGIFY_CODE(VST_CODE
, ENTRY
)
279 STRINGIFY_CODE(VST_CODE
, BBENTRY
)
280 STRINGIFY_CODE(VST_CODE
, FNENTRY
)
281 STRINGIFY_CODE(VST_CODE
, COMBINED_ENTRY
)
283 case bitc::MODULE_STRTAB_BLOCK_ID
:
287 STRINGIFY_CODE(MST_CODE
, ENTRY
)
288 STRINGIFY_CODE(MST_CODE
, HASH
)
// Both summary block IDs share the same FS_* record table.
290 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID
:
291 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID
:
295 STRINGIFY_CODE(FS
, PERMODULE
)
296 STRINGIFY_CODE(FS
, PERMODULE_PROFILE
)
297 STRINGIFY_CODE(FS
, PERMODULE_RELBF
)
298 STRINGIFY_CODE(FS
, PERMODULE_GLOBALVAR_INIT_REFS
)
299 STRINGIFY_CODE(FS
, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS
)
300 STRINGIFY_CODE(FS
, COMBINED
)
301 STRINGIFY_CODE(FS
, COMBINED_PROFILE
)
302 STRINGIFY_CODE(FS
, COMBINED_GLOBALVAR_INIT_REFS
)
303 STRINGIFY_CODE(FS
, ALIAS
)
304 STRINGIFY_CODE(FS
, COMBINED_ALIAS
)
305 STRINGIFY_CODE(FS
, COMBINED_ORIGINAL_NAME
)
306 STRINGIFY_CODE(FS
, VERSION
)
307 STRINGIFY_CODE(FS
, FLAGS
)
308 STRINGIFY_CODE(FS
, TYPE_TESTS
)
309 STRINGIFY_CODE(FS
, TYPE_TEST_ASSUME_VCALLS
)
310 STRINGIFY_CODE(FS
, TYPE_CHECKED_LOAD_VCALLS
)
311 STRINGIFY_CODE(FS
, TYPE_TEST_ASSUME_CONST_VCALL
)
312 STRINGIFY_CODE(FS
, TYPE_CHECKED_LOAD_CONST_VCALL
)
313 STRINGIFY_CODE(FS
, VALUE_GUID
)
314 STRINGIFY_CODE(FS
, CFI_FUNCTION_DEFS
)
315 STRINGIFY_CODE(FS
, CFI_FUNCTION_DECLS
)
316 STRINGIFY_CODE(FS
, TYPE_ID
)
317 STRINGIFY_CODE(FS
, TYPE_ID_METADATA
)
318 STRINGIFY_CODE(FS
, BLOCK_COUNT
)
319 STRINGIFY_CODE(FS
, PARAM_ACCESS
)
320 STRINGIFY_CODE(FS
, PERMODULE_CALLSITE_INFO
)
321 STRINGIFY_CODE(FS
, PERMODULE_ALLOC_INFO
)
322 STRINGIFY_CODE(FS
, COMBINED_CALLSITE_INFO
)
323 STRINGIFY_CODE(FS
, COMBINED_ALLOC_INFO
)
324 STRINGIFY_CODE(FS
, STACK_IDS
)
326 case bitc::METADATA_ATTACHMENT_ID
:
330 STRINGIFY_CODE(METADATA
, ATTACHMENT
)
332 case bitc::METADATA_BLOCK_ID
:
336 STRINGIFY_CODE(METADATA
, STRING_OLD
)
337 STRINGIFY_CODE(METADATA
, VALUE
)
338 STRINGIFY_CODE(METADATA
, NODE
)
339 STRINGIFY_CODE(METADATA
, NAME
)
340 STRINGIFY_CODE(METADATA
, DISTINCT_NODE
)
341 STRINGIFY_CODE(METADATA
, KIND
) // Older bitcode has it in a MODULE_BLOCK
342 STRINGIFY_CODE(METADATA
, LOCATION
)
343 STRINGIFY_CODE(METADATA
, OLD_NODE
)
344 STRINGIFY_CODE(METADATA
, OLD_FN_NODE
)
345 STRINGIFY_CODE(METADATA
, NAMED_NODE
)
346 STRINGIFY_CODE(METADATA
, GENERIC_DEBUG
)
347 STRINGIFY_CODE(METADATA
, SUBRANGE
)
348 STRINGIFY_CODE(METADATA
, ENUMERATOR
)
349 STRINGIFY_CODE(METADATA
, BASIC_TYPE
)
350 STRINGIFY_CODE(METADATA
, FILE)
351 STRINGIFY_CODE(METADATA
, DERIVED_TYPE
)
352 STRINGIFY_CODE(METADATA
, COMPOSITE_TYPE
)
353 STRINGIFY_CODE(METADATA
, SUBROUTINE_TYPE
)
354 STRINGIFY_CODE(METADATA
, COMPILE_UNIT
)
355 STRINGIFY_CODE(METADATA
, SUBPROGRAM
)
356 STRINGIFY_CODE(METADATA
, LEXICAL_BLOCK
)
357 STRINGIFY_CODE(METADATA
, LEXICAL_BLOCK_FILE
)
358 STRINGIFY_CODE(METADATA
, NAMESPACE
)
359 STRINGIFY_CODE(METADATA
, TEMPLATE_TYPE
)
360 STRINGIFY_CODE(METADATA
, TEMPLATE_VALUE
)
361 STRINGIFY_CODE(METADATA
, GLOBAL_VAR
)
362 STRINGIFY_CODE(METADATA
, LOCAL_VAR
)
363 STRINGIFY_CODE(METADATA
, EXPRESSION
)
364 STRINGIFY_CODE(METADATA
, OBJC_PROPERTY
)
365 STRINGIFY_CODE(METADATA
, IMPORTED_ENTITY
)
366 STRINGIFY_CODE(METADATA
, MODULE
)
367 STRINGIFY_CODE(METADATA
, MACRO
)
368 STRINGIFY_CODE(METADATA
, MACRO_FILE
)
369 STRINGIFY_CODE(METADATA
, STRINGS
)
370 STRINGIFY_CODE(METADATA
, GLOBAL_DECL_ATTACHMENT
)
371 STRINGIFY_CODE(METADATA
, GLOBAL_VAR_EXPR
)
372 STRINGIFY_CODE(METADATA
, INDEX_OFFSET
)
373 STRINGIFY_CODE(METADATA
, INDEX
)
374 STRINGIFY_CODE(METADATA
, ARG_LIST
)
376 case bitc::METADATA_KIND_BLOCK_ID
:
380 STRINGIFY_CODE(METADATA
, KIND
)
// The use-list and operand-bundle-tag names below are written out in full
// instead of going through STRINGIFY_CODE.
382 case bitc::USELIST_BLOCK_ID
:
386 case bitc::USELIST_CODE_DEFAULT
:
387 return "USELIST_CODE_DEFAULT";
388 case bitc::USELIST_CODE_BB
:
389 return "USELIST_CODE_BB";
392 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID
:
396 case bitc::OPERAND_BUNDLE_TAG
:
397 return "OPERAND_BUNDLE_TAG";
399 case bitc::STRTAB_BLOCK_ID
:
403 case bitc::STRTAB_BLOB
:
406 case bitc::SYMTAB_BLOCK_ID
:
410 case bitc::SYMTAB_BLOB
:
414 #undef STRINGIFY_CODE
417 static void printSize(raw_ostream
&OS
, double Bits
) {
418 OS
<< format("%.2f/%.2fB/%luW", Bits
, Bits
/ 8, (unsigned long)(Bits
/ 32));
420 static void printSize(raw_ostream
&OS
, uint64_t Bits
) {
421 OS
<< format("%lub/%.2fB/%luW", (unsigned long)Bits
, (double)Bits
/ 8,
422 (unsigned long)(Bits
/ 32));
425 static Expected
<CurStreamTypeType
> ReadSignature(BitstreamCursor
&Stream
) {
426 auto tryRead
= [&Stream
](char &Dest
, size_t size
) -> Error
{
427 if (Expected
<SimpleBitstreamCursor::word_t
> MaybeWord
= Stream
.Read(size
))
428 Dest
= MaybeWord
.get();
430 return MaybeWord
.takeError();
431 return Error::success();
435 if (Error Err
= tryRead(Signature
[0], 8))
436 return std::move(Err
);
437 if (Error Err
= tryRead(Signature
[1], 8))
438 return std::move(Err
);
440 // Autodetect the file contents, if it is one we know.
441 if (Signature
[0] == 'C' && Signature
[1] == 'P') {
442 if (Error Err
= tryRead(Signature
[2], 8))
443 return std::move(Err
);
444 if (Error Err
= tryRead(Signature
[3], 8))
445 return std::move(Err
);
446 if (Signature
[2] == 'C' && Signature
[3] == 'H')
447 return ClangSerializedASTBitstream
;
448 } else if (Signature
[0] == 'D' && Signature
[1] == 'I') {
449 if (Error Err
= tryRead(Signature
[2], 8))
450 return std::move(Err
);
451 if (Error Err
= tryRead(Signature
[3], 8))
452 return std::move(Err
);
453 if (Signature
[2] == 'A' && Signature
[3] == 'G')
454 return ClangSerializedDiagnosticsBitstream
;
455 } else if (Signature
[0] == 'R' && Signature
[1] == 'M') {
456 if (Error Err
= tryRead(Signature
[2], 8))
457 return std::move(Err
);
458 if (Error Err
= tryRead(Signature
[3], 8))
459 return std::move(Err
);
460 if (Signature
[2] == 'R' && Signature
[3] == 'K')
461 return LLVMBitstreamRemarks
;
463 if (Error Err
= tryRead(Signature
[2], 4))
464 return std::move(Err
);
465 if (Error Err
= tryRead(Signature
[3], 4))
466 return std::move(Err
);
467 if (Error Err
= tryRead(Signature
[4], 4))
468 return std::move(Err
);
469 if (Error Err
= tryRead(Signature
[5], 4))
470 return std::move(Err
);
471 if (Signature
[0] == 'B' && Signature
[1] == 'C' && Signature
[2] == 0x0 &&
472 Signature
[3] == 0xC && Signature
[4] == 0xE && Signature
[5] == 0xD)
473 return LLVMIRBitstream
;
475 return UnknownBitstream
;
478 static Expected
<CurStreamTypeType
> analyzeHeader(std::optional
<BCDumpOptions
> O
,
479 BitstreamCursor
&Stream
) {
480 ArrayRef
<uint8_t> Bytes
= Stream
.getBitcodeBytes();
481 const unsigned char *BufPtr
= (const unsigned char *)Bytes
.data();
482 const unsigned char *EndBufPtr
= BufPtr
+ Bytes
.size();
484 // If we have a wrapper header, parse it and ignore the non-bc file
485 // contents. The magic number is 0x0B17C0DE stored in little endian.
486 if (isBitcodeWrapper(BufPtr
, EndBufPtr
)) {
487 if (Bytes
.size() < BWH_HeaderSize
)
488 return reportError("Invalid bitcode wrapper header");
491 unsigned Magic
= support::endian::read32le(&BufPtr
[BWH_MagicField
]);
492 unsigned Version
= support::endian::read32le(&BufPtr
[BWH_VersionField
]);
493 unsigned Offset
= support::endian::read32le(&BufPtr
[BWH_OffsetField
]);
494 unsigned Size
= support::endian::read32le(&BufPtr
[BWH_SizeField
]);
495 unsigned CPUType
= support::endian::read32le(&BufPtr
[BWH_CPUTypeField
]);
497 O
->OS
<< "<BITCODE_WRAPPER_HEADER"
498 << " Magic=" << format_hex(Magic
, 10)
499 << " Version=" << format_hex(Version
, 10)
500 << " Offset=" << format_hex(Offset
, 10)
501 << " Size=" << format_hex(Size
, 10)
502 << " CPUType=" << format_hex(CPUType
, 10) << "/>\n";
505 if (SkipBitcodeWrapperHeader(BufPtr
, EndBufPtr
, true))
506 return reportError("Invalid bitcode wrapper header");
509 // Use the cursor modified by skipping the wrapper header.
510 Stream
= BitstreamCursor(ArrayRef
<uint8_t>(BufPtr
, EndBufPtr
));
512 return ReadSignature(Stream
);
515 static bool canDecodeBlob(unsigned Code
, unsigned BlockID
) {
516 return BlockID
== bitc::METADATA_BLOCK_ID
&& Code
== bitc::METADATA_STRINGS
;
// Decodes the blob of a metadata-strings record and prints every string,
// escaped, to OS.
// Record must hold exactly two entries: Record[0] is the number of strings
// and Record[1] is the offset inside Blob where the character data starts.
// The bytes before that offset are read as a bitstream of VBR6-encoded
// string lengths.
// Errors reported below: empty blob, wrong record size, running out of
// length data ("bad length"), and a length larger than the remaining
// characters ("truncated chars").
519 Error
BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent
,
520 ArrayRef
<uint64_t> Record
,
524 return reportError("Cannot decode empty blob.");
526 if (Record
.size() != 2)
528 "Decoding metadata strings blob needs two record entries.");
// Both values are narrowed from uint64_t to unsigned here.
530 unsigned NumStrings
= Record
[0];
531 unsigned StringsOffset
= Record
[1];
532 OS
<< " num-strings = " << NumStrings
<< " {\n";
// NOTE(review): StringsOffset comes straight from the record and no check
// against Blob.size() is visible before Blob.drop_front(StringsOffset) --
// confirm that an oversized offset is rejected somewhere.
534 StringRef Lengths
= Blob
.slice(0, StringsOffset
);
535 SimpleBitstreamCursor
R(Lengths
);
536 StringRef Strings
= Blob
.drop_front(StringsOffset
);
538 if (R
.AtEndOfStream())
539 return reportError("bad length");
542 if (Error E
= R
.ReadVBR(6).moveInto(Size
))
544 if (Strings
.size() < Size
)
545 return reportError("truncated chars");
547 OS
<< Indent
<< " '";
548 OS
.write_escaped(Strings
.slice(0, Size
), /*hex=*/true);
// Advance past the string that was just printed.
550 Strings
= Strings
.drop_front(Size
);
// NOTE(review): the count is decremented after each pass; if NumStrings were
// 0 on entry the unsigned decrement would wrap -- confirm this cannot happen.
551 } while (--NumStrings
);
553 OS
<< Indent
<< " }";
554 return Error::success();
// Constructs an analyzer for the bitcode in Buffer.
// BlockInfoBuffer is optional; the visible statement below emplaces
// BlockInfoStream from it, and analyze() later reads block info from that
// stream when it is set.
557 BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer
,
558 std::optional
<StringRef
> BlockInfoBuffer
)
561 BlockInfoStream
.emplace(*BlockInfoBuffer
);
// Analyzes the whole stream held by this analyzer.
// Steps visible below:
//   1. analyzeHeader() classifies the main stream into CurStreamType.
//   2. If a separate BlockInfoStream was supplied, its top-level blocks are
//      scanned; a BLOCKINFO block found there replaces BlockInfo, any other
//      block is skipped.
//   3. Every top-level entry of the main stream must be a sub-block, which is
//      handed to parseBlock() at indent level 0.
// O (dump options) and CheckHash are forwarded unchanged to the helpers.
// Returns Error::success() when the end of the function is reached.
564 Error
BitcodeAnalyzer::analyze(std::optional
<BCDumpOptions
> O
,
565 std::optional
<StringRef
> CheckHash
) {
566 if (Error E
= analyzeHeader(O
, Stream
).moveInto(CurStreamType
))
569 Stream
.setBlockInfo(&BlockInfo
);
571 // Read block info from BlockInfoStream, if specified.
572 // The block info must be a top-level block.
573 if (BlockInfoStream
) {
574 BitstreamCursor
BlockInfoCursor(*BlockInfoStream
);
// Only the error state is used here; the stream type detected for the block
// info file is discarded.
575 if (Error E
= analyzeHeader(O
, BlockInfoCursor
).takeError())
578 while (!BlockInfoCursor
.AtEndOfStream()) {
579 Expected
<unsigned> MaybeCode
= BlockInfoCursor
.ReadCode();
581 return MaybeCode
.takeError();
582 if (MaybeCode
.get() != bitc::ENTER_SUBBLOCK
)
583 return reportError("Invalid record at top-level in block info file");
585 Expected
<unsigned> MaybeBlockID
= BlockInfoCursor
.ReadSubBlockID();
587 return MaybeBlockID
.takeError();
588 if (MaybeBlockID
.get() == bitc::BLOCKINFO_BLOCK_ID
) {
589 std::optional
<BitstreamBlockInfo
> NewBlockInfo
;
591 BlockInfoCursor
.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
592 .moveInto(NewBlockInfo
))
595 return reportError("Malformed BlockInfoBlock in block info file");
596 BlockInfo
= std::move(*NewBlockInfo
);
// Blocks other than BLOCKINFO in the block info file are skipped.
600 if (Error Err
= BlockInfoCursor
.SkipBlock())
605 // Parse the top-level structure. We only allow blocks at the top-level.
606 while (!Stream
.AtEndOfStream()) {
607 Expected
<unsigned> MaybeCode
= Stream
.ReadCode();
609 return MaybeCode
.takeError();
610 if (MaybeCode
.get() != bitc::ENTER_SUBBLOCK
)
611 return reportError("Invalid record at top-level");
613 Expected
<unsigned> MaybeBlockID
= Stream
.ReadSubBlockID();
615 return MaybeBlockID
.takeError();
617 if (Error E
= parseBlock(MaybeBlockID
.get(), 0, O
, CheckHash
))
622 return Error::success();
// Prints a human-readable summary of the analyzed stream to O.OS:
//   - total size of the buffer and the detected stream type,
//   - number of top-level blocks,
//   - per block ID: instance count, total size, share of the file, and
//     sub-block / abbreviation / record counts (with averages when the block
//     occurred more than once),
//   - when O.Histogram is set and records were seen: a per-record-code
//     histogram ordered by descending frequency.
// Filename, when given, is included in the heading.
625 void BitcodeAnalyzer::printStats(BCDumpOptions O
,
626 std::optional
<StringRef
> Filename
) {
// Sizes below are expressed in bits.
627 uint64_t BufferSizeBits
= Stream
.getBitcodeBytes().size() * CHAR_BIT
;
628 // Print a summary of the read file.
// NOTE(review): Filename->data() is streamed as a C string; StringRef::data()
// is not guaranteed to be null-terminated -- confirm callers pass
// null-terminated names.
631 O
.OS
<< "of " << Filename
->data() << ":\n";
632 O
.OS
<< " Total size: ";
633 printSize(O
.OS
, BufferSizeBits
);
635 O
.OS
<< " Stream type: ";
636 switch (CurStreamType
) {
637 case UnknownBitstream
:
640 case LLVMIRBitstream
:
643 case ClangSerializedASTBitstream
:
644 O
.OS
<< "Clang Serialized AST\n";
646 case ClangSerializedDiagnosticsBitstream
:
647 O
.OS
<< "Clang Serialized Diagnostics\n";
649 case LLVMBitstreamRemarks
:
650 O
.OS
<< "LLVM Remarks\n";
653 O
.OS
<< " # Toplevel Blocks: " << NumTopBlocks
<< "\n";
656 // Emit per-block stats.
657 O
.OS
<< "Per-block Summary:\n";
// Stat.first is the block ID, Stat.second the statistics gathered for it.
658 for (const auto &Stat
: BlockIDStats
) {
659 O
.OS
<< " Block ID #" << Stat
.first
;
660 if (std::optional
<const char *> BlockName
=
661 GetBlockName(Stat
.first
, BlockInfo
, CurStreamType
))
662 O
.OS
<< " (" << *BlockName
<< ")";
665 const PerBlockIDStats
&Stats
= Stat
.second
;
666 O
.OS
<< " Num Instances: " << Stats
.NumInstances
<< "\n";
667 O
.OS
<< " Total Size: ";
668 printSize(O
.OS
, Stats
.NumBits
);
// Share of the whole buffer taken by this block ID, computed in floating
// point.
670 double pct
= (Stats
.NumBits
* 100.0) / BufferSizeBits
;
671 O
.OS
<< " Percent of file: " << format("%2.4f%%", pct
) << "\n";
// Averages are only printed when the block occurred more than once.
672 if (Stats
.NumInstances
> 1) {
673 O
.OS
<< " Average Size: ";
674 printSize(O
.OS
, Stats
.NumBits
/ (double)Stats
.NumInstances
);
676 O
.OS
<< " Tot/Avg SubBlocks: " << Stats
.NumSubBlocks
<< "/"
677 << Stats
.NumSubBlocks
/ (double)Stats
.NumInstances
<< "\n";
678 O
.OS
<< " Tot/Avg Abbrevs: " << Stats
.NumAbbrevs
<< "/"
679 << Stats
.NumAbbrevs
/ (double)Stats
.NumInstances
<< "\n";
680 O
.OS
<< " Tot/Avg Records: " << Stats
.NumRecords
<< "/"
681 << Stats
.NumRecords
/ (double)Stats
.NumInstances
<< "\n";
683 O
.OS
<< " Num SubBlocks: " << Stats
.NumSubBlocks
<< "\n";
684 O
.OS
<< " Num Abbrevs: " << Stats
.NumAbbrevs
<< "\n";
685 O
.OS
<< " Num Records: " << Stats
.NumRecords
<< "\n";
// The abbreviation percentage is only computed when there are records, which
// keeps the division below away from a zero denominator.
687 if (Stats
.NumRecords
) {
688 double pct
= (Stats
.NumAbbreviatedRecords
* 100.0) / Stats
.NumRecords
;
689 O
.OS
<< " Percent Abbrevs: " << format("%2.4f%%", pct
) << "\n";
693 // Print a histogram of the codes we see.
694 if (O
.Histogram
&& !Stats
.CodeFreq
.empty()) {
695 std::vector
<std::pair
<unsigned, unsigned>> FreqPairs
; // <freq,code>
// Collect only the codes that actually occurred.
696 for (unsigned i
= 0, e
= Stats
.CodeFreq
.size(); i
!= e
; ++i
)
697 if (unsigned Freq
= Stats
.CodeFreq
[i
].NumInstances
)
698 FreqPairs
.push_back(std::make_pair(Freq
, i
));
// Sort ascending by <freq,code>, then reverse to list the most frequent
// codes first.
699 llvm::stable_sort(FreqPairs
);
700 std::reverse(FreqPairs
.begin(), FreqPairs
.end());
702 O
.OS
<< "\tRecord Histogram:\n";
703 O
.OS
<< "\t\t Count # Bits b/Rec % Abv Record Kind\n";
704 for (const auto &FreqPair
: FreqPairs
) {
705 const PerRecordStats
&RecStats
= Stats
.CodeFreq
[FreqPair
.second
];
707 O
.OS
<< format("\t\t%7d %9lu", RecStats
.NumInstances
,
708 (unsigned long)RecStats
.TotalBits
);
710 if (RecStats
.NumInstances
> 1)
711 O
.OS
<< format(" %9.1f",
712 (double)RecStats
.TotalBits
/ RecStats
.NumInstances
);
716 if (RecStats
.NumAbbrev
)
717 O
.OS
<< format(" %7.2f", (double)RecStats
.NumAbbrev
/
718 RecStats
.NumInstances
* 100);
// Prefer the symbolic record name; the "UnknownCode<N>" form further down is
// the other visible output for this column.
723 if (std::optional
<const char *> CodeName
= GetCodeName(
724 FreqPair
.second
, Stat
.first
, BlockInfo
, CurStreamType
))
725 O
.OS
<< *CodeName
<< "\n";
727 O
.OS
<< "UnknownCode" << FreqPair
.second
<< "\n";
734 Error
BitcodeAnalyzer::parseBlock(unsigned BlockID
, unsigned IndentLevel
,
735 std::optional
<BCDumpOptions
> O
,
736 std::optional
<StringRef
> CheckHash
) {
737 std::string
Indent(IndentLevel
* 2, ' ');
738 uint64_t BlockBitStart
= Stream
.GetCurrentBitNo();
740 // Get the statistics for this BlockID.
741 PerBlockIDStats
&BlockStats
= BlockIDStats
[BlockID
];
743 BlockStats
.NumInstances
++;
745 // BLOCKINFO is a special part of the stream.
746 bool DumpRecords
= O
.has_value();
747 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
) {
748 if (O
&& !O
->DumpBlockinfo
)
749 O
->OS
<< Indent
<< "<BLOCKINFO_BLOCK/>\n";
750 std::optional
<BitstreamBlockInfo
> NewBlockInfo
;
751 if (Error E
= Stream
.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
752 .moveInto(NewBlockInfo
))
755 return reportError("Malformed BlockInfoBlock");
756 BlockInfo
= std::move(*NewBlockInfo
);
757 if (Error Err
= Stream
.JumpToBit(BlockBitStart
))
759 // It's not really interesting to dump the contents of the blockinfo
760 // block, so only do it if the user explicitly requests it.
761 DumpRecords
= O
&& O
->DumpBlockinfo
;
764 unsigned NumWords
= 0;
765 if (Error Err
= Stream
.EnterSubBlock(BlockID
, &NumWords
))
768 // Keep it for later, when we see a MODULE_HASH record
769 uint64_t BlockEntryPos
= Stream
.getCurrentByteNo();
771 std::optional
<const char *> BlockName
;
773 O
->OS
<< Indent
<< "<";
774 if ((BlockName
= GetBlockName(BlockID
, BlockInfo
, CurStreamType
)))
777 O
->OS
<< "UnknownBlock" << BlockID
;
779 if (!O
->Symbolic
&& BlockName
)
780 O
->OS
<< " BlockID=" << BlockID
;
782 O
->OS
<< " NumWords=" << NumWords
783 << " BlockCodeSize=" << Stream
.getAbbrevIDWidth() << ">\n";
786 SmallVector
<uint64_t, 64> Record
;
788 // Keep the offset to the metadata index if seen.
789 uint64_t MetadataIndexOffset
= 0;
791 // Read all the records for this block.
793 if (Stream
.AtEndOfStream())
794 return reportError("Premature end of bitstream");
796 uint64_t RecordStartBit
= Stream
.GetCurrentBitNo();
798 BitstreamEntry Entry
;
799 if (Error E
= Stream
.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs
)
803 switch (Entry
.Kind
) {
804 case BitstreamEntry::Error
:
805 return reportError("malformed bitcode file");
806 case BitstreamEntry::EndBlock
: {
807 uint64_t BlockBitEnd
= Stream
.GetCurrentBitNo();
808 BlockStats
.NumBits
+= BlockBitEnd
- BlockBitStart
;
810 O
->OS
<< Indent
<< "</";
812 O
->OS
<< *BlockName
<< ">\n";
814 O
->OS
<< "UnknownBlock" << BlockID
<< ">\n";
816 return Error::success();
819 case BitstreamEntry::SubBlock
: {
820 uint64_t SubBlockBitStart
= Stream
.GetCurrentBitNo();
821 if (Error E
= parseBlock(Entry
.ID
, IndentLevel
+ 1, O
, CheckHash
))
823 ++BlockStats
.NumSubBlocks
;
824 uint64_t SubBlockBitEnd
= Stream
.GetCurrentBitNo();
826 // Don't include subblock sizes in the size of this block.
827 BlockBitStart
+= SubBlockBitEnd
- SubBlockBitStart
;
830 case BitstreamEntry::Record
:
831 // The interesting case.
835 if (Entry
.ID
== bitc::DEFINE_ABBREV
) {
836 if (Error Err
= Stream
.ReadAbbrevRecord())
838 ++BlockStats
.NumAbbrevs
;
844 ++BlockStats
.NumRecords
;
847 uint64_t CurrentRecordPos
= Stream
.GetCurrentBitNo();
849 if (Error E
= Stream
.readRecord(Entry
.ID
, Record
, &Blob
).moveInto(Code
))
852 // Increment the # occurrences of this code.
853 if (BlockStats
.CodeFreq
.size() <= Code
)
854 BlockStats
.CodeFreq
.resize(Code
+ 1);
855 BlockStats
.CodeFreq
[Code
].NumInstances
++;
856 BlockStats
.CodeFreq
[Code
].TotalBits
+=
857 Stream
.GetCurrentBitNo() - RecordStartBit
;
858 if (Entry
.ID
!= bitc::UNABBREV_RECORD
) {
859 BlockStats
.CodeFreq
[Code
].NumAbbrev
++;
860 ++BlockStats
.NumAbbreviatedRecords
;
864 O
->OS
<< Indent
<< " <";
865 std::optional
<const char *> CodeName
=
866 GetCodeName(Code
, BlockID
, BlockInfo
, CurStreamType
);
870 O
->OS
<< "UnknownCode" << Code
;
871 if (!O
->Symbolic
&& CodeName
)
872 O
->OS
<< " codeid=" << Code
;
873 const BitCodeAbbrev
*Abbv
= nullptr;
874 if (Entry
.ID
!= bitc::UNABBREV_RECORD
) {
875 Expected
<const BitCodeAbbrev
*> MaybeAbbv
= Stream
.getAbbrev(Entry
.ID
);
877 return MaybeAbbv
.takeError();
878 Abbv
= MaybeAbbv
.get();
879 O
->OS
<< " abbrevid=" << Entry
.ID
;
882 for (unsigned i
= 0, e
= Record
.size(); i
!= e
; ++i
)
883 O
->OS
<< " op" << i
<< "=" << (int64_t)Record
[i
];
885 // If we found a metadata index, let's verify that we had an offset
886 // before and validate its forward reference offset was correct!
887 if (BlockID
== bitc::METADATA_BLOCK_ID
) {
888 if (Code
== bitc::METADATA_INDEX_OFFSET
) {
889 if (Record
.size() != 2)
890 O
->OS
<< "(Invalid record)";
892 auto Offset
= Record
[0] + (Record
[1] << 32);
893 MetadataIndexOffset
= Stream
.GetCurrentBitNo() + Offset
;
896 if (Code
== bitc::METADATA_INDEX
) {
897 O
->OS
<< " (offset ";
898 if (MetadataIndexOffset
== RecordStartBit
)
901 O
->OS
<< "mismatch: " << MetadataIndexOffset
<< " vs "
902 << RecordStartBit
<< ")";
906 // If we found a module hash, let's verify that it matches!
907 if (BlockID
== bitc::MODULE_BLOCK_ID
&& Code
== bitc::MODULE_CODE_HASH
&&
909 if (Record
.size() != 5)
910 O
->OS
<< " (invalid)";
912 // Recompute the hash and compare it to the one in the bitcode
914 std::array
<uint8_t, 20> Hash
;
915 Hasher
.update(*CheckHash
);
917 int BlockSize
= (CurrentRecordPos
/ 8) - BlockEntryPos
;
918 auto Ptr
= Stream
.getPointerToByte(BlockEntryPos
, BlockSize
);
919 Hasher
.update(ArrayRef
<uint8_t>(Ptr
, BlockSize
));
920 Hash
= Hasher
.result();
922 std::array
<uint8_t, 20> RecordedHash
;
924 for (auto &Val
: Record
) {
925 assert(!(Val
>> 32) && "Unexpected high bits set");
926 support::endian::write32be(&RecordedHash
[Pos
], Val
);
929 if (Hash
== RecordedHash
)
932 O
->OS
<< " (!mismatch!)";
939 for (unsigned i
= 1, e
= Abbv
->getNumOperandInfos(); i
!= e
; ++i
) {
940 const BitCodeAbbrevOp
&Op
= Abbv
->getOperandInfo(i
);
941 if (!Op
.isEncoding() || Op
.getEncoding() != BitCodeAbbrevOp::Array
)
943 assert(i
+ 2 == e
&& "Array op not second to last");
945 bool ArrayIsPrintable
= true;
946 for (unsigned j
= i
- 1, je
= Record
.size(); j
!= je
; ++j
) {
947 if (!isPrint(static_cast<unsigned char>(Record
[j
]))) {
948 ArrayIsPrintable
= false;
951 Str
+= (char)Record
[j
];
953 if (ArrayIsPrintable
)
954 O
->OS
<< " record string = '" << Str
<< "'";
960 if (canDecodeBlob(Code
, BlockID
)) {
961 if (Error E
= decodeMetadataStringsBlob(Indent
, Record
, Blob
, O
->OS
))
964 O
->OS
<< " blob data = ";
965 if (O
->ShowBinaryBlobs
) {
967 O
->OS
.write_escaped(Blob
, /*hex=*/true) << "'";
969 bool BlobIsPrintable
= true;
971 if (!isPrint(static_cast<unsigned char>(C
))) {
972 BlobIsPrintable
= false;
977 O
->OS
<< "'" << Blob
<< "'";
979 O
->OS
<< "unprintable, " << Blob
.size() << " bytes.";
987 // Make sure that we can skip the current record.
988 if (Error Err
= Stream
.JumpToBit(CurrentRecordPos
))
990 if (Expected
<unsigned> Skipped
= Stream
.skipRecord(Entry
.ID
))
993 return Skipped
.takeError();