1 //===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Bitcode/BitcodeAnalyzer.h"
10 #include "llvm/Bitcode/BitcodeReader.h"
11 #include "llvm/Bitcode/LLVMBitCodes.h"
12 #include "llvm/Bitstream/BitCodes.h"
13 #include "llvm/Bitstream/BitstreamReader.h"
14 #include "llvm/Support/Format.h"
15 #include "llvm/Support/SHA1.h"
20 static Error
reportError(StringRef Message
) {
21 return createStringError(std::errc::illegal_byte_sequence
, Message
.data());
24 /// Return a symbolic block name if known, otherwise return null.
25 static std::optional
<const char *>
26 GetBlockName(unsigned BlockID
, const BitstreamBlockInfo
&BlockInfo
,
27 CurStreamTypeType CurStreamType
) {
28 // Standard blocks for all bitcode files.
29 if (BlockID
< bitc::FIRST_APPLICATION_BLOCKID
) {
30 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
)
31 return "BLOCKINFO_BLOCK";
35 // Check to see if we have a blockinfo record for this block, with a name.
36 if (const BitstreamBlockInfo::BlockInfo
*Info
=
37 BlockInfo
.getBlockInfo(BlockID
)) {
38 if (!Info
->Name
.empty())
39 return Info
->Name
.c_str();
42 if (CurStreamType
!= LLVMIRBitstream
)
48 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID
:
49 return "OPERAND_BUNDLE_TAGS_BLOCK";
50 case bitc::MODULE_BLOCK_ID
:
51 return "MODULE_BLOCK";
52 case bitc::PARAMATTR_BLOCK_ID
:
53 return "PARAMATTR_BLOCK";
54 case bitc::PARAMATTR_GROUP_BLOCK_ID
:
55 return "PARAMATTR_GROUP_BLOCK_ID";
56 case bitc::TYPE_BLOCK_ID_NEW
:
57 return "TYPE_BLOCK_ID";
58 case bitc::CONSTANTS_BLOCK_ID
:
59 return "CONSTANTS_BLOCK";
60 case bitc::FUNCTION_BLOCK_ID
:
61 return "FUNCTION_BLOCK";
62 case bitc::IDENTIFICATION_BLOCK_ID
:
63 return "IDENTIFICATION_BLOCK_ID";
64 case bitc::VALUE_SYMTAB_BLOCK_ID
:
65 return "VALUE_SYMTAB";
66 case bitc::METADATA_BLOCK_ID
:
67 return "METADATA_BLOCK";
68 case bitc::METADATA_KIND_BLOCK_ID
:
69 return "METADATA_KIND_BLOCK";
70 case bitc::METADATA_ATTACHMENT_ID
:
71 return "METADATA_ATTACHMENT_BLOCK";
72 case bitc::USELIST_BLOCK_ID
:
73 return "USELIST_BLOCK_ID";
74 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID
:
75 return "GLOBALVAL_SUMMARY_BLOCK";
76 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID
:
77 return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
78 case bitc::MODULE_STRTAB_BLOCK_ID
:
79 return "MODULE_STRTAB_BLOCK";
80 case bitc::STRTAB_BLOCK_ID
:
81 return "STRTAB_BLOCK";
82 case bitc::SYMTAB_BLOCK_ID
:
83 return "SYMTAB_BLOCK";
87 /// Return a symbolic code name if known, otherwise return null.
88 static std::optional
<const char *>
89 GetCodeName(unsigned CodeID
, unsigned BlockID
,
90 const BitstreamBlockInfo
&BlockInfo
,
91 CurStreamTypeType CurStreamType
) {
92 // Standard blocks for all bitcode files.
93 if (BlockID
< bitc::FIRST_APPLICATION_BLOCKID
) {
94 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
) {
98 case bitc::BLOCKINFO_CODE_SETBID
:
100 case bitc::BLOCKINFO_CODE_BLOCKNAME
:
102 case bitc::BLOCKINFO_CODE_SETRECORDNAME
:
103 return "SETRECORDNAME";
109 // Check to see if we have a blockinfo record for this record, with a name.
110 if (const BitstreamBlockInfo::BlockInfo
*Info
=
111 BlockInfo
.getBlockInfo(BlockID
)) {
112 for (const std::pair
<unsigned, std::string
> &RN
: Info
->RecordNames
)
113 if (RN
.first
== CodeID
)
114 return RN
.second
.c_str();
117 if (CurStreamType
!= LLVMIRBitstream
)
120 #define STRINGIFY_CODE(PREFIX, CODE) \
121 case bitc::PREFIX##_##CODE: \
126 case bitc::MODULE_BLOCK_ID
:
130 STRINGIFY_CODE(MODULE_CODE
, VERSION
)
131 STRINGIFY_CODE(MODULE_CODE
, TRIPLE
)
132 STRINGIFY_CODE(MODULE_CODE
, DATALAYOUT
)
133 STRINGIFY_CODE(MODULE_CODE
, ASM
)
134 STRINGIFY_CODE(MODULE_CODE
, SECTIONNAME
)
135 STRINGIFY_CODE(MODULE_CODE
, DEPLIB
) // Deprecated, present in old bitcode
136 STRINGIFY_CODE(MODULE_CODE
, GLOBALVAR
)
137 STRINGIFY_CODE(MODULE_CODE
, FUNCTION
)
138 STRINGIFY_CODE(MODULE_CODE
, ALIAS
)
139 STRINGIFY_CODE(MODULE_CODE
, GCNAME
)
140 STRINGIFY_CODE(MODULE_CODE
, COMDAT
)
141 STRINGIFY_CODE(MODULE_CODE
, VSTOFFSET
)
142 STRINGIFY_CODE(MODULE_CODE
, METADATA_VALUES_UNUSED
)
143 STRINGIFY_CODE(MODULE_CODE
, SOURCE_FILENAME
)
144 STRINGIFY_CODE(MODULE_CODE
, HASH
)
146 case bitc::IDENTIFICATION_BLOCK_ID
:
150 STRINGIFY_CODE(IDENTIFICATION_CODE
, STRING
)
151 STRINGIFY_CODE(IDENTIFICATION_CODE
, EPOCH
)
153 case bitc::PARAMATTR_BLOCK_ID
:
157 // FIXME: Should these be different?
158 case bitc::PARAMATTR_CODE_ENTRY_OLD
:
160 case bitc::PARAMATTR_CODE_ENTRY
:
163 case bitc::PARAMATTR_GROUP_BLOCK_ID
:
167 case bitc::PARAMATTR_GRP_CODE_ENTRY
:
170 case bitc::TYPE_BLOCK_ID_NEW
:
174 STRINGIFY_CODE(TYPE_CODE
, NUMENTRY
)
175 STRINGIFY_CODE(TYPE_CODE
, VOID
)
176 STRINGIFY_CODE(TYPE_CODE
, FLOAT
)
177 STRINGIFY_CODE(TYPE_CODE
, DOUBLE
)
178 STRINGIFY_CODE(TYPE_CODE
, LABEL
)
179 STRINGIFY_CODE(TYPE_CODE
, OPAQUE
)
180 STRINGIFY_CODE(TYPE_CODE
, INTEGER
)
181 STRINGIFY_CODE(TYPE_CODE
, POINTER
)
182 STRINGIFY_CODE(TYPE_CODE
, HALF
)
183 STRINGIFY_CODE(TYPE_CODE
, ARRAY
)
184 STRINGIFY_CODE(TYPE_CODE
, VECTOR
)
185 STRINGIFY_CODE(TYPE_CODE
, X86_FP80
)
186 STRINGIFY_CODE(TYPE_CODE
, FP128
)
187 STRINGIFY_CODE(TYPE_CODE
, PPC_FP128
)
188 STRINGIFY_CODE(TYPE_CODE
, METADATA
)
189 STRINGIFY_CODE(TYPE_CODE
, X86_MMX
)
190 STRINGIFY_CODE(TYPE_CODE
, STRUCT_ANON
)
191 STRINGIFY_CODE(TYPE_CODE
, STRUCT_NAME
)
192 STRINGIFY_CODE(TYPE_CODE
, STRUCT_NAMED
)
193 STRINGIFY_CODE(TYPE_CODE
, FUNCTION
)
194 STRINGIFY_CODE(TYPE_CODE
, TOKEN
)
195 STRINGIFY_CODE(TYPE_CODE
, BFLOAT
)
198 case bitc::CONSTANTS_BLOCK_ID
:
202 STRINGIFY_CODE(CST_CODE
, SETTYPE
)
203 STRINGIFY_CODE(CST_CODE
, NULL
)
204 STRINGIFY_CODE(CST_CODE
, UNDEF
)
205 STRINGIFY_CODE(CST_CODE
, INTEGER
)
206 STRINGIFY_CODE(CST_CODE
, WIDE_INTEGER
)
207 STRINGIFY_CODE(CST_CODE
, FLOAT
)
208 STRINGIFY_CODE(CST_CODE
, AGGREGATE
)
209 STRINGIFY_CODE(CST_CODE
, STRING
)
210 STRINGIFY_CODE(CST_CODE
, CSTRING
)
211 STRINGIFY_CODE(CST_CODE
, CE_BINOP
)
212 STRINGIFY_CODE(CST_CODE
, CE_CAST
)
213 STRINGIFY_CODE(CST_CODE
, CE_GEP
)
214 STRINGIFY_CODE(CST_CODE
, CE_INBOUNDS_GEP
)
215 STRINGIFY_CODE(CST_CODE
, CE_SELECT
)
216 STRINGIFY_CODE(CST_CODE
, CE_EXTRACTELT
)
217 STRINGIFY_CODE(CST_CODE
, CE_INSERTELT
)
218 STRINGIFY_CODE(CST_CODE
, CE_SHUFFLEVEC
)
219 STRINGIFY_CODE(CST_CODE
, CE_CMP
)
220 STRINGIFY_CODE(CST_CODE
, INLINEASM
)
221 STRINGIFY_CODE(CST_CODE
, CE_SHUFVEC_EX
)
222 STRINGIFY_CODE(CST_CODE
, CE_UNOP
)
223 STRINGIFY_CODE(CST_CODE
, DSO_LOCAL_EQUIVALENT
)
224 STRINGIFY_CODE(CST_CODE
, NO_CFI_VALUE
)
225 STRINGIFY_CODE(CST_CODE
, PTRAUTH
)
226 case bitc::CST_CODE_BLOCKADDRESS
:
227 return "CST_CODE_BLOCKADDRESS";
228 STRINGIFY_CODE(CST_CODE
, DATA
)
230 case bitc::FUNCTION_BLOCK_ID
:
234 STRINGIFY_CODE(FUNC_CODE
, DECLAREBLOCKS
)
235 STRINGIFY_CODE(FUNC_CODE
, INST_BINOP
)
236 STRINGIFY_CODE(FUNC_CODE
, INST_CAST
)
237 STRINGIFY_CODE(FUNC_CODE
, INST_GEP_OLD
)
238 STRINGIFY_CODE(FUNC_CODE
, INST_INBOUNDS_GEP_OLD
)
239 STRINGIFY_CODE(FUNC_CODE
, INST_SELECT
)
240 STRINGIFY_CODE(FUNC_CODE
, INST_EXTRACTELT
)
241 STRINGIFY_CODE(FUNC_CODE
, INST_INSERTELT
)
242 STRINGIFY_CODE(FUNC_CODE
, INST_SHUFFLEVEC
)
243 STRINGIFY_CODE(FUNC_CODE
, INST_CMP
)
244 STRINGIFY_CODE(FUNC_CODE
, INST_RET
)
245 STRINGIFY_CODE(FUNC_CODE
, INST_BR
)
246 STRINGIFY_CODE(FUNC_CODE
, INST_SWITCH
)
247 STRINGIFY_CODE(FUNC_CODE
, INST_INVOKE
)
248 STRINGIFY_CODE(FUNC_CODE
, INST_UNOP
)
249 STRINGIFY_CODE(FUNC_CODE
, INST_UNREACHABLE
)
250 STRINGIFY_CODE(FUNC_CODE
, INST_CLEANUPRET
)
251 STRINGIFY_CODE(FUNC_CODE
, INST_CATCHRET
)
252 STRINGIFY_CODE(FUNC_CODE
, INST_CATCHPAD
)
253 STRINGIFY_CODE(FUNC_CODE
, INST_PHI
)
254 STRINGIFY_CODE(FUNC_CODE
, INST_ALLOCA
)
255 STRINGIFY_CODE(FUNC_CODE
, INST_LOAD
)
256 STRINGIFY_CODE(FUNC_CODE
, INST_VAARG
)
257 STRINGIFY_CODE(FUNC_CODE
, INST_STORE
)
258 STRINGIFY_CODE(FUNC_CODE
, INST_EXTRACTVAL
)
259 STRINGIFY_CODE(FUNC_CODE
, INST_INSERTVAL
)
260 STRINGIFY_CODE(FUNC_CODE
, INST_CMP2
)
261 STRINGIFY_CODE(FUNC_CODE
, INST_VSELECT
)
262 STRINGIFY_CODE(FUNC_CODE
, DEBUG_LOC_AGAIN
)
263 STRINGIFY_CODE(FUNC_CODE
, INST_CALL
)
264 STRINGIFY_CODE(FUNC_CODE
, DEBUG_LOC
)
265 STRINGIFY_CODE(FUNC_CODE
, INST_GEP
)
266 STRINGIFY_CODE(FUNC_CODE
, OPERAND_BUNDLE
)
267 STRINGIFY_CODE(FUNC_CODE
, INST_FENCE
)
268 STRINGIFY_CODE(FUNC_CODE
, INST_ATOMICRMW
)
269 STRINGIFY_CODE(FUNC_CODE
, INST_LOADATOMIC
)
270 STRINGIFY_CODE(FUNC_CODE
, INST_STOREATOMIC
)
271 STRINGIFY_CODE(FUNC_CODE
, INST_CMPXCHG
)
272 STRINGIFY_CODE(FUNC_CODE
, INST_CALLBR
)
273 STRINGIFY_CODE(FUNC_CODE
, BLOCKADDR_USERS
)
274 STRINGIFY_CODE(FUNC_CODE
, DEBUG_RECORD_DECLARE
)
275 STRINGIFY_CODE(FUNC_CODE
, DEBUG_RECORD_VALUE
)
276 STRINGIFY_CODE(FUNC_CODE
, DEBUG_RECORD_ASSIGN
)
277 STRINGIFY_CODE(FUNC_CODE
, DEBUG_RECORD_VALUE_SIMPLE
)
278 STRINGIFY_CODE(FUNC_CODE
, DEBUG_RECORD_LABEL
)
280 case bitc::VALUE_SYMTAB_BLOCK_ID
:
284 STRINGIFY_CODE(VST_CODE
, ENTRY
)
285 STRINGIFY_CODE(VST_CODE
, BBENTRY
)
286 STRINGIFY_CODE(VST_CODE
, FNENTRY
)
287 STRINGIFY_CODE(VST_CODE
, COMBINED_ENTRY
)
289 case bitc::MODULE_STRTAB_BLOCK_ID
:
293 STRINGIFY_CODE(MST_CODE
, ENTRY
)
294 STRINGIFY_CODE(MST_CODE
, HASH
)
296 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID
:
297 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID
:
301 STRINGIFY_CODE(FS
, PERMODULE
)
302 STRINGIFY_CODE(FS
, PERMODULE_PROFILE
)
303 STRINGIFY_CODE(FS
, PERMODULE_RELBF
)
304 STRINGIFY_CODE(FS
, PERMODULE_GLOBALVAR_INIT_REFS
)
305 STRINGIFY_CODE(FS
, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS
)
306 STRINGIFY_CODE(FS
, COMBINED
)
307 STRINGIFY_CODE(FS
, COMBINED_PROFILE
)
308 STRINGIFY_CODE(FS
, COMBINED_GLOBALVAR_INIT_REFS
)
309 STRINGIFY_CODE(FS
, ALIAS
)
310 STRINGIFY_CODE(FS
, COMBINED_ALIAS
)
311 STRINGIFY_CODE(FS
, COMBINED_ORIGINAL_NAME
)
312 STRINGIFY_CODE(FS
, VERSION
)
313 STRINGIFY_CODE(FS
, FLAGS
)
314 STRINGIFY_CODE(FS
, TYPE_TESTS
)
315 STRINGIFY_CODE(FS
, TYPE_TEST_ASSUME_VCALLS
)
316 STRINGIFY_CODE(FS
, TYPE_CHECKED_LOAD_VCALLS
)
317 STRINGIFY_CODE(FS
, TYPE_TEST_ASSUME_CONST_VCALL
)
318 STRINGIFY_CODE(FS
, TYPE_CHECKED_LOAD_CONST_VCALL
)
319 STRINGIFY_CODE(FS
, VALUE_GUID
)
320 STRINGIFY_CODE(FS
, CFI_FUNCTION_DEFS
)
321 STRINGIFY_CODE(FS
, CFI_FUNCTION_DECLS
)
322 STRINGIFY_CODE(FS
, TYPE_ID
)
323 STRINGIFY_CODE(FS
, TYPE_ID_METADATA
)
324 STRINGIFY_CODE(FS
, BLOCK_COUNT
)
325 STRINGIFY_CODE(FS
, PARAM_ACCESS
)
326 STRINGIFY_CODE(FS
, PERMODULE_CALLSITE_INFO
)
327 STRINGIFY_CODE(FS
, PERMODULE_ALLOC_INFO
)
328 STRINGIFY_CODE(FS
, COMBINED_CALLSITE_INFO
)
329 STRINGIFY_CODE(FS
, COMBINED_ALLOC_INFO
)
330 STRINGIFY_CODE(FS
, STACK_IDS
)
331 STRINGIFY_CODE(FS
, ALLOC_CONTEXT_IDS
)
332 STRINGIFY_CODE(FS
, CONTEXT_RADIX_TREE_ARRAY
)
334 case bitc::METADATA_ATTACHMENT_ID
:
338 STRINGIFY_CODE(METADATA
, ATTACHMENT
)
340 case bitc::METADATA_BLOCK_ID
:
344 STRINGIFY_CODE(METADATA
, STRING_OLD
)
345 STRINGIFY_CODE(METADATA
, VALUE
)
346 STRINGIFY_CODE(METADATA
, NODE
)
347 STRINGIFY_CODE(METADATA
, NAME
)
348 STRINGIFY_CODE(METADATA
, DISTINCT_NODE
)
349 STRINGIFY_CODE(METADATA
, KIND
) // Older bitcode has it in a MODULE_BLOCK
350 STRINGIFY_CODE(METADATA
, LOCATION
)
351 STRINGIFY_CODE(METADATA
, OLD_NODE
)
352 STRINGIFY_CODE(METADATA
, OLD_FN_NODE
)
353 STRINGIFY_CODE(METADATA
, NAMED_NODE
)
354 STRINGIFY_CODE(METADATA
, GENERIC_DEBUG
)
355 STRINGIFY_CODE(METADATA
, SUBRANGE
)
356 STRINGIFY_CODE(METADATA
, ENUMERATOR
)
357 STRINGIFY_CODE(METADATA
, BASIC_TYPE
)
358 STRINGIFY_CODE(METADATA
, FILE)
359 STRINGIFY_CODE(METADATA
, DERIVED_TYPE
)
360 STRINGIFY_CODE(METADATA
, COMPOSITE_TYPE
)
361 STRINGIFY_CODE(METADATA
, SUBROUTINE_TYPE
)
362 STRINGIFY_CODE(METADATA
, COMPILE_UNIT
)
363 STRINGIFY_CODE(METADATA
, SUBPROGRAM
)
364 STRINGIFY_CODE(METADATA
, LEXICAL_BLOCK
)
365 STRINGIFY_CODE(METADATA
, LEXICAL_BLOCK_FILE
)
366 STRINGIFY_CODE(METADATA
, NAMESPACE
)
367 STRINGIFY_CODE(METADATA
, TEMPLATE_TYPE
)
368 STRINGIFY_CODE(METADATA
, TEMPLATE_VALUE
)
369 STRINGIFY_CODE(METADATA
, GLOBAL_VAR
)
370 STRINGIFY_CODE(METADATA
, LOCAL_VAR
)
371 STRINGIFY_CODE(METADATA
, EXPRESSION
)
372 STRINGIFY_CODE(METADATA
, OBJC_PROPERTY
)
373 STRINGIFY_CODE(METADATA
, IMPORTED_ENTITY
)
374 STRINGIFY_CODE(METADATA
, MODULE
)
375 STRINGIFY_CODE(METADATA
, MACRO
)
376 STRINGIFY_CODE(METADATA
, MACRO_FILE
)
377 STRINGIFY_CODE(METADATA
, STRINGS
)
378 STRINGIFY_CODE(METADATA
, GLOBAL_DECL_ATTACHMENT
)
379 STRINGIFY_CODE(METADATA
, GLOBAL_VAR_EXPR
)
380 STRINGIFY_CODE(METADATA
, INDEX_OFFSET
)
381 STRINGIFY_CODE(METADATA
, INDEX
)
382 STRINGIFY_CODE(METADATA
, ARG_LIST
)
384 case bitc::METADATA_KIND_BLOCK_ID
:
388 STRINGIFY_CODE(METADATA
, KIND
)
390 case bitc::USELIST_BLOCK_ID
:
394 case bitc::USELIST_CODE_DEFAULT
:
395 return "USELIST_CODE_DEFAULT";
396 case bitc::USELIST_CODE_BB
:
397 return "USELIST_CODE_BB";
400 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID
:
404 case bitc::OPERAND_BUNDLE_TAG
:
405 return "OPERAND_BUNDLE_TAG";
407 case bitc::STRTAB_BLOCK_ID
:
411 case bitc::STRTAB_BLOB
:
414 case bitc::SYMTAB_BLOCK_ID
:
418 case bitc::SYMTAB_BLOB
:
422 #undef STRINGIFY_CODE
425 static void printSize(raw_ostream
&OS
, double Bits
) {
426 OS
<< format("%.2f/%.2fB/%luW", Bits
, Bits
/ 8, (unsigned long)(Bits
/ 32));
428 static void printSize(raw_ostream
&OS
, uint64_t Bits
) {
429 OS
<< format("%lub/%.2fB/%luW", (unsigned long)Bits
, (double)Bits
/ 8,
430 (unsigned long)(Bits
/ 32));
433 static Expected
<CurStreamTypeType
> ReadSignature(BitstreamCursor
&Stream
) {
434 auto tryRead
= [&Stream
](char &Dest
, size_t size
) -> Error
{
435 if (Expected
<SimpleBitstreamCursor::word_t
> MaybeWord
= Stream
.Read(size
))
436 Dest
= MaybeWord
.get();
438 return MaybeWord
.takeError();
439 return Error::success();
443 if (Error Err
= tryRead(Signature
[0], 8))
444 return std::move(Err
);
445 if (Error Err
= tryRead(Signature
[1], 8))
446 return std::move(Err
);
448 // Autodetect the file contents, if it is one we know.
449 if (Signature
[0] == 'C' && Signature
[1] == 'P') {
450 if (Error Err
= tryRead(Signature
[2], 8))
451 return std::move(Err
);
452 if (Error Err
= tryRead(Signature
[3], 8))
453 return std::move(Err
);
454 if (Signature
[2] == 'C' && Signature
[3] == 'H')
455 return ClangSerializedASTBitstream
;
456 } else if (Signature
[0] == 'D' && Signature
[1] == 'I') {
457 if (Error Err
= tryRead(Signature
[2], 8))
458 return std::move(Err
);
459 if (Error Err
= tryRead(Signature
[3], 8))
460 return std::move(Err
);
461 if (Signature
[2] == 'A' && Signature
[3] == 'G')
462 return ClangSerializedDiagnosticsBitstream
;
463 } else if (Signature
[0] == 'R' && Signature
[1] == 'M') {
464 if (Error Err
= tryRead(Signature
[2], 8))
465 return std::move(Err
);
466 if (Error Err
= tryRead(Signature
[3], 8))
467 return std::move(Err
);
468 if (Signature
[2] == 'R' && Signature
[3] == 'K')
469 return LLVMBitstreamRemarks
;
471 if (Error Err
= tryRead(Signature
[2], 4))
472 return std::move(Err
);
473 if (Error Err
= tryRead(Signature
[3], 4))
474 return std::move(Err
);
475 if (Error Err
= tryRead(Signature
[4], 4))
476 return std::move(Err
);
477 if (Error Err
= tryRead(Signature
[5], 4))
478 return std::move(Err
);
479 if (Signature
[0] == 'B' && Signature
[1] == 'C' && Signature
[2] == 0x0 &&
480 Signature
[3] == 0xC && Signature
[4] == 0xE && Signature
[5] == 0xD)
481 return LLVMIRBitstream
;
483 return UnknownBitstream
;
486 static Expected
<CurStreamTypeType
> analyzeHeader(std::optional
<BCDumpOptions
> O
,
487 BitstreamCursor
&Stream
) {
488 ArrayRef
<uint8_t> Bytes
= Stream
.getBitcodeBytes();
489 const unsigned char *BufPtr
= (const unsigned char *)Bytes
.data();
490 const unsigned char *EndBufPtr
= BufPtr
+ Bytes
.size();
492 // If we have a wrapper header, parse it and ignore the non-bc file
493 // contents. The magic number is 0x0B17C0DE stored in little endian.
494 if (isBitcodeWrapper(BufPtr
, EndBufPtr
)) {
495 if (Bytes
.size() < BWH_HeaderSize
)
496 return reportError("Invalid bitcode wrapper header");
499 unsigned Magic
= support::endian::read32le(&BufPtr
[BWH_MagicField
]);
500 unsigned Version
= support::endian::read32le(&BufPtr
[BWH_VersionField
]);
501 unsigned Offset
= support::endian::read32le(&BufPtr
[BWH_OffsetField
]);
502 unsigned Size
= support::endian::read32le(&BufPtr
[BWH_SizeField
]);
503 unsigned CPUType
= support::endian::read32le(&BufPtr
[BWH_CPUTypeField
]);
505 O
->OS
<< "<BITCODE_WRAPPER_HEADER"
506 << " Magic=" << format_hex(Magic
, 10)
507 << " Version=" << format_hex(Version
, 10)
508 << " Offset=" << format_hex(Offset
, 10)
509 << " Size=" << format_hex(Size
, 10)
510 << " CPUType=" << format_hex(CPUType
, 10) << "/>\n";
513 if (SkipBitcodeWrapperHeader(BufPtr
, EndBufPtr
, true))
514 return reportError("Invalid bitcode wrapper header");
517 // Use the cursor modified by skipping the wrapper header.
518 Stream
= BitstreamCursor(ArrayRef
<uint8_t>(BufPtr
, EndBufPtr
));
520 return ReadSignature(Stream
);
523 static bool canDecodeBlob(unsigned Code
, unsigned BlockID
) {
524 return BlockID
== bitc::METADATA_BLOCK_ID
&& Code
== bitc::METADATA_STRINGS
;
527 Error
BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent
,
528 ArrayRef
<uint64_t> Record
,
532 return reportError("Cannot decode empty blob.");
534 if (Record
.size() != 2)
536 "Decoding metadata strings blob needs two record entries.");
538 unsigned NumStrings
= Record
[0];
539 unsigned StringsOffset
= Record
[1];
540 OS
<< " num-strings = " << NumStrings
<< " {\n";
542 StringRef Lengths
= Blob
.slice(0, StringsOffset
);
543 SimpleBitstreamCursor
R(Lengths
);
544 StringRef Strings
= Blob
.drop_front(StringsOffset
);
546 if (R
.AtEndOfStream())
547 return reportError("bad length");
550 if (Error E
= R
.ReadVBR(6).moveInto(Size
))
552 if (Strings
.size() < Size
)
553 return reportError("truncated chars");
555 OS
<< Indent
<< " '";
556 OS
.write_escaped(Strings
.slice(0, Size
), /*hex=*/true);
558 Strings
= Strings
.drop_front(Size
);
559 } while (--NumStrings
);
561 OS
<< Indent
<< " }";
562 return Error::success();
565 BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer
,
566 std::optional
<StringRef
> BlockInfoBuffer
)
569 BlockInfoStream
.emplace(*BlockInfoBuffer
);
572 Error
BitcodeAnalyzer::analyze(std::optional
<BCDumpOptions
> O
,
573 std::optional
<StringRef
> CheckHash
) {
574 if (Error E
= analyzeHeader(O
, Stream
).moveInto(CurStreamType
))
577 Stream
.setBlockInfo(&BlockInfo
);
579 // Read block info from BlockInfoStream, if specified.
580 // The block info must be a top-level block.
581 if (BlockInfoStream
) {
582 BitstreamCursor
BlockInfoCursor(*BlockInfoStream
);
583 if (Error E
= analyzeHeader(O
, BlockInfoCursor
).takeError())
586 while (!BlockInfoCursor
.AtEndOfStream()) {
587 Expected
<unsigned> MaybeCode
= BlockInfoCursor
.ReadCode();
589 return MaybeCode
.takeError();
590 if (MaybeCode
.get() != bitc::ENTER_SUBBLOCK
)
591 return reportError("Invalid record at top-level in block info file");
593 Expected
<unsigned> MaybeBlockID
= BlockInfoCursor
.ReadSubBlockID();
595 return MaybeBlockID
.takeError();
596 if (MaybeBlockID
.get() == bitc::BLOCKINFO_BLOCK_ID
) {
597 std::optional
<BitstreamBlockInfo
> NewBlockInfo
;
599 BlockInfoCursor
.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
600 .moveInto(NewBlockInfo
))
603 return reportError("Malformed BlockInfoBlock in block info file");
604 BlockInfo
= std::move(*NewBlockInfo
);
608 if (Error Err
= BlockInfoCursor
.SkipBlock())
613 // Parse the top-level structure. We only allow blocks at the top-level.
614 while (!Stream
.AtEndOfStream()) {
615 Expected
<unsigned> MaybeCode
= Stream
.ReadCode();
617 return MaybeCode
.takeError();
618 if (MaybeCode
.get() != bitc::ENTER_SUBBLOCK
)
619 return reportError("Invalid record at top-level");
621 Expected
<unsigned> MaybeBlockID
= Stream
.ReadSubBlockID();
623 return MaybeBlockID
.takeError();
625 if (Error E
= parseBlock(MaybeBlockID
.get(), 0, O
, CheckHash
))
630 return Error::success();
633 void BitcodeAnalyzer::printStats(BCDumpOptions O
,
634 std::optional
<StringRef
> Filename
) {
635 uint64_t BufferSizeBits
= Stream
.getBitcodeBytes().size() * CHAR_BIT
;
636 // Print a summary of the read file.
639 O
.OS
<< "of " << Filename
->data() << ":\n";
640 O
.OS
<< " Total size: ";
641 printSize(O
.OS
, BufferSizeBits
);
643 O
.OS
<< " Stream type: ";
644 switch (CurStreamType
) {
645 case UnknownBitstream
:
648 case LLVMIRBitstream
:
651 case ClangSerializedASTBitstream
:
652 O
.OS
<< "Clang Serialized AST\n";
654 case ClangSerializedDiagnosticsBitstream
:
655 O
.OS
<< "Clang Serialized Diagnostics\n";
657 case LLVMBitstreamRemarks
:
658 O
.OS
<< "LLVM Remarks\n";
661 O
.OS
<< " # Toplevel Blocks: " << NumTopBlocks
<< "\n";
664 // Emit per-block stats.
665 O
.OS
<< "Per-block Summary:\n";
666 for (const auto &Stat
: BlockIDStats
) {
667 O
.OS
<< " Block ID #" << Stat
.first
;
668 if (std::optional
<const char *> BlockName
=
669 GetBlockName(Stat
.first
, BlockInfo
, CurStreamType
))
670 O
.OS
<< " (" << *BlockName
<< ")";
673 const PerBlockIDStats
&Stats
= Stat
.second
;
674 O
.OS
<< " Num Instances: " << Stats
.NumInstances
<< "\n";
675 O
.OS
<< " Total Size: ";
676 printSize(O
.OS
, Stats
.NumBits
);
678 double pct
= (Stats
.NumBits
* 100.0) / BufferSizeBits
;
679 O
.OS
<< " Percent of file: " << format("%2.4f%%", pct
) << "\n";
680 if (Stats
.NumInstances
> 1) {
681 O
.OS
<< " Average Size: ";
682 printSize(O
.OS
, Stats
.NumBits
/ (double)Stats
.NumInstances
);
684 O
.OS
<< " Tot/Avg SubBlocks: " << Stats
.NumSubBlocks
<< "/"
685 << Stats
.NumSubBlocks
/ (double)Stats
.NumInstances
<< "\n";
686 O
.OS
<< " Tot/Avg Abbrevs: " << Stats
.NumAbbrevs
<< "/"
687 << Stats
.NumAbbrevs
/ (double)Stats
.NumInstances
<< "\n";
688 O
.OS
<< " Tot/Avg Records: " << Stats
.NumRecords
<< "/"
689 << Stats
.NumRecords
/ (double)Stats
.NumInstances
<< "\n";
691 O
.OS
<< " Num SubBlocks: " << Stats
.NumSubBlocks
<< "\n";
692 O
.OS
<< " Num Abbrevs: " << Stats
.NumAbbrevs
<< "\n";
693 O
.OS
<< " Num Records: " << Stats
.NumRecords
<< "\n";
695 if (Stats
.NumRecords
) {
696 double pct
= (Stats
.NumAbbreviatedRecords
* 100.0) / Stats
.NumRecords
;
697 O
.OS
<< " Percent Abbrevs: " << format("%2.4f%%", pct
) << "\n";
701 // Print a histogram of the codes we see.
702 if (O
.Histogram
&& !Stats
.CodeFreq
.empty()) {
703 std::vector
<std::pair
<unsigned, unsigned>> FreqPairs
; // <freq,code>
704 for (unsigned i
= 0, e
= Stats
.CodeFreq
.size(); i
!= e
; ++i
)
705 if (unsigned Freq
= Stats
.CodeFreq
[i
].NumInstances
)
706 FreqPairs
.push_back(std::make_pair(Freq
, i
));
707 llvm::stable_sort(FreqPairs
);
708 std::reverse(FreqPairs
.begin(), FreqPairs
.end());
710 O
.OS
<< "\tRecord Histogram:\n";
711 O
.OS
<< "\t\t Count # Bits b/Rec % Abv Record Kind\n";
712 for (const auto &FreqPair
: FreqPairs
) {
713 const PerRecordStats
&RecStats
= Stats
.CodeFreq
[FreqPair
.second
];
715 O
.OS
<< format("\t\t%7d %9lu", RecStats
.NumInstances
,
716 (unsigned long)RecStats
.TotalBits
);
718 if (RecStats
.NumInstances
> 1)
719 O
.OS
<< format(" %9.1f",
720 (double)RecStats
.TotalBits
/ RecStats
.NumInstances
);
724 if (RecStats
.NumAbbrev
)
725 O
.OS
<< format(" %7.2f", (double)RecStats
.NumAbbrev
/
726 RecStats
.NumInstances
* 100);
731 if (std::optional
<const char *> CodeName
= GetCodeName(
732 FreqPair
.second
, Stat
.first
, BlockInfo
, CurStreamType
))
733 O
.OS
<< *CodeName
<< "\n";
735 O
.OS
<< "UnknownCode" << FreqPair
.second
<< "\n";
742 Error
BitcodeAnalyzer::parseBlock(unsigned BlockID
, unsigned IndentLevel
,
743 std::optional
<BCDumpOptions
> O
,
744 std::optional
<StringRef
> CheckHash
) {
745 std::string
Indent(IndentLevel
* 2, ' ');
746 uint64_t BlockBitStart
= Stream
.GetCurrentBitNo();
748 // Get the statistics for this BlockID.
749 PerBlockIDStats
&BlockStats
= BlockIDStats
[BlockID
];
751 BlockStats
.NumInstances
++;
753 // BLOCKINFO is a special part of the stream.
754 bool DumpRecords
= O
.has_value();
755 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
) {
756 if (O
&& !O
->DumpBlockinfo
)
757 O
->OS
<< Indent
<< "<BLOCKINFO_BLOCK/>\n";
758 std::optional
<BitstreamBlockInfo
> NewBlockInfo
;
759 if (Error E
= Stream
.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
760 .moveInto(NewBlockInfo
))
763 return reportError("Malformed BlockInfoBlock");
764 BlockInfo
= std::move(*NewBlockInfo
);
765 if (Error Err
= Stream
.JumpToBit(BlockBitStart
))
767 // It's not really interesting to dump the contents of the blockinfo
768 // block, so only do it if the user explicitly requests it.
769 DumpRecords
= O
&& O
->DumpBlockinfo
;
772 unsigned NumWords
= 0;
773 if (Error Err
= Stream
.EnterSubBlock(BlockID
, &NumWords
))
776 // Keep it for later, when we see a MODULE_HASH record
777 uint64_t BlockEntryPos
= Stream
.getCurrentByteNo();
779 std::optional
<const char *> BlockName
;
781 O
->OS
<< Indent
<< "<";
782 if ((BlockName
= GetBlockName(BlockID
, BlockInfo
, CurStreamType
)))
785 O
->OS
<< "UnknownBlock" << BlockID
;
787 if (!O
->Symbolic
&& BlockName
)
788 O
->OS
<< " BlockID=" << BlockID
;
790 O
->OS
<< " NumWords=" << NumWords
791 << " BlockCodeSize=" << Stream
.getAbbrevIDWidth() << ">\n";
794 SmallVector
<uint64_t, 64> Record
;
796 // Keep the offset to the metadata index if seen.
797 uint64_t MetadataIndexOffset
= 0;
799 // Read all the records for this block.
801 if (Stream
.AtEndOfStream())
802 return reportError("Premature end of bitstream");
804 uint64_t RecordStartBit
= Stream
.GetCurrentBitNo();
806 BitstreamEntry Entry
;
807 if (Error E
= Stream
.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs
)
811 switch (Entry
.Kind
) {
812 case BitstreamEntry::Error
:
813 return reportError("malformed bitcode file");
814 case BitstreamEntry::EndBlock
: {
815 uint64_t BlockBitEnd
= Stream
.GetCurrentBitNo();
816 BlockStats
.NumBits
+= BlockBitEnd
- BlockBitStart
;
818 O
->OS
<< Indent
<< "</";
820 O
->OS
<< *BlockName
<< ">\n";
822 O
->OS
<< "UnknownBlock" << BlockID
<< ">\n";
824 return Error::success();
827 case BitstreamEntry::SubBlock
: {
828 uint64_t SubBlockBitStart
= Stream
.GetCurrentBitNo();
829 if (Error E
= parseBlock(Entry
.ID
, IndentLevel
+ 1, O
, CheckHash
))
831 ++BlockStats
.NumSubBlocks
;
832 uint64_t SubBlockBitEnd
= Stream
.GetCurrentBitNo();
834 // Don't include subblock sizes in the size of this block.
835 BlockBitStart
+= SubBlockBitEnd
- SubBlockBitStart
;
838 case BitstreamEntry::Record
:
839 // The interesting case.
843 if (Entry
.ID
== bitc::DEFINE_ABBREV
) {
844 if (Error Err
= Stream
.ReadAbbrevRecord())
846 ++BlockStats
.NumAbbrevs
;
852 ++BlockStats
.NumRecords
;
855 uint64_t CurrentRecordPos
= Stream
.GetCurrentBitNo();
857 if (Error E
= Stream
.readRecord(Entry
.ID
, Record
, &Blob
).moveInto(Code
))
860 // Increment the # occurrences of this code.
861 if (BlockStats
.CodeFreq
.size() <= Code
)
862 BlockStats
.CodeFreq
.resize(Code
+ 1);
863 BlockStats
.CodeFreq
[Code
].NumInstances
++;
864 BlockStats
.CodeFreq
[Code
].TotalBits
+=
865 Stream
.GetCurrentBitNo() - RecordStartBit
;
866 if (Entry
.ID
!= bitc::UNABBREV_RECORD
) {
867 BlockStats
.CodeFreq
[Code
].NumAbbrev
++;
868 ++BlockStats
.NumAbbreviatedRecords
;
872 O
->OS
<< Indent
<< " <";
873 std::optional
<const char *> CodeName
=
874 GetCodeName(Code
, BlockID
, BlockInfo
, CurStreamType
);
878 O
->OS
<< "UnknownCode" << Code
;
879 if (!O
->Symbolic
&& CodeName
)
880 O
->OS
<< " codeid=" << Code
;
881 const BitCodeAbbrev
*Abbv
= nullptr;
882 if (Entry
.ID
!= bitc::UNABBREV_RECORD
) {
883 Expected
<const BitCodeAbbrev
*> MaybeAbbv
= Stream
.getAbbrev(Entry
.ID
);
885 return MaybeAbbv
.takeError();
886 Abbv
= MaybeAbbv
.get();
887 O
->OS
<< " abbrevid=" << Entry
.ID
;
890 for (unsigned i
= 0, e
= Record
.size(); i
!= e
; ++i
)
891 O
->OS
<< " op" << i
<< "=" << (int64_t)Record
[i
];
893 // If we found a metadata index, let's verify that we had an offset
894 // before and validate its forward reference offset was correct!
895 if (BlockID
== bitc::METADATA_BLOCK_ID
) {
896 if (Code
== bitc::METADATA_INDEX_OFFSET
) {
897 if (Record
.size() != 2)
898 O
->OS
<< "(Invalid record)";
900 auto Offset
= Record
[0] + (Record
[1] << 32);
901 MetadataIndexOffset
= Stream
.GetCurrentBitNo() + Offset
;
904 if (Code
== bitc::METADATA_INDEX
) {
905 O
->OS
<< " (offset ";
906 if (MetadataIndexOffset
== RecordStartBit
)
909 O
->OS
<< "mismatch: " << MetadataIndexOffset
<< " vs "
910 << RecordStartBit
<< ")";
914 // If we found a module hash, let's verify that it matches!
915 if (BlockID
== bitc::MODULE_BLOCK_ID
&& Code
== bitc::MODULE_CODE_HASH
&&
917 if (Record
.size() != 5)
918 O
->OS
<< " (invalid)";
920 // Recompute the hash and compare it to the one in the bitcode
922 std::array
<uint8_t, 20> Hash
;
923 Hasher
.update(*CheckHash
);
925 int BlockSize
= (CurrentRecordPos
/ 8) - BlockEntryPos
;
926 auto Ptr
= Stream
.getPointerToByte(BlockEntryPos
, BlockSize
);
927 Hasher
.update(ArrayRef
<uint8_t>(Ptr
, BlockSize
));
928 Hash
= Hasher
.result();
930 std::array
<uint8_t, 20> RecordedHash
;
932 for (auto &Val
: Record
) {
933 assert(!(Val
>> 32) && "Unexpected high bits set");
934 support::endian::write32be(&RecordedHash
[Pos
], Val
);
937 if (Hash
== RecordedHash
)
940 O
->OS
<< " (!mismatch!)";
947 for (unsigned i
= 1, e
= Abbv
->getNumOperandInfos(); i
!= e
; ++i
) {
948 const BitCodeAbbrevOp
&Op
= Abbv
->getOperandInfo(i
);
949 if (!Op
.isEncoding() || Op
.getEncoding() != BitCodeAbbrevOp::Array
)
951 assert(i
+ 2 == e
&& "Array op not second to last");
953 bool ArrayIsPrintable
= true;
954 for (unsigned j
= i
- 1, je
= Record
.size(); j
!= je
; ++j
) {
955 if (!isPrint(static_cast<unsigned char>(Record
[j
]))) {
956 ArrayIsPrintable
= false;
959 Str
+= (char)Record
[j
];
961 if (ArrayIsPrintable
)
962 O
->OS
<< " record string = '" << Str
<< "'";
968 if (canDecodeBlob(Code
, BlockID
)) {
969 if (Error E
= decodeMetadataStringsBlob(Indent
, Record
, Blob
, O
->OS
))
972 O
->OS
<< " blob data = ";
973 if (O
->ShowBinaryBlobs
) {
975 O
->OS
.write_escaped(Blob
, /*hex=*/true) << "'";
977 bool BlobIsPrintable
= true;
979 if (!isPrint(static_cast<unsigned char>(C
))) {
980 BlobIsPrintable
= false;
985 O
->OS
<< "'" << Blob
<< "'";
987 O
->OS
<< "unprintable, " << Blob
.size() << " bytes.";
995 // Make sure that we can skip the current record.
996 if (Error Err
= Stream
.JumpToBit(CurrentRecordPos
))
998 if (Expected
<unsigned> Skipped
= Stream
.skipRecord(Entry
.ID
))
1001 return Skipped
.takeError();