1 //===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Bitcode/BitcodeAnalyzer.h"
10 #include "llvm/Bitcode/BitcodeReader.h"
11 #include "llvm/Bitcode/LLVMBitCodes.h"
12 #include "llvm/Bitstream/BitCodes.h"
13 #include "llvm/Bitstream/BitstreamReader.h"
14 #include "llvm/Support/Format.h"
15 #include "llvm/Support/SHA1.h"
// Build an llvm::Error carrying Message, tagged with
// std::errc::illegal_byte_sequence. Used by the parsing code in this file to
// report malformed input.
// NOTE(review): Message.data() is passed to createStringError as a raw
// `const char *`. A StringRef is not guaranteed to be NUL-terminated, and if
// the selected overload treats the pointer as a printf-style format string a
// '%' in Message would be interpreted. The callers visible in this file pass
// string literals, which is safe; confirm before passing dynamic text.
20 static Error
reportError(StringRef Message
) {
21 return createStringError(std::errc::illegal_byte_sequence
, Message
.data());
// Map a numeric BlockID to a printable block name.
//
// Lookup order, as far as it is visible here:
//   1. IDs below bitc::FIRST_APPLICATION_BLOCKID: the BLOCKINFO block gets a
//      fixed name.
//   2. A name registered for BlockID in BlockInfo, if non-empty.
//   3. A built-in table of LLVM IR block IDs, consulted after the
//      `CurStreamType != LLVMIRBitstream` check (the body of that check is not
//      visible in this view; presumably it returns "no name" — TODO confirm).
//
// The return type is std::optional<const char *>, so "null" in the comment
// below presumably means an empty optional (the fall-through return is not
// visible here).
// NOTE(review): the pointer returned from step 2 is Info->Name.c_str(), so it
// points into storage owned by BlockInfo.
24 /// Return a symbolic block name if known, otherwise return null.
25 static std::optional
<const char *>
26 GetBlockName(unsigned BlockID
, const BitstreamBlockInfo
&BlockInfo
,
27 CurStreamTypeType CurStreamType
) {
28 // Standard blocks for all bitcode files.
29 if (BlockID
< bitc::FIRST_APPLICATION_BLOCKID
) {
30 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
)
31 return "BLOCKINFO_BLOCK";
35 // Check to see if we have a blockinfo record for this block, with a name.
36 if (const BitstreamBlockInfo::BlockInfo
*Info
=
37 BlockInfo
.getBlockInfo(BlockID
)) {
38 if (!Info
->Name
.empty())
39 return Info
->Name
.c_str();
// Only LLVM IR bitstreams use the built-in table below.
42 if (CurStreamType
!= LLVMIRBitstream
)
// Built-in names for LLVM IR block IDs.
// NOTE(review): the returned spellings are not uniform — some carry an "_ID"
// suffix (PARAMATTR_GROUP_BLOCK_ID, TYPE_BLOCK_ID, IDENTIFICATION_BLOCK_ID,
// USELIST_BLOCK_ID) and VALUE_SYMTAB lacks "_BLOCK". These strings are emitted
// at runtime, so tools may depend on them; do not normalize without checking.
48 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID
:
49 return "OPERAND_BUNDLE_TAGS_BLOCK";
50 case bitc::MODULE_BLOCK_ID
:
51 return "MODULE_BLOCK";
52 case bitc::PARAMATTR_BLOCK_ID
:
53 return "PARAMATTR_BLOCK";
54 case bitc::PARAMATTR_GROUP_BLOCK_ID
:
55 return "PARAMATTR_GROUP_BLOCK_ID";
56 case bitc::TYPE_BLOCK_ID_NEW
:
57 return "TYPE_BLOCK_ID";
58 case bitc::CONSTANTS_BLOCK_ID
:
59 return "CONSTANTS_BLOCK";
60 case bitc::FUNCTION_BLOCK_ID
:
61 return "FUNCTION_BLOCK";
62 case bitc::IDENTIFICATION_BLOCK_ID
:
63 return "IDENTIFICATION_BLOCK_ID";
64 case bitc::VALUE_SYMTAB_BLOCK_ID
:
65 return "VALUE_SYMTAB";
66 case bitc::METADATA_BLOCK_ID
:
67 return "METADATA_BLOCK";
68 case bitc::METADATA_KIND_BLOCK_ID
:
69 return "METADATA_KIND_BLOCK";
70 case bitc::METADATA_ATTACHMENT_ID
:
71 return "METADATA_ATTACHMENT_BLOCK";
72 case bitc::USELIST_BLOCK_ID
:
73 return "USELIST_BLOCK_ID";
74 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID
:
75 return "GLOBALVAL_SUMMARY_BLOCK";
76 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID
:
77 return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
78 case bitc::MODULE_STRTAB_BLOCK_ID
:
79 return "MODULE_STRTAB_BLOCK";
80 case bitc::STRTAB_BLOCK_ID
:
81 return "STRTAB_BLOCK";
82 case bitc::SYMTAB_BLOCK_ID
:
83 return "SYMTAB_BLOCK";
// Map a record code (CodeID) inside block BlockID to a printable name.
//
// Lookup order, as far as it is visible here:
//   1. Built-in record codes of the BLOCKINFO block (for IDs below
//      bitc::FIRST_APPLICATION_BLOCKID).
//   2. Record names registered for BlockID in BlockInfo (linear scan over
//      Info->RecordNames for a matching code).
//   3. A per-block table of LLVM IR record codes, consulted after the
//      `CurStreamType != LLVMIRBitstream` check (the body of that check is not
//      visible in this view; presumably it returns "no name" — TODO confirm).
//
// The return type is std::optional<const char *>, so "null" in the comment
// below presumably means an empty optional (the fall-through returns are not
// visible here).
// NOTE(review): the pointer returned from step 2 is RN.second.c_str(), so it
// points into storage owned by BlockInfo.
87 /// Return a symbolic code name if known, otherwise return null.
88 static std::optional
<const char *>
89 GetCodeName(unsigned CodeID
, unsigned BlockID
,
90 const BitstreamBlockInfo
&BlockInfo
,
91 CurStreamTypeType CurStreamType
) {
92 // Standard blocks for all bitcode files.
93 if (BlockID
< bitc::FIRST_APPLICATION_BLOCKID
) {
94 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
) {
// Record codes defined for the BLOCKINFO block itself.
98 case bitc::BLOCKINFO_CODE_SETBID
:
100 case bitc::BLOCKINFO_CODE_BLOCKNAME
:
102 case bitc::BLOCKINFO_CODE_SETRECORDNAME
:
103 return "SETRECORDNAME";
109 // Check to see if we have a blockinfo record for this record, with a name.
110 if (const BitstreamBlockInfo::BlockInfo
*Info
=
111 BlockInfo
.getBlockInfo(BlockID
)) {
112 for (const std::pair
<unsigned, std::string
> &RN
: Info
->RecordNames
)
113 if (RN
.first
== CodeID
)
114 return RN
.second
.c_str();
// Only LLVM IR bitstreams use the built-in tables below.
117 if (CurStreamType
!= LLVMIRBitstream
)
// STRINGIFY_CODE(PREFIX, CODE) expands to a `case bitc::PREFIX_CODE:` label;
// the remaining line(s) of the macro body are not visible in this view
// (presumably it returns the stringified name — TODO confirm). The macro is
// removed again by the #undef at the end of this function.
120 #define STRINGIFY_CODE(PREFIX, CODE) \
121 case bitc::PREFIX##_##CODE: \
126 case bitc::MODULE_BLOCK_ID
:
130 STRINGIFY_CODE(MODULE_CODE
, VERSION
)
131 STRINGIFY_CODE(MODULE_CODE
, TRIPLE
)
132 STRINGIFY_CODE(MODULE_CODE
, DATALAYOUT
)
133 STRINGIFY_CODE(MODULE_CODE
, ASM
)
134 STRINGIFY_CODE(MODULE_CODE
, SECTIONNAME
)
135 STRINGIFY_CODE(MODULE_CODE
, DEPLIB
) // Deprecated, present in old bitcode
136 STRINGIFY_CODE(MODULE_CODE
, GLOBALVAR
)
137 STRINGIFY_CODE(MODULE_CODE
, FUNCTION
)
138 STRINGIFY_CODE(MODULE_CODE
, ALIAS
)
139 STRINGIFY_CODE(MODULE_CODE
, GCNAME
)
140 STRINGIFY_CODE(MODULE_CODE
, COMDAT
)
141 STRINGIFY_CODE(MODULE_CODE
, VSTOFFSET
)
142 STRINGIFY_CODE(MODULE_CODE
, METADATA_VALUES_UNUSED
)
143 STRINGIFY_CODE(MODULE_CODE
, SOURCE_FILENAME
)
144 STRINGIFY_CODE(MODULE_CODE
, HASH
)
146 case bitc::IDENTIFICATION_BLOCK_ID
:
150 STRINGIFY_CODE(IDENTIFICATION_CODE
, STRING
)
151 STRINGIFY_CODE(IDENTIFICATION_CODE
, EPOCH
)
153 case bitc::PARAMATTR_BLOCK_ID
:
157 // FIXME: Should these be different?
158 case bitc::PARAMATTR_CODE_ENTRY_OLD
:
160 case bitc::PARAMATTR_CODE_ENTRY
:
163 case bitc::PARAMATTR_GROUP_BLOCK_ID
:
167 case bitc::PARAMATTR_GRP_CODE_ENTRY
:
170 case bitc::TYPE_BLOCK_ID_NEW
:
174 STRINGIFY_CODE(TYPE_CODE
, NUMENTRY
)
175 STRINGIFY_CODE(TYPE_CODE
, VOID
)
176 STRINGIFY_CODE(TYPE_CODE
, FLOAT
)
177 STRINGIFY_CODE(TYPE_CODE
, DOUBLE
)
178 STRINGIFY_CODE(TYPE_CODE
, LABEL
)
179 STRINGIFY_CODE(TYPE_CODE
, OPAQUE
)
180 STRINGIFY_CODE(TYPE_CODE
, INTEGER
)
181 STRINGIFY_CODE(TYPE_CODE
, POINTER
)
182 STRINGIFY_CODE(TYPE_CODE
, HALF
)
183 STRINGIFY_CODE(TYPE_CODE
, ARRAY
)
184 STRINGIFY_CODE(TYPE_CODE
, VECTOR
)
185 STRINGIFY_CODE(TYPE_CODE
, X86_FP80
)
186 STRINGIFY_CODE(TYPE_CODE
, FP128
)
187 STRINGIFY_CODE(TYPE_CODE
, PPC_FP128
)
188 STRINGIFY_CODE(TYPE_CODE
, METADATA
)
189 STRINGIFY_CODE(TYPE_CODE
, X86_MMX
)
190 STRINGIFY_CODE(TYPE_CODE
, STRUCT_ANON
)
191 STRINGIFY_CODE(TYPE_CODE
, STRUCT_NAME
)
192 STRINGIFY_CODE(TYPE_CODE
, STRUCT_NAMED
)
193 STRINGIFY_CODE(TYPE_CODE
, FUNCTION
)
194 STRINGIFY_CODE(TYPE_CODE
, TOKEN
)
195 STRINGIFY_CODE(TYPE_CODE
, BFLOAT
)
198 case bitc::CONSTANTS_BLOCK_ID
:
202 STRINGIFY_CODE(CST_CODE
, SETTYPE
)
203 STRINGIFY_CODE(CST_CODE
, NULL
)
204 STRINGIFY_CODE(CST_CODE
, UNDEF
)
205 STRINGIFY_CODE(CST_CODE
, INTEGER
)
206 STRINGIFY_CODE(CST_CODE
, WIDE_INTEGER
)
207 STRINGIFY_CODE(CST_CODE
, FLOAT
)
208 STRINGIFY_CODE(CST_CODE
, AGGREGATE
)
209 STRINGIFY_CODE(CST_CODE
, STRING
)
210 STRINGIFY_CODE(CST_CODE
, CSTRING
)
211 STRINGIFY_CODE(CST_CODE
, CE_BINOP
)
212 STRINGIFY_CODE(CST_CODE
, CE_CAST
)
213 STRINGIFY_CODE(CST_CODE
, CE_GEP
)
214 STRINGIFY_CODE(CST_CODE
, CE_INBOUNDS_GEP
)
215 STRINGIFY_CODE(CST_CODE
, CE_SELECT
)
216 STRINGIFY_CODE(CST_CODE
, CE_EXTRACTELT
)
217 STRINGIFY_CODE(CST_CODE
, CE_INSERTELT
)
218 STRINGIFY_CODE(CST_CODE
, CE_SHUFFLEVEC
)
219 STRINGIFY_CODE(CST_CODE
, CE_CMP
)
220 STRINGIFY_CODE(CST_CODE
, INLINEASM
)
221 STRINGIFY_CODE(CST_CODE
, CE_SHUFVEC_EX
)
222 STRINGIFY_CODE(CST_CODE
, CE_UNOP
)
223 STRINGIFY_CODE(CST_CODE
, DSO_LOCAL_EQUIVALENT
)
224 STRINGIFY_CODE(CST_CODE
, NO_CFI_VALUE
)
225 STRINGIFY_CODE(CST_CODE
, PTRAUTH
)
// BLOCKADDRESS is spelled out by hand here instead of going through
// STRINGIFY_CODE; the returned text has the same PREFIX_CODE shape.
226 case bitc::CST_CODE_BLOCKADDRESS
:
227 return "CST_CODE_BLOCKADDRESS";
228 STRINGIFY_CODE(CST_CODE
, DATA
)
230 case bitc::FUNCTION_BLOCK_ID
:
234 STRINGIFY_CODE(FUNC_CODE
, DECLAREBLOCKS
)
235 STRINGIFY_CODE(FUNC_CODE
, INST_BINOP
)
236 STRINGIFY_CODE(FUNC_CODE
, INST_CAST
)
237 STRINGIFY_CODE(FUNC_CODE
, INST_GEP_OLD
)
238 STRINGIFY_CODE(FUNC_CODE
, INST_INBOUNDS_GEP_OLD
)
239 STRINGIFY_CODE(FUNC_CODE
, INST_SELECT
)
240 STRINGIFY_CODE(FUNC_CODE
, INST_EXTRACTELT
)
241 STRINGIFY_CODE(FUNC_CODE
, INST_INSERTELT
)
242 STRINGIFY_CODE(FUNC_CODE
, INST_SHUFFLEVEC
)
243 STRINGIFY_CODE(FUNC_CODE
, INST_CMP
)
244 STRINGIFY_CODE(FUNC_CODE
, INST_RET
)
245 STRINGIFY_CODE(FUNC_CODE
, INST_BR
)
246 STRINGIFY_CODE(FUNC_CODE
, INST_SWITCH
)
247 STRINGIFY_CODE(FUNC_CODE
, INST_INVOKE
)
248 STRINGIFY_CODE(FUNC_CODE
, INST_UNOP
)
249 STRINGIFY_CODE(FUNC_CODE
, INST_UNREACHABLE
)
250 STRINGIFY_CODE(FUNC_CODE
, INST_CLEANUPRET
)
251 STRINGIFY_CODE(FUNC_CODE
, INST_CATCHRET
)
252 STRINGIFY_CODE(FUNC_CODE
, INST_CATCHPAD
)
253 STRINGIFY_CODE(FUNC_CODE
, INST_PHI
)
254 STRINGIFY_CODE(FUNC_CODE
, INST_ALLOCA
)
255 STRINGIFY_CODE(FUNC_CODE
, INST_LOAD
)
256 STRINGIFY_CODE(FUNC_CODE
, INST_VAARG
)
257 STRINGIFY_CODE(FUNC_CODE
, INST_STORE
)
258 STRINGIFY_CODE(FUNC_CODE
, INST_EXTRACTVAL
)
259 STRINGIFY_CODE(FUNC_CODE
, INST_INSERTVAL
)
260 STRINGIFY_CODE(FUNC_CODE
, INST_CMP2
)
261 STRINGIFY_CODE(FUNC_CODE
, INST_VSELECT
)
262 STRINGIFY_CODE(FUNC_CODE
, DEBUG_LOC_AGAIN
)
263 STRINGIFY_CODE(FUNC_CODE
, INST_CALL
)
264 STRINGIFY_CODE(FUNC_CODE
, DEBUG_LOC
)
265 STRINGIFY_CODE(FUNC_CODE
, INST_GEP
)
266 STRINGIFY_CODE(FUNC_CODE
, OPERAND_BUNDLE
)
267 STRINGIFY_CODE(FUNC_CODE
, INST_FENCE
)
268 STRINGIFY_CODE(FUNC_CODE
, INST_ATOMICRMW
)
269 STRINGIFY_CODE(FUNC_CODE
, INST_LOADATOMIC
)
270 STRINGIFY_CODE(FUNC_CODE
, INST_STOREATOMIC
)
271 STRINGIFY_CODE(FUNC_CODE
, INST_CMPXCHG
)
272 STRINGIFY_CODE(FUNC_CODE
, INST_CALLBR
)
273 STRINGIFY_CODE(FUNC_CODE
, BLOCKADDR_USERS
)
274 STRINGIFY_CODE(FUNC_CODE
, DEBUG_RECORD_DECLARE
)
275 STRINGIFY_CODE(FUNC_CODE
, DEBUG_RECORD_VALUE
)
276 STRINGIFY_CODE(FUNC_CODE
, DEBUG_RECORD_ASSIGN
)
277 STRINGIFY_CODE(FUNC_CODE
, DEBUG_RECORD_VALUE_SIMPLE
)
278 STRINGIFY_CODE(FUNC_CODE
, DEBUG_RECORD_LABEL
)
280 case bitc::VALUE_SYMTAB_BLOCK_ID
:
284 STRINGIFY_CODE(VST_CODE
, ENTRY
)
285 STRINGIFY_CODE(VST_CODE
, BBENTRY
)
286 STRINGIFY_CODE(VST_CODE
, FNENTRY
)
287 STRINGIFY_CODE(VST_CODE
, COMBINED_ENTRY
)
289 case bitc::MODULE_STRTAB_BLOCK_ID
:
293 STRINGIFY_CODE(MST_CODE
, ENTRY
)
294 STRINGIFY_CODE(MST_CODE
, HASH
)
// The per-module and full-LTO summary blocks share one FS_* code table.
296 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID
:
297 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID
:
301 STRINGIFY_CODE(FS
, PERMODULE
)
302 STRINGIFY_CODE(FS
, PERMODULE_PROFILE
)
303 STRINGIFY_CODE(FS
, PERMODULE_RELBF
)
304 STRINGIFY_CODE(FS
, PERMODULE_GLOBALVAR_INIT_REFS
)
305 STRINGIFY_CODE(FS
, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS
)
306 STRINGIFY_CODE(FS
, COMBINED
)
307 STRINGIFY_CODE(FS
, COMBINED_PROFILE
)
308 STRINGIFY_CODE(FS
, COMBINED_GLOBALVAR_INIT_REFS
)
309 STRINGIFY_CODE(FS
, ALIAS
)
310 STRINGIFY_CODE(FS
, COMBINED_ALIAS
)
311 STRINGIFY_CODE(FS
, COMBINED_ORIGINAL_NAME
)
312 STRINGIFY_CODE(FS
, VERSION
)
313 STRINGIFY_CODE(FS
, FLAGS
)
314 STRINGIFY_CODE(FS
, TYPE_TESTS
)
315 STRINGIFY_CODE(FS
, TYPE_TEST_ASSUME_VCALLS
)
316 STRINGIFY_CODE(FS
, TYPE_CHECKED_LOAD_VCALLS
)
317 STRINGIFY_CODE(FS
, TYPE_TEST_ASSUME_CONST_VCALL
)
318 STRINGIFY_CODE(FS
, TYPE_CHECKED_LOAD_CONST_VCALL
)
319 STRINGIFY_CODE(FS
, VALUE_GUID
)
320 STRINGIFY_CODE(FS
, CFI_FUNCTION_DEFS
)
321 STRINGIFY_CODE(FS
, CFI_FUNCTION_DECLS
)
322 STRINGIFY_CODE(FS
, TYPE_ID
)
323 STRINGIFY_CODE(FS
, TYPE_ID_METADATA
)
324 STRINGIFY_CODE(FS
, BLOCK_COUNT
)
325 STRINGIFY_CODE(FS
, PARAM_ACCESS
)
326 STRINGIFY_CODE(FS
, PERMODULE_CALLSITE_INFO
)
327 STRINGIFY_CODE(FS
, PERMODULE_ALLOC_INFO
)
328 STRINGIFY_CODE(FS
, COMBINED_CALLSITE_INFO
)
329 STRINGIFY_CODE(FS
, COMBINED_ALLOC_INFO
)
330 STRINGIFY_CODE(FS
, STACK_IDS
)
331 STRINGIFY_CODE(FS
, ALLOC_CONTEXT_IDS
)
333 case bitc::METADATA_ATTACHMENT_ID
:
337 STRINGIFY_CODE(METADATA
, ATTACHMENT
)
339 case bitc::METADATA_BLOCK_ID
:
343 STRINGIFY_CODE(METADATA
, STRING_OLD
)
344 STRINGIFY_CODE(METADATA
, VALUE
)
345 STRINGIFY_CODE(METADATA
, NODE
)
346 STRINGIFY_CODE(METADATA
, NAME
)
347 STRINGIFY_CODE(METADATA
, DISTINCT_NODE
)
348 STRINGIFY_CODE(METADATA
, KIND
) // Older bitcode has it in a MODULE_BLOCK
349 STRINGIFY_CODE(METADATA
, LOCATION
)
350 STRINGIFY_CODE(METADATA
, OLD_NODE
)
351 STRINGIFY_CODE(METADATA
, OLD_FN_NODE
)
352 STRINGIFY_CODE(METADATA
, NAMED_NODE
)
353 STRINGIFY_CODE(METADATA
, GENERIC_DEBUG
)
354 STRINGIFY_CODE(METADATA
, SUBRANGE
)
355 STRINGIFY_CODE(METADATA
, ENUMERATOR
)
356 STRINGIFY_CODE(METADATA
, BASIC_TYPE
)
357 STRINGIFY_CODE(METADATA
, FILE)
358 STRINGIFY_CODE(METADATA
, DERIVED_TYPE
)
359 STRINGIFY_CODE(METADATA
, COMPOSITE_TYPE
)
360 STRINGIFY_CODE(METADATA
, SUBROUTINE_TYPE
)
361 STRINGIFY_CODE(METADATA
, COMPILE_UNIT
)
362 STRINGIFY_CODE(METADATA
, SUBPROGRAM
)
363 STRINGIFY_CODE(METADATA
, LEXICAL_BLOCK
)
364 STRINGIFY_CODE(METADATA
, LEXICAL_BLOCK_FILE
)
365 STRINGIFY_CODE(METADATA
, NAMESPACE
)
366 STRINGIFY_CODE(METADATA
, TEMPLATE_TYPE
)
367 STRINGIFY_CODE(METADATA
, TEMPLATE_VALUE
)
368 STRINGIFY_CODE(METADATA
, GLOBAL_VAR
)
369 STRINGIFY_CODE(METADATA
, LOCAL_VAR
)
370 STRINGIFY_CODE(METADATA
, EXPRESSION
)
371 STRINGIFY_CODE(METADATA
, OBJC_PROPERTY
)
372 STRINGIFY_CODE(METADATA
, IMPORTED_ENTITY
)
373 STRINGIFY_CODE(METADATA
, MODULE
)
374 STRINGIFY_CODE(METADATA
, MACRO
)
375 STRINGIFY_CODE(METADATA
, MACRO_FILE
)
376 STRINGIFY_CODE(METADATA
, STRINGS
)
377 STRINGIFY_CODE(METADATA
, GLOBAL_DECL_ATTACHMENT
)
378 STRINGIFY_CODE(METADATA
, GLOBAL_VAR_EXPR
)
379 STRINGIFY_CODE(METADATA
, INDEX_OFFSET
)
380 STRINGIFY_CODE(METADATA
, INDEX
)
381 STRINGIFY_CODE(METADATA
, ARG_LIST
)
// METADATA_KIND is listed both here and in the METADATA_BLOCK table above.
383 case bitc::METADATA_KIND_BLOCK_ID
:
387 STRINGIFY_CODE(METADATA
, KIND
)
389 case bitc::USELIST_BLOCK_ID
:
393 case bitc::USELIST_CODE_DEFAULT
:
394 return "USELIST_CODE_DEFAULT";
395 case bitc::USELIST_CODE_BB
:
396 return "USELIST_CODE_BB";
399 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID
:
403 case bitc::OPERAND_BUNDLE_TAG
:
404 return "OPERAND_BUNDLE_TAG";
406 case bitc::STRTAB_BLOCK_ID
:
410 case bitc::STRTAB_BLOB
:
413 case bitc::SYMTAB_BLOCK_ID
:
417 case bitc::SYMTAB_BLOB
:
// The helper macro is local to this function; drop it so it cannot leak into
// the rest of the translation unit.
421 #undef STRINGIFY_CODE
// Print a (possibly fractional) size given in bits to OS as three
// '/'-separated fields: the bit count, the byte count (Bits / 8, suffix "B")
// and the count of 32-bit words (Bits / 32 truncated to an integer, suffix
// "W").
// NOTE(review): unlike the uint64_t overload, the first field here has no "b"
// suffix ("%.2f/" vs "%lub/"). This is runtime output; confirm whether the
// difference is intentional before changing it.
424 static void printSize(raw_ostream
&OS
, double Bits
) {
425 OS
<< format("%.2f/%.2fB/%luW", Bits
, Bits
/ 8, (unsigned long)(Bits
/ 32));
// Print an integral size given in bits to OS as "<bits>b/<bytes>B/<words>W",
// where bytes is Bits / 8 as a double and words is Bits / 32 using integer
// division.
// NOTE(review): Bits is cast to `unsigned long` to match the "%lu"
// conversions; on platforms where unsigned long is narrower than uint64_t
// this would truncate very large values — confirm this is acceptable.
427 static void printSize(raw_ostream
&OS
, uint64_t Bits
) {
428 OS
<< format("%lub/%.2fB/%luW", (unsigned long)Bits
, (double)Bits
/ 8,
429 (unsigned long)(Bits
/ 32));
// Read the magic number at the current position of Stream and classify the
// stream.
//
// Returns one of ClangSerializedASTBitstream ('C','P','C','H'),
// ClangSerializedDiagnosticsBitstream ('D','I','A','G'),
// LLVMBitstreamRemarks ('R','M','R','K'), LLVMIRBitstream ('B','C' followed by
// the nibbles 0x0 0xC 0xE 0xD), or UnknownBitstream when nothing matches.
// Any read failure is returned as the Error from the cursor.
// The declaration of the Signature buffer is not visible in this view; the
// code below indexes it at positions 0 through 5.
432 static Expected
<CurStreamTypeType
> ReadSignature(BitstreamCursor
&Stream
) {
// tryRead: read `size` bits from Stream into Dest, or hand back the read
// error.
433 auto tryRead
= [&Stream
](char &Dest
, size_t size
) -> Error
{
434 if (Expected
<SimpleBitstreamCursor::word_t
> MaybeWord
= Stream
.Read(size
))
435 Dest
= MaybeWord
.get();
437 return MaybeWord
.takeError();
438 return Error::success();
// The first two bytes are common to every recognized signature.
442 if (Error Err
= tryRead(Signature
[0], 8))
443 return std::move(Err
);
444 if (Error Err
= tryRead(Signature
[1], 8))
445 return std::move(Err
);
447 // Autodetect the file contents, if it is one we know.
448 if (Signature
[0] == 'C' && Signature
[1] == 'P') {
449 if (Error Err
= tryRead(Signature
[2], 8))
450 return std::move(Err
);
451 if (Error Err
= tryRead(Signature
[3], 8))
452 return std::move(Err
);
453 if (Signature
[2] == 'C' && Signature
[3] == 'H')
454 return ClangSerializedASTBitstream
;
455 } else if (Signature
[0] == 'D' && Signature
[1] == 'I') {
456 if (Error Err
= tryRead(Signature
[2], 8))
457 return std::move(Err
);
458 if (Error Err
= tryRead(Signature
[3], 8))
459 return std::move(Err
);
460 if (Signature
[2] == 'A' && Signature
[3] == 'G')
461 return ClangSerializedDiagnosticsBitstream
;
462 } else if (Signature
[0] == 'R' && Signature
[1] == 'M') {
463 if (Error Err
= tryRead(Signature
[2], 8))
464 return std::move(Err
);
465 if (Error Err
= tryRead(Signature
[3], 8))
466 return std::move(Err
);
467 if (Signature
[2] == 'R' && Signature
[3] == 'K')
468 return LLVMBitstreamRemarks
;
// Otherwise read four 4-bit fields and test for the LLVM IR magic, which is
// 'B','C' followed by the nibbles 0x0, 0xC, 0xE, 0xD.
470 if (Error Err
= tryRead(Signature
[2], 4))
471 return std::move(Err
);
472 if (Error Err
= tryRead(Signature
[3], 4))
473 return std::move(Err
);
474 if (Error Err
= tryRead(Signature
[4], 4))
475 return std::move(Err
);
476 if (Error Err
= tryRead(Signature
[5], 4))
477 return std::move(Err
);
478 if (Signature
[0] == 'B' && Signature
[1] == 'C' && Signature
[2] == 0x0 &&
479 Signature
[3] == 0xC && Signature
[4] == 0xE && Signature
[5] == 0xD)
480 return LLVMIRBitstream
;
482 return UnknownBitstream
;
// Inspect the start of Stream's buffer. If it begins with a bitcode wrapper
// header, validate it, print its fields, and replace Stream with a cursor
// over the wrapped bitcode. Then read and classify the stream signature via
// ReadSignature.
//
// Returns the detected stream type, or an Error ("Invalid bitcode wrapper
// header" or a read error).
// NOTE(review): O is a std::optional and `O->OS` is dereferenced below; any
// guard around the printing is not visible in this view — confirm one exists.
485 static Expected
<CurStreamTypeType
> analyzeHeader(std::optional
<BCDumpOptions
> O
,
486 BitstreamCursor
&Stream
) {
487 ArrayRef
<uint8_t> Bytes
= Stream
.getBitcodeBytes();
488 const unsigned char *BufPtr
= (const unsigned char *)Bytes
.data();
489 const unsigned char *EndBufPtr
= BufPtr
+ Bytes
.size();
491 // If we have a wrapper header, parse it and ignore the non-bc file
492 // contents. The magic number is 0x0B17C0DE stored in little endian.
493 if (isBitcodeWrapper(BufPtr
, EndBufPtr
)) {
// Reject buffers too short to hold the full header before reading its fields.
494 if (Bytes
.size() < BWH_HeaderSize
)
495 return reportError("Invalid bitcode wrapper header");
// Each header field is a 32-bit little-endian value at a fixed offset.
498 unsigned Magic
= support::endian::read32le(&BufPtr
[BWH_MagicField
]);
499 unsigned Version
= support::endian::read32le(&BufPtr
[BWH_VersionField
]);
500 unsigned Offset
= support::endian::read32le(&BufPtr
[BWH_OffsetField
]);
501 unsigned Size
= support::endian::read32le(&BufPtr
[BWH_SizeField
]);
502 unsigned CPUType
= support::endian::read32le(&BufPtr
[BWH_CPUTypeField
]);
504 O
->OS
<< "<BITCODE_WRAPPER_HEADER"
505 << " Magic=" << format_hex(Magic
, 10)
506 << " Version=" << format_hex(Version
, 10)
507 << " Offset=" << format_hex(Offset
, 10)
508 << " Size=" << format_hex(Size
, 10)
509 << " CPUType=" << format_hex(CPUType
, 10) << "/>\n";
// A true result from SkipBitcodeWrapperHeader is treated as failure here.
512 if (SkipBitcodeWrapperHeader(BufPtr
, EndBufPtr
, true))
513 return reportError("Invalid bitcode wrapper header");
516 // Use the cursor modified by skipping the wrapper header.
517 Stream
= BitstreamCursor(ArrayRef
<uint8_t>(BufPtr
, EndBufPtr
));
519 return ReadSignature(Stream
);
// Return true when a record's blob has a structured decoder in this file.
// Only bitc::METADATA_STRINGS records inside bitc::METADATA_BLOCK_ID qualify.
522 static bool canDecodeBlob(unsigned Code
, unsigned BlockID
) {
523 return BlockID
== bitc::METADATA_BLOCK_ID
&& Code
== bitc::METADATA_STRINGS
;
// Pretty-print the contents of a METADATA_STRINGS blob to OS.
//
// Record must hold exactly two entries: Record[0] is the number of strings
// and Record[1] is the byte offset within Blob where the character data
// starts. The bytes before that offset are read as a stream of VBR6-encoded
// string lengths; each length consumes that many characters from the rest of
// the blob.
//
// Returns Error::success() on success, or an Error for an empty blob, a wrong
// record size, a length stream that ends early ("bad length"), or character
// data that is too short ("truncated chars").
// Part of the parameter list (the Blob and OS parameters used below) is not
// visible in this view.
526 Error
BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent
,
527 ArrayRef
<uint64_t> Record
,
531 return reportError("Cannot decode empty blob.");
533 if (Record
.size() != 2)
535 "Decoding metadata strings blob needs two record entries.");
// NOTE(review): the 64-bit record values are narrowed to `unsigned` here.
537 unsigned NumStrings
= Record
[0];
538 unsigned StringsOffset
= Record
[1];
539 OS
<< " num-strings = " << NumStrings
<< " {\n";
// NOTE(review): no check that StringsOffset <= Blob.size() is visible before
// the slice/drop_front below — confirm how an oversized offset is handled.
541 StringRef Lengths
= Blob
.slice(0, StringsOffset
);
542 SimpleBitstreamCursor
R(Lengths
);
543 StringRef Strings
= Blob
.drop_front(StringsOffset
);
545 if (R
.AtEndOfStream())
546 return reportError("bad length");
549 if (Error E
= R
.ReadVBR(6).moveInto(Size
))
551 if (Strings
.size() < Size
)
552 return reportError("truncated chars");
554 OS
<< Indent
<< " '";
555 OS
.write_escaped(Strings
.slice(0, Size
), /*hex=*/true);
557 Strings
= Strings
.drop_front(Size
);
// NOTE(review): this is the tail of a do/while whose opening `do {` is not
// visible. With NumStrings == 0 the body still runs once and the pre-decrement
// wraps around, so the loop would only stop via one of the error returns —
// confirm whether a zero count can reach this point.
558 } while (--NumStrings
);
560 OS
<< Indent
<< " }";
561 return Error::success();
// Construct an analyzer over Buffer. BlockInfoBuffer, when used, is an
// external buffer from which block info is read; it is used to construct
// BlockInfoStream in place.
// The member-initializer list and any guard on BlockInfoBuffer are not visible
// in this view; `*BlockInfoBuffer` below requires the optional to hold a value.
564 BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer
,
565 std::optional
<StringRef
> BlockInfoBuffer
)
568 BlockInfoStream
.emplace(*BlockInfoBuffer
);
// Analyze the whole bitstream held by this object.
//
// Steps visible here:
//   1. Parse the header of Stream and record the stream type in CurStreamType.
//   2. If a separate BlockInfoStream exists, parse its header and walk its
//      top-level blocks, loading BLOCKINFO blocks into BlockInfo and skipping
//      the rest.
//   3. Walk the top-level blocks of Stream, handing each one to parseBlock.
//
// O carries the optional dump options and CheckHash the optional module-hash
// check input; both are forwarded to parseBlock.
// Returns Error::success() when the whole stream was consumed, otherwise the
// first error encountered.
571 Error
BitcodeAnalyzer::analyze(std::optional
<BCDumpOptions
> O
,
572 std::optional
<StringRef
> CheckHash
) {
573 if (Error E
= analyzeHeader(O
, Stream
).moveInto(CurStreamType
))
576 Stream
.setBlockInfo(&BlockInfo
);
578 // Read block info from BlockInfoStream, if specified.
579 // The block info must be a top-level block.
580 if (BlockInfoStream
) {
581 BitstreamCursor
BlockInfoCursor(*BlockInfoStream
);
// Only the error matters here; the stream type of the block info file is
// discarded.
582 if (Error E
= analyzeHeader(O
, BlockInfoCursor
).takeError())
585 while (!BlockInfoCursor
.AtEndOfStream()) {
586 Expected
<unsigned> MaybeCode
= BlockInfoCursor
.ReadCode();
588 return MaybeCode
.takeError();
589 if (MaybeCode
.get() != bitc::ENTER_SUBBLOCK
)
590 return reportError("Invalid record at top-level in block info file");
592 Expected
<unsigned> MaybeBlockID
= BlockInfoCursor
.ReadSubBlockID();
594 return MaybeBlockID
.takeError();
595 if (MaybeBlockID
.get() == bitc::BLOCKINFO_BLOCK_ID
) {
596 std::optional
<BitstreamBlockInfo
> NewBlockInfo
;
598 BlockInfoCursor
.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
599 .moveInto(NewBlockInfo
))
602 return reportError("Malformed BlockInfoBlock in block info file");
603 BlockInfo
= std::move(*NewBlockInfo
);
// Skip a block in the block info file.
607 if (Error Err
= BlockInfoCursor
.SkipBlock())
612 // Parse the top-level structure. We only allow blocks at the top-level.
613 while (!Stream
.AtEndOfStream()) {
614 Expected
<unsigned> MaybeCode
= Stream
.ReadCode();
616 return MaybeCode
.takeError();
617 if (MaybeCode
.get() != bitc::ENTER_SUBBLOCK
)
618 return reportError("Invalid record at top-level");
620 Expected
<unsigned> MaybeBlockID
= Stream
.ReadSubBlockID();
622 return MaybeBlockID
.takeError();
// Top-level blocks are parsed with indent level 0.
624 if (Error E
= parseBlock(MaybeBlockID
.get(), 0, O
, CheckHash
))
629 return Error::success();
// Print a human-readable summary of the statistics gathered for the stream to
// O.OS: total size, stream type, number of top-level blocks, then per-block
// statistics (instances, size, share of the file, sub-block / abbreviation /
// record counts) and, when O.Histogram is set, a per-record-code histogram.
// Filename, when used, is printed in the summary heading.
632 void BitcodeAnalyzer::printStats(BCDumpOptions O
,
633 std::optional
<StringRef
> Filename
) {
634 uint64_t BufferSizeBits
= Stream
.getBitcodeBytes().size() * CHAR_BIT
;
635 // Print a summary of the read file.
// NOTE(review): Filename->data() streams a raw `const char *`, which relies
// on the StringRef being NUL-terminated; streaming *Filename would be
// length-safe. Any guard on the optional is not visible in this view —
// confirm both.
638 O
.OS
<< "of " << Filename
->data() << ":\n";
639 O
.OS
<< " Total size: ";
640 printSize(O
.OS
, BufferSizeBits
);
642 O
.OS
<< " Stream type: ";
643 switch (CurStreamType
) {
644 case UnknownBitstream
:
647 case LLVMIRBitstream
:
650 case ClangSerializedASTBitstream
:
651 O
.OS
<< "Clang Serialized AST\n";
653 case ClangSerializedDiagnosticsBitstream
:
654 O
.OS
<< "Clang Serialized Diagnostics\n";
656 case LLVMBitstreamRemarks
:
657 O
.OS
<< "LLVM Remarks\n";
660 O
.OS
<< " # Toplevel Blocks: " << NumTopBlocks
<< "\n";
663 // Emit per-block stats.
664 O
.OS
<< "Per-block Summary:\n";
665 for (const auto &Stat
: BlockIDStats
) {
// Stat.first is the block ID, Stat.second the statistics for that ID.
666 O
.OS
<< " Block ID #" << Stat
.first
;
667 if (std::optional
<const char *> BlockName
=
668 GetBlockName(Stat
.first
, BlockInfo
, CurStreamType
))
669 O
.OS
<< " (" << *BlockName
<< ")";
672 const PerBlockIDStats
&Stats
= Stat
.second
;
673 O
.OS
<< " Num Instances: " << Stats
.NumInstances
<< "\n";
674 O
.OS
<< " Total Size: ";
675 printSize(O
.OS
, Stats
.NumBits
);
677 double pct
= (Stats
.NumBits
* 100.0) / BufferSizeBits
;
678 O
.OS
<< " Percent of file: " << format("%2.4f%%", pct
) << "\n";
// With several instances, print totals alongside per-instance averages; the
// plain "Num ..." lines further down are the alternative form.
679 if (Stats
.NumInstances
> 1) {
680 O
.OS
<< " Average Size: ";
681 printSize(O
.OS
, Stats
.NumBits
/ (double)Stats
.NumInstances
);
683 O
.OS
<< " Tot/Avg SubBlocks: " << Stats
.NumSubBlocks
<< "/"
684 << Stats
.NumSubBlocks
/ (double)Stats
.NumInstances
<< "\n";
685 O
.OS
<< " Tot/Avg Abbrevs: " << Stats
.NumAbbrevs
<< "/"
686 << Stats
.NumAbbrevs
/ (double)Stats
.NumInstances
<< "\n";
687 O
.OS
<< " Tot/Avg Records: " << Stats
.NumRecords
<< "/"
688 << Stats
.NumRecords
/ (double)Stats
.NumInstances
<< "\n";
690 O
.OS
<< " Num SubBlocks: " << Stats
.NumSubBlocks
<< "\n";
691 O
.OS
<< " Num Abbrevs: " << Stats
.NumAbbrevs
<< "\n";
692 O
.OS
<< " Num Records: " << Stats
.NumRecords
<< "\n";
// Guard against dividing by zero when the block had no records.
694 if (Stats
.NumRecords
) {
695 double pct
= (Stats
.NumAbbreviatedRecords
* 100.0) / Stats
.NumRecords
;
696 O
.OS
<< " Percent Abbrevs: " << format("%2.4f%%", pct
) << "\n";
700 // Print a histogram of the codes we see.
701 if (O
.Histogram
&& !Stats
.CodeFreq
.empty()) {
702 std::vector
<std::pair
<unsigned, unsigned>> FreqPairs
; // <freq,code>
// Collect only the codes that occurred at least once; CodeFreq is indexed by
// record code.
703 for (unsigned i
= 0, e
= Stats
.CodeFreq
.size(); i
!= e
; ++i
)
704 if (unsigned Freq
= Stats
.CodeFreq
[i
].NumInstances
)
705 FreqPairs
.push_back(std::make_pair(Freq
, i
));
// Ascending sort followed by a reverse: most frequent codes come first.
706 llvm::stable_sort(FreqPairs
);
707 std::reverse(FreqPairs
.begin(), FreqPairs
.end());
709 O
.OS
<< "\tRecord Histogram:\n";
710 O
.OS
<< "\t\t Count # Bits b/Rec % Abv Record Kind\n";
711 for (const auto &FreqPair
: FreqPairs
) {
712 const PerRecordStats
&RecStats
= Stats
.CodeFreq
[FreqPair
.second
];
714 O
.OS
<< format("\t\t%7d %9lu", RecStats
.NumInstances
,
715 (unsigned long)RecStats
.TotalBits
);
// The average bits per record is printed only when there is more than one
// instance.
717 if (RecStats
.NumInstances
> 1)
718 O
.OS
<< format(" %9.1f",
719 (double)RecStats
.TotalBits
/ RecStats
.NumInstances
);
// Percentage of this code's records that were abbreviated.
723 if (RecStats
.NumAbbrev
)
724 O
.OS
<< format(" %7.2f", (double)RecStats
.NumAbbrev
/
725 RecStats
.NumInstances
* 100);
// Prefer the symbolic record name; otherwise print the numeric code.
730 if (std::optional
<const char *> CodeName
= GetCodeName(
731 FreqPair
.second
, Stat
.first
, BlockInfo
, CurStreamType
))
732 O
.OS
<< *CodeName
<< "\n";
734 O
.OS
<< "UnknownCode" << FreqPair
.second
<< "\n";
741 Error
BitcodeAnalyzer::parseBlock(unsigned BlockID
, unsigned IndentLevel
,
742 std::optional
<BCDumpOptions
> O
,
743 std::optional
<StringRef
> CheckHash
) {
744 std::string
Indent(IndentLevel
* 2, ' ');
745 uint64_t BlockBitStart
= Stream
.GetCurrentBitNo();
747 // Get the statistics for this BlockID.
748 PerBlockIDStats
&BlockStats
= BlockIDStats
[BlockID
];
750 BlockStats
.NumInstances
++;
752 // BLOCKINFO is a special part of the stream.
753 bool DumpRecords
= O
.has_value();
754 if (BlockID
== bitc::BLOCKINFO_BLOCK_ID
) {
755 if (O
&& !O
->DumpBlockinfo
)
756 O
->OS
<< Indent
<< "<BLOCKINFO_BLOCK/>\n";
757 std::optional
<BitstreamBlockInfo
> NewBlockInfo
;
758 if (Error E
= Stream
.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
759 .moveInto(NewBlockInfo
))
762 return reportError("Malformed BlockInfoBlock");
763 BlockInfo
= std::move(*NewBlockInfo
);
764 if (Error Err
= Stream
.JumpToBit(BlockBitStart
))
766 // It's not really interesting to dump the contents of the blockinfo
767 // block, so only do it if the user explicitly requests it.
768 DumpRecords
= O
&& O
->DumpBlockinfo
;
771 unsigned NumWords
= 0;
772 if (Error Err
= Stream
.EnterSubBlock(BlockID
, &NumWords
))
775 // Keep it for later, when we see a MODULE_HASH record
776 uint64_t BlockEntryPos
= Stream
.getCurrentByteNo();
778 std::optional
<const char *> BlockName
;
780 O
->OS
<< Indent
<< "<";
781 if ((BlockName
= GetBlockName(BlockID
, BlockInfo
, CurStreamType
)))
784 O
->OS
<< "UnknownBlock" << BlockID
;
786 if (!O
->Symbolic
&& BlockName
)
787 O
->OS
<< " BlockID=" << BlockID
;
789 O
->OS
<< " NumWords=" << NumWords
790 << " BlockCodeSize=" << Stream
.getAbbrevIDWidth() << ">\n";
793 SmallVector
<uint64_t, 64> Record
;
795 // Keep the offset to the metadata index if seen.
796 uint64_t MetadataIndexOffset
= 0;
798 // Read all the records for this block.
800 if (Stream
.AtEndOfStream())
801 return reportError("Premature end of bitstream");
803 uint64_t RecordStartBit
= Stream
.GetCurrentBitNo();
805 BitstreamEntry Entry
;
806 if (Error E
= Stream
.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs
)
810 switch (Entry
.Kind
) {
811 case BitstreamEntry::Error
:
812 return reportError("malformed bitcode file");
813 case BitstreamEntry::EndBlock
: {
814 uint64_t BlockBitEnd
= Stream
.GetCurrentBitNo();
815 BlockStats
.NumBits
+= BlockBitEnd
- BlockBitStart
;
817 O
->OS
<< Indent
<< "</";
819 O
->OS
<< *BlockName
<< ">\n";
821 O
->OS
<< "UnknownBlock" << BlockID
<< ">\n";
823 return Error::success();
826 case BitstreamEntry::SubBlock
: {
827 uint64_t SubBlockBitStart
= Stream
.GetCurrentBitNo();
828 if (Error E
= parseBlock(Entry
.ID
, IndentLevel
+ 1, O
, CheckHash
))
830 ++BlockStats
.NumSubBlocks
;
831 uint64_t SubBlockBitEnd
= Stream
.GetCurrentBitNo();
833 // Don't include subblock sizes in the size of this block.
834 BlockBitStart
+= SubBlockBitEnd
- SubBlockBitStart
;
837 case BitstreamEntry::Record
:
838 // The interesting case.
842 if (Entry
.ID
== bitc::DEFINE_ABBREV
) {
843 if (Error Err
= Stream
.ReadAbbrevRecord())
845 ++BlockStats
.NumAbbrevs
;
851 ++BlockStats
.NumRecords
;
854 uint64_t CurrentRecordPos
= Stream
.GetCurrentBitNo();
856 if (Error E
= Stream
.readRecord(Entry
.ID
, Record
, &Blob
).moveInto(Code
))
859 // Increment the # occurrences of this code.
860 if (BlockStats
.CodeFreq
.size() <= Code
)
861 BlockStats
.CodeFreq
.resize(Code
+ 1);
862 BlockStats
.CodeFreq
[Code
].NumInstances
++;
863 BlockStats
.CodeFreq
[Code
].TotalBits
+=
864 Stream
.GetCurrentBitNo() - RecordStartBit
;
865 if (Entry
.ID
!= bitc::UNABBREV_RECORD
) {
866 BlockStats
.CodeFreq
[Code
].NumAbbrev
++;
867 ++BlockStats
.NumAbbreviatedRecords
;
871 O
->OS
<< Indent
<< " <";
872 std::optional
<const char *> CodeName
=
873 GetCodeName(Code
, BlockID
, BlockInfo
, CurStreamType
);
877 O
->OS
<< "UnknownCode" << Code
;
878 if (!O
->Symbolic
&& CodeName
)
879 O
->OS
<< " codeid=" << Code
;
880 const BitCodeAbbrev
*Abbv
= nullptr;
881 if (Entry
.ID
!= bitc::UNABBREV_RECORD
) {
882 Expected
<const BitCodeAbbrev
*> MaybeAbbv
= Stream
.getAbbrev(Entry
.ID
);
884 return MaybeAbbv
.takeError();
885 Abbv
= MaybeAbbv
.get();
886 O
->OS
<< " abbrevid=" << Entry
.ID
;
889 for (unsigned i
= 0, e
= Record
.size(); i
!= e
; ++i
)
890 O
->OS
<< " op" << i
<< "=" << (int64_t)Record
[i
];
892 // If we found a metadata index, let's verify that we had an offset
893 // before and validate its forward reference offset was correct!
894 if (BlockID
== bitc::METADATA_BLOCK_ID
) {
895 if (Code
== bitc::METADATA_INDEX_OFFSET
) {
896 if (Record
.size() != 2)
897 O
->OS
<< "(Invalid record)";
899 auto Offset
= Record
[0] + (Record
[1] << 32);
900 MetadataIndexOffset
= Stream
.GetCurrentBitNo() + Offset
;
903 if (Code
== bitc::METADATA_INDEX
) {
904 O
->OS
<< " (offset ";
905 if (MetadataIndexOffset
== RecordStartBit
)
908 O
->OS
<< "mismatch: " << MetadataIndexOffset
<< " vs "
909 << RecordStartBit
<< ")";
913 // If we found a module hash, let's verify that it matches!
914 if (BlockID
== bitc::MODULE_BLOCK_ID
&& Code
== bitc::MODULE_CODE_HASH
&&
916 if (Record
.size() != 5)
917 O
->OS
<< " (invalid)";
919 // Recompute the hash and compare it to the one in the bitcode
921 std::array
<uint8_t, 20> Hash
;
922 Hasher
.update(*CheckHash
);
924 int BlockSize
= (CurrentRecordPos
/ 8) - BlockEntryPos
;
925 auto Ptr
= Stream
.getPointerToByte(BlockEntryPos
, BlockSize
);
926 Hasher
.update(ArrayRef
<uint8_t>(Ptr
, BlockSize
));
927 Hash
= Hasher
.result();
929 std::array
<uint8_t, 20> RecordedHash
;
931 for (auto &Val
: Record
) {
932 assert(!(Val
>> 32) && "Unexpected high bits set");
933 support::endian::write32be(&RecordedHash
[Pos
], Val
);
936 if (Hash
== RecordedHash
)
939 O
->OS
<< " (!mismatch!)";
946 for (unsigned i
= 1, e
= Abbv
->getNumOperandInfos(); i
!= e
; ++i
) {
947 const BitCodeAbbrevOp
&Op
= Abbv
->getOperandInfo(i
);
948 if (!Op
.isEncoding() || Op
.getEncoding() != BitCodeAbbrevOp::Array
)
950 assert(i
+ 2 == e
&& "Array op not second to last");
952 bool ArrayIsPrintable
= true;
953 for (unsigned j
= i
- 1, je
= Record
.size(); j
!= je
; ++j
) {
954 if (!isPrint(static_cast<unsigned char>(Record
[j
]))) {
955 ArrayIsPrintable
= false;
958 Str
+= (char)Record
[j
];
960 if (ArrayIsPrintable
)
961 O
->OS
<< " record string = '" << Str
<< "'";
967 if (canDecodeBlob(Code
, BlockID
)) {
968 if (Error E
= decodeMetadataStringsBlob(Indent
, Record
, Blob
, O
->OS
))
971 O
->OS
<< " blob data = ";
972 if (O
->ShowBinaryBlobs
) {
974 O
->OS
.write_escaped(Blob
, /*hex=*/true) << "'";
976 bool BlobIsPrintable
= true;
978 if (!isPrint(static_cast<unsigned char>(C
))) {
979 BlobIsPrintable
= false;
984 O
->OS
<< "'" << Blob
<< "'";
986 O
->OS
<< "unprintable, " << Blob
.size() << " bytes.";
994 // Make sure that we can skip the current record.
995 if (Error Err
= Stream
.JumpToBit(CurrentRecordPos
))
997 if (Expected
<unsigned> Skipped
= Stream
.skipRecord(Entry
.ID
))
1000 return Skipped
.takeError();