1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Chris Lattner and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This header defines the BitstreamReader class. This class can be used to
11 // read an arbitrary bitstream, regardless of its contents.
13 //===----------------------------------------------------------------------===//
15 #ifndef BITSTREAM_READER_H
16 #define BITSTREAM_READER_H
18 #include "llvm/Bitcode/BitCodes.h"
25 class BitstreamReader
{
26 const unsigned char *NextChar
;
27 const unsigned char *LastChar
;
28 friend class Deserializer
;
30 /// CurWord - This is the current data we have pulled from the stream but have
31 /// not returned to the client.
34 /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
35 /// is always from [0...31] inclusive.
36 unsigned BitsInCurWord
;
38 // CurCodeSize - This is the declared size, in bits, of code values used for the current block.
42 /// CurAbbrevs - Abbrevs installed in this block.
43 std::vector
<BitCodeAbbrev
*> CurAbbrevs
;
46 unsigned PrevCodeSize
;
47 std::vector
<BitCodeAbbrev
*> PrevAbbrevs
;
48 explicit Block(unsigned PCS
) : PrevCodeSize(PCS
) {}
51 /// BlockScope - This tracks the codesize of parent blocks.
52 SmallVector
<Block
, 8> BlockScope
;
54 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
55 /// These describe abbreviations that all blocks of the specified ID inherit.
58 std::vector
<BitCodeAbbrev
*> Abbrevs
;
60 std::vector
<BlockInfo
> BlockInfoRecords
;
62 /// FirstChar - This remembers the first byte of the stream.
63 const unsigned char *FirstChar
;
66 NextChar
= FirstChar
= LastChar
= 0;
72 BitstreamReader(const unsigned char *Start
, const unsigned char *End
) {
76 void init(const unsigned char *Start
, const unsigned char *End
) {
77 NextChar
= FirstChar
= Start
;
79 assert(((End
-Start
) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
86 // Abbrevs could still exist if the stream was broken. If so, don't leak them.
88 for (unsigned i
= 0, e
= CurAbbrevs
.size(); i
!= e
; ++i
)
89 CurAbbrevs
[i
]->dropRef();
91 for (unsigned S
= 0, e
= BlockScope
.size(); S
!= e
; ++S
) {
92 std::vector
<BitCodeAbbrev
*> &Abbrevs
= BlockScope
[S
].PrevAbbrevs
;
93 for (unsigned i
= 0, e
= Abbrevs
.size(); i
!= e
; ++i
)
94 Abbrevs
[i
]->dropRef();
97 // Free the BlockInfoRecords.
98 while (!BlockInfoRecords
.empty()) {
99 BlockInfo
&Info
= BlockInfoRecords
.back();
100 // Free blockinfo abbrev info.
101 for (unsigned i
= 0, e
= Info
.Abbrevs
.size(); i
!= e
; ++i
)
102 Info
.Abbrevs
[i
]->dropRef();
103 BlockInfoRecords
.pop_back();
107 bool AtEndOfStream() const {
108 return NextChar
== LastChar
&& BitsInCurWord
== 0;
111 /// GetCurrentBitNo - Return the bit # of the bit we are reading.
112 uint64_t GetCurrentBitNo() const {
113 return (NextChar
-FirstChar
)*8 + ((32-BitsInCurWord
) & 31);
116 /// JumpToBit - Reset the stream to the specified bit number.
117 void JumpToBit(uint64_t BitNo
) {
118 uintptr_t ByteNo
= uintptr_t(BitNo
/8) & ~3;
119 uintptr_t WordBitNo
= uintptr_t(BitNo
) & 31;
120 assert(ByteNo
< (uintptr_t)(LastChar
-FirstChar
) && "Invalid location");
122 // Move the cursor to the right word.
123 NextChar
= FirstChar
+ByteNo
;
126 // Skip over any bits that are already consumed.
133 /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
134 unsigned GetAbbrevIDWidth() const { return CurCodeSize
; }
136 uint32_t Read(unsigned NumBits
) {
137 // If the field is fully contained by CurWord, return it quickly.
138 if (BitsInCurWord
>= NumBits
) {
139 uint32_t R
= CurWord
& ((1U << NumBits
)-1);
141 BitsInCurWord
-= NumBits
;
145 // If we run out of data, stop at the end of the stream.
146 if (LastChar
== NextChar
) {
152 unsigned R
= CurWord
;
154 // Read the next word from the stream.
155 CurWord
= (NextChar
[0] << 0) | (NextChar
[1] << 8) |
156 (NextChar
[2] << 16) | (NextChar
[3] << 24);
159 // Extract NumBits-BitsInCurWord from what we just read.
160 unsigned BitsLeft
= NumBits
-BitsInCurWord
;
162 // Be careful here, BitsLeft is in the range [1..32] inclusive.
163 R
|= (CurWord
& (~0U >> (32-BitsLeft
))) << BitsInCurWord
;
165 // BitsLeft bits have just been used up from CurWord.
167 CurWord
>>= BitsLeft
;
170 BitsInCurWord
= 32-BitsLeft
;
174 uint64_t Read64(unsigned NumBits
) {
175 if (NumBits
<= 32) return Read(NumBits
);
177 uint64_t V
= Read(32);
178 return V
| (uint64_t)Read(NumBits
-32) << 32;
181 uint32_t ReadVBR(unsigned NumBits
) {
182 uint32_t Piece
= Read(NumBits
);
183 if ((Piece
& (1U << (NumBits
-1))) == 0)
187 unsigned NextBit
= 0;
189 Result
|= (Piece
& ((1U << (NumBits
-1))-1)) << NextBit
;
191 if ((Piece
& (1U << (NumBits
-1))) == 0)
194 NextBit
+= NumBits
-1;
195 Piece
= Read(NumBits
);
199 uint64_t ReadVBR64(unsigned NumBits
) {
200 uint64_t Piece
= Read(NumBits
);
201 if ((Piece
& (1U << (NumBits
-1))) == 0)
205 unsigned NextBit
= 0;
207 Result
|= (Piece
& ((1U << (NumBits
-1))-1)) << NextBit
;
209 if ((Piece
& (1U << (NumBits
-1))) == 0)
212 NextBit
+= NumBits
-1;
213 Piece
= Read(NumBits
);
223 unsigned ReadCode() {
224 return Read(CurCodeSize
);
227 //===--------------------------------------------------------------------===//
228 // Block Manipulation
229 //===--------------------------------------------------------------------===//
232 /// getBlockInfo - If there is block info for the specified ID, return it,
233 /// otherwise return null.
234 BlockInfo
*getBlockInfo(unsigned BlockID
) {
235 // Common case, the most recent entry matches BlockID.
236 if (!BlockInfoRecords
.empty() && BlockInfoRecords
.back().BlockID
== BlockID
)
237 return &BlockInfoRecords
.back();
239 for (unsigned i
= 0, e
= BlockInfoRecords
.size(); i
!= e
; ++i
)
240 if (BlockInfoRecords
[i
].BlockID
== BlockID
)
241 return &BlockInfoRecords
[i
];
248 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
250 /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
252 unsigned ReadSubBlockID() {
253 return ReadVBR(bitc::BlockIDWidth
);
256 /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
257 /// over the body of this block. If the block record is malformed, return true.
260 // Read and ignore the codelen value. Since we are skipping this block, we
261 // don't care what code widths are used inside of it.
262 ReadVBR(bitc::CodeLenWidth
);
264 unsigned NumWords
= Read(bitc::BlockSizeWidth
);
266 // Check that the block wasn't partially defined, and that the offset isn't
268 if (AtEndOfStream() || NextChar
+NumWords
*4 > LastChar
)
271 NextChar
+= NumWords
*4;
275 /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
276 /// the block, and return true if the block is valid.
277 bool EnterSubBlock(unsigned BlockID
, unsigned *NumWordsP
= 0) {
278 // Save the current block's state on BlockScope.
279 BlockScope
.push_back(Block(CurCodeSize
));
280 BlockScope
.back().PrevAbbrevs
.swap(CurAbbrevs
);
282 // Add the abbrevs specific to this block to the CurAbbrevs list.
283 if (BlockInfo
*Info
= getBlockInfo(BlockID
)) {
284 for (unsigned i
= 0, e
= Info
->Abbrevs
.size(); i
!= e
; ++i
) {
285 CurAbbrevs
.push_back(Info
->Abbrevs
[i
]);
286 CurAbbrevs
.back()->addRef();
290 // Get the codesize of this block.
291 CurCodeSize
= ReadVBR(bitc::CodeLenWidth
);
293 unsigned NumWords
= Read(bitc::BlockSizeWidth
);
294 if (NumWordsP
) *NumWordsP
= NumWords
;
296 // Validate that this block is sane.
297 if (CurCodeSize
== 0 || AtEndOfStream() || NextChar
+NumWords
*4 > LastChar
)
303 bool ReadBlockEnd() {
304 if (BlockScope
.empty()) return true;
307 // [END_BLOCK, <align4bytes>]
315 void PopBlockScope() {
316 CurCodeSize
= BlockScope
.back().PrevCodeSize
;
318 // Delete abbrevs from popped scope.
319 for (unsigned i
= 0, e
= CurAbbrevs
.size(); i
!= e
; ++i
)
320 CurAbbrevs
[i
]->dropRef();
322 BlockScope
.back().PrevAbbrevs
.swap(CurAbbrevs
);
323 BlockScope
.pop_back();
326 //===--------------------------------------------------------------------===//
328 //===--------------------------------------------------------------------===//
331 void ReadAbbreviatedField(const BitCodeAbbrevOp
&Op
,
332 SmallVectorImpl
<uint64_t> &Vals
) {
333 if (Op
.isLiteral()) {
334 // If the abbrev specifies the literal value to use, use it.
335 Vals
.push_back(Op
.getLiteralValue());
337 // Decode the value as we are commanded.
338 switch (Op
.getEncoding()) {
339 default: assert(0 && "Unknown encoding!");
340 case BitCodeAbbrevOp::Fixed
:
341 Vals
.push_back(Read((unsigned)Op
.getEncodingData()));
343 case BitCodeAbbrevOp::VBR
:
344 Vals
.push_back(ReadVBR64((unsigned)Op
.getEncodingData()));
346 case BitCodeAbbrevOp::Char6
:
347 Vals
.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6)));
353 unsigned ReadRecord(unsigned AbbrevID
, SmallVectorImpl
<uint64_t> &Vals
) {
354 if (AbbrevID
== bitc::UNABBREV_RECORD
) {
355 unsigned Code
= ReadVBR(6);
356 unsigned NumElts
= ReadVBR(6);
357 for (unsigned i
= 0; i
!= NumElts
; ++i
)
358 Vals
.push_back(ReadVBR64(6));
362 unsigned AbbrevNo
= AbbrevID
-bitc::FIRST_APPLICATION_ABBREV
;
363 assert(AbbrevNo
< CurAbbrevs
.size() && "Invalid abbrev #!");
364 BitCodeAbbrev
*Abbv
= CurAbbrevs
[AbbrevNo
];
366 for (unsigned i
= 0, e
= Abbv
->getNumOperandInfos(); i
!= e
; ++i
) {
367 const BitCodeAbbrevOp
&Op
= Abbv
->getOperandInfo(i
);
368 if (Op
.isLiteral() || Op
.getEncoding() != BitCodeAbbrevOp::Array
) {
369 ReadAbbreviatedField(Op
, Vals
);
371 // Array case. Read the number of elements as a vbr6.
372 unsigned NumElts
= ReadVBR(6);
374 // Get the element encoding.
375 assert(i
+2 == e
&& "array op not second to last?");
376 const BitCodeAbbrevOp
&EltEnc
= Abbv
->getOperandInfo(++i
);
378 // Read all the elements.
379 for (; NumElts
; --NumElts
)
380 ReadAbbreviatedField(EltEnc
, Vals
);
384 unsigned Code
= (unsigned)Vals
[0];
385 Vals
.erase(Vals
.begin());
389 //===--------------------------------------------------------------------===//
391 //===--------------------------------------------------------------------===//
393 void ReadAbbrevRecord() {
394 BitCodeAbbrev
*Abbv
= new BitCodeAbbrev();
395 unsigned NumOpInfo
= ReadVBR(5);
396 for (unsigned i
= 0; i
!= NumOpInfo
; ++i
) {
397 bool IsLiteral
= Read(1) ? true : false;
399 Abbv
->Add(BitCodeAbbrevOp(ReadVBR64(8)));
403 BitCodeAbbrevOp::Encoding E
= (BitCodeAbbrevOp::Encoding
)Read(3);
404 if (BitCodeAbbrevOp::hasEncodingData(E
))
405 Abbv
->Add(BitCodeAbbrevOp(E
, ReadVBR64(5)));
407 Abbv
->Add(BitCodeAbbrevOp(E
));
409 CurAbbrevs
.push_back(Abbv
);
412 //===--------------------------------------------------------------------===//
413 // BlockInfo Block Reading
414 //===--------------------------------------------------------------------===//
417 BlockInfo
&getOrCreateBlockInfo(unsigned BlockID
) {
418 if (BlockInfo
*BI
= getBlockInfo(BlockID
))
421 // Otherwise, add a new record.
422 BlockInfoRecords
.push_back(BlockInfo());
423 BlockInfoRecords
.back().BlockID
= BlockID
;
424 return BlockInfoRecords
.back();
429 bool ReadBlockInfoBlock() {
430 if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID
)) return true;
432 SmallVector
<uint64_t, 64> Record
;
433 BlockInfo
*CurBlockInfo
= 0;
435 // Read all the records for this module.
437 unsigned Code
= ReadCode();
438 if (Code
== bitc::END_BLOCK
)
439 return ReadBlockEnd();
440 if (Code
== bitc::ENTER_SUBBLOCK
) {
442 if (SkipBlock()) return true;
446 // Read abbrev records, associate them with CurBID.
447 if (Code
== bitc::DEFINE_ABBREV
) {
448 if (!CurBlockInfo
) return true;
451 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
452 // appropriate BlockInfo.
453 BitCodeAbbrev
*Abbv
= CurAbbrevs
.back();
454 CurAbbrevs
.pop_back();
455 CurBlockInfo
->Abbrevs
.push_back(Abbv
);
461 switch (ReadRecord(Code
, Record
)) {
462 default: break; // Default behavior, ignore unknown content.
463 case bitc::BLOCKINFO_CODE_SETBID
:
464 if (Record
.size() < 1) return true;
465 CurBlockInfo
= &getOrCreateBlockInfo((unsigned)Record
[0]);
472 } // End llvm namespace