1 //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This header defines the BitstreamWriter class. This class can be used to
10 // write an arbitrary bitstream, regardless of its contents.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_BITCODE_BITSTREAMWRITER_H
15 #define LLVM_BITCODE_BITSTREAMWRITER_H
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Bitcode/BitCodes.h"
22 #include "llvm/Support/Endian.h"
27 class BitstreamWriter
{
28 SmallVectorImpl
<char> &Out
;
30 /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
33 /// CurValue - The current value. Only bits < CurBit are valid.
36 /// CurCodeSize - This is the declared size of code values used for the
37 /// current block, in bits.
40 /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
41 /// selected BLOCK ID.
42 unsigned BlockInfoCurBID
;
44 /// CurAbbrevs - Abbrevs installed at in this block.
45 std::vector
<std::shared_ptr
<BitCodeAbbrev
>> CurAbbrevs
;
48 unsigned PrevCodeSize
;
50 std::vector
<std::shared_ptr
<BitCodeAbbrev
>> PrevAbbrevs
;
51 Block(unsigned PCS
, size_t SSW
) : PrevCodeSize(PCS
), StartSizeWord(SSW
) {}
54 /// BlockScope - This tracks the current blocks that we have entered.
55 std::vector
<Block
> BlockScope
;
57 /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
58 /// These describe abbreviations that all blocks of the specified ID inherit.
61 std::vector
<std::shared_ptr
<BitCodeAbbrev
>> Abbrevs
;
63 std::vector
<BlockInfo
> BlockInfoRecords
;
65 void WriteByte(unsigned char Value
) {
69 void WriteWord(unsigned Value
) {
70 Value
= support::endian::byte_swap
<uint32_t, support::little
>(Value
);
71 Out
.append(reinterpret_cast<const char *>(&Value
),
72 reinterpret_cast<const char *>(&Value
+ 1));
75 size_t GetBufferOffset() const { return Out
.size(); }
77 size_t GetWordIndex() const {
78 size_t Offset
= GetBufferOffset();
79 assert((Offset
& 3) == 0 && "Not 32-bit aligned");
84 explicit BitstreamWriter(SmallVectorImpl
<char> &O
)
85 : Out(O
), CurBit(0), CurValue(0), CurCodeSize(2) {}
88 assert(CurBit
== 0 && "Unflushed data remaining");
89 assert(BlockScope
.empty() && CurAbbrevs
.empty() && "Block imbalance");
92 /// Retrieve the current position in the stream, in bits.
93 uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit
; }
95 /// Retrieve the number of bits currently used to encode an abbrev ID.
96 unsigned GetAbbrevIDWidth() const { return CurCodeSize
; }
98 //===--------------------------------------------------------------------===//
99 // Basic Primitives for emitting bits to the stream.
100 //===--------------------------------------------------------------------===//
102 /// Backpatch a 32-bit word in the output at the given bit offset
103 /// with the specified value.
104 void BackpatchWord(uint64_t BitNo
, unsigned NewWord
) {
105 using namespace llvm::support
;
106 unsigned ByteNo
= BitNo
/ 8;
107 assert((!endian::readAtBitAlignment
<uint32_t, little
, unaligned
>(
108 &Out
[ByteNo
], BitNo
& 7)) &&
109 "Expected to be patching over 0-value placeholders");
110 endian::writeAtBitAlignment
<uint32_t, little
, unaligned
>(
111 &Out
[ByteNo
], NewWord
, BitNo
& 7);
114 void BackpatchWord64(uint64_t BitNo
, uint64_t Val
) {
115 BackpatchWord(BitNo
, (uint32_t)Val
);
116 BackpatchWord(BitNo
+ 32, (uint32_t)(Val
>> 32));
119 void Emit(uint32_t Val
, unsigned NumBits
) {
120 assert(NumBits
&& NumBits
<= 32 && "Invalid value size!");
121 assert((Val
& ~(~0U >> (32-NumBits
))) == 0 && "High bits set!");
122 CurValue
|= Val
<< CurBit
;
123 if (CurBit
+ NumBits
< 32) {
128 // Add the current word.
132 CurValue
= Val
>> (32-CurBit
);
135 CurBit
= (CurBit
+NumBits
) & 31;
146 void EmitVBR(uint32_t Val
, unsigned NumBits
) {
147 assert(NumBits
<= 32 && "Too many bits to emit!");
148 uint32_t Threshold
= 1U << (NumBits
-1);
150 // Emit the bits with VBR encoding, NumBits-1 bits at a time.
151 while (Val
>= Threshold
) {
152 Emit((Val
& ((1 << (NumBits
-1))-1)) | (1 << (NumBits
-1)), NumBits
);
159 void EmitVBR64(uint64_t Val
, unsigned NumBits
) {
160 assert(NumBits
<= 32 && "Too many bits to emit!");
161 if ((uint32_t)Val
== Val
)
162 return EmitVBR((uint32_t)Val
, NumBits
);
164 uint32_t Threshold
= 1U << (NumBits
-1);
166 // Emit the bits with VBR encoding, NumBits-1 bits at a time.
167 while (Val
>= Threshold
) {
168 Emit(((uint32_t)Val
& ((1 << (NumBits
-1))-1)) |
169 (1 << (NumBits
-1)), NumBits
);
173 Emit((uint32_t)Val
, NumBits
);
176 /// EmitCode - Emit the specified code.
177 void EmitCode(unsigned Val
) {
178 Emit(Val
, CurCodeSize
);
181 //===--------------------------------------------------------------------===//
182 // Block Manipulation
183 //===--------------------------------------------------------------------===//
185 /// getBlockInfo - If there is block info for the specified ID, return it,
186 /// otherwise return null.
187 BlockInfo
*getBlockInfo(unsigned BlockID
) {
188 // Common case, the most recent entry matches BlockID.
189 if (!BlockInfoRecords
.empty() && BlockInfoRecords
.back().BlockID
== BlockID
)
190 return &BlockInfoRecords
.back();
192 for (unsigned i
= 0, e
= static_cast<unsigned>(BlockInfoRecords
.size());
194 if (BlockInfoRecords
[i
].BlockID
== BlockID
)
195 return &BlockInfoRecords
[i
];
199 void EnterSubblock(unsigned BlockID
, unsigned CodeLen
) {
201 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
202 EmitCode(bitc::ENTER_SUBBLOCK
);
203 EmitVBR(BlockID
, bitc::BlockIDWidth
);
204 EmitVBR(CodeLen
, bitc::CodeLenWidth
);
207 size_t BlockSizeWordIndex
= GetWordIndex();
208 unsigned OldCodeSize
= CurCodeSize
;
210 // Emit a placeholder, which will be replaced when the block is popped.
211 Emit(0, bitc::BlockSizeWidth
);
213 CurCodeSize
= CodeLen
;
215 // Push the outer block's abbrev set onto the stack, start out with an
217 BlockScope
.emplace_back(OldCodeSize
, BlockSizeWordIndex
);
218 BlockScope
.back().PrevAbbrevs
.swap(CurAbbrevs
);
220 // If there is a blockinfo for this BlockID, add all the predefined abbrevs
221 // to the abbrev list.
222 if (BlockInfo
*Info
= getBlockInfo(BlockID
)) {
223 CurAbbrevs
.insert(CurAbbrevs
.end(), Info
->Abbrevs
.begin(),
224 Info
->Abbrevs
.end());
229 assert(!BlockScope
.empty() && "Block scope imbalance!");
230 const Block
&B
= BlockScope
.back();
233 // [END_BLOCK, <align4bytes>]
234 EmitCode(bitc::END_BLOCK
);
237 // Compute the size of the block, in words, not counting the size field.
238 size_t SizeInWords
= GetWordIndex() - B
.StartSizeWord
- 1;
239 uint64_t BitNo
= uint64_t(B
.StartSizeWord
) * 32;
241 // Update the block size field in the header of this sub-block.
242 BackpatchWord(BitNo
, SizeInWords
);
244 // Restore the inner block's code size and abbrev table.
245 CurCodeSize
= B
.PrevCodeSize
;
246 CurAbbrevs
= std::move(B
.PrevAbbrevs
);
247 BlockScope
.pop_back();
250 //===--------------------------------------------------------------------===//
252 //===--------------------------------------------------------------------===//
255 /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
256 /// record. This is a no-op, since the abbrev specifies the literal to use.
257 template<typename uintty
>
258 void EmitAbbreviatedLiteral(const BitCodeAbbrevOp
&Op
, uintty V
) {
259 assert(Op
.isLiteral() && "Not a literal");
260 // If the abbrev specifies the literal value to use, don't emit
262 assert(V
== Op
.getLiteralValue() &&
263 "Invalid abbrev for record!");
266 /// EmitAbbreviatedField - Emit a single scalar field value with the specified
268 template<typename uintty
>
269 void EmitAbbreviatedField(const BitCodeAbbrevOp
&Op
, uintty V
) {
270 assert(!Op
.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
272 // Encode the value as we are commanded.
273 switch (Op
.getEncoding()) {
274 default: llvm_unreachable("Unknown encoding!");
275 case BitCodeAbbrevOp::Fixed
:
276 if (Op
.getEncodingData())
277 Emit((unsigned)V
, (unsigned)Op
.getEncodingData());
279 case BitCodeAbbrevOp::VBR
:
280 if (Op
.getEncodingData())
281 EmitVBR64(V
, (unsigned)Op
.getEncodingData());
283 case BitCodeAbbrevOp::Char6
:
284 Emit(BitCodeAbbrevOp::EncodeChar6((char)V
), 6);
289 /// EmitRecordWithAbbrevImpl - This is the core implementation of the record
290 /// emission code. If BlobData is non-null, then it specifies an array of
291 /// data that should be emitted as part of the Blob or Array operand that is
292 /// known to exist at the end of the record. If Code is specified, then
293 /// it is the record code to emit before the Vals, which must not contain
295 template <typename uintty
>
296 void EmitRecordWithAbbrevImpl(unsigned Abbrev
, ArrayRef
<uintty
> Vals
,
297 StringRef Blob
, Optional
<unsigned> Code
) {
298 const char *BlobData
= Blob
.data();
299 unsigned BlobLen
= (unsigned) Blob
.size();
300 unsigned AbbrevNo
= Abbrev
-bitc::FIRST_APPLICATION_ABBREV
;
301 assert(AbbrevNo
< CurAbbrevs
.size() && "Invalid abbrev #!");
302 const BitCodeAbbrev
*Abbv
= CurAbbrevs
[AbbrevNo
].get();
306 unsigned i
= 0, e
= static_cast<unsigned>(Abbv
->getNumOperandInfos());
308 assert(e
&& "Expected non-empty abbreviation");
309 const BitCodeAbbrevOp
&Op
= Abbv
->getOperandInfo(i
++);
312 EmitAbbreviatedLiteral(Op
, Code
.getValue());
314 assert(Op
.getEncoding() != BitCodeAbbrevOp::Array
&&
315 Op
.getEncoding() != BitCodeAbbrevOp::Blob
&&
316 "Expected literal or scalar");
317 EmitAbbreviatedField(Op
, Code
.getValue());
321 unsigned RecordIdx
= 0;
322 for (; i
!= e
; ++i
) {
323 const BitCodeAbbrevOp
&Op
= Abbv
->getOperandInfo(i
);
324 if (Op
.isLiteral()) {
325 assert(RecordIdx
< Vals
.size() && "Invalid abbrev/record");
326 EmitAbbreviatedLiteral(Op
, Vals
[RecordIdx
]);
328 } else if (Op
.getEncoding() == BitCodeAbbrevOp::Array
) {
330 assert(i
+ 2 == e
&& "array op not second to last?");
331 const BitCodeAbbrevOp
&EltEnc
= Abbv
->getOperandInfo(++i
);
333 // If this record has blob data, emit it, otherwise we must have record
334 // entries to encode this way.
336 assert(RecordIdx
== Vals
.size() &&
337 "Blob data and record entries specified for array!");
338 // Emit a vbr6 to indicate the number of elements present.
339 EmitVBR(static_cast<uint32_t>(BlobLen
), 6);
342 for (unsigned i
= 0; i
!= BlobLen
; ++i
)
343 EmitAbbreviatedField(EltEnc
, (unsigned char)BlobData
[i
]);
345 // Know that blob data is consumed for assertion below.
348 // Emit a vbr6 to indicate the number of elements present.
349 EmitVBR(static_cast<uint32_t>(Vals
.size()-RecordIdx
), 6);
352 for (unsigned e
= Vals
.size(); RecordIdx
!= e
; ++RecordIdx
)
353 EmitAbbreviatedField(EltEnc
, Vals
[RecordIdx
]);
355 } else if (Op
.getEncoding() == BitCodeAbbrevOp::Blob
) {
356 // If this record has blob data, emit it, otherwise we must have record
357 // entries to encode this way.
360 assert(RecordIdx
== Vals
.size() &&
361 "Blob data and record entries specified for blob operand!");
363 assert(Blob
.data() == BlobData
&& "BlobData got moved");
364 assert(Blob
.size() == BlobLen
&& "BlobLen got changed");
368 emitBlob(Vals
.slice(RecordIdx
));
370 } else { // Single scalar field.
371 assert(RecordIdx
< Vals
.size() && "Invalid abbrev/record");
372 EmitAbbreviatedField(Op
, Vals
[RecordIdx
]);
376 assert(RecordIdx
== Vals
.size() && "Not all record operands emitted!");
377 assert(BlobData
== nullptr &&
378 "Blob data specified for record that doesn't use it!");
382 /// Emit a blob, including flushing before and tail-padding.
383 template <class UIntTy
>
384 void emitBlob(ArrayRef
<UIntTy
> Bytes
, bool ShouldEmitSize
= true) {
385 // Emit a vbr6 to indicate the number of elements present.
387 EmitVBR(static_cast<uint32_t>(Bytes
.size()), 6);
389 // Flush to a 32-bit alignment boundary.
392 // Emit literal bytes.
393 for (const auto &B
: Bytes
) {
394 assert(isUInt
<8>(B
) && "Value too large to emit as byte");
395 WriteByte((unsigned char)B
);
398 // Align end to 32-bits.
399 while (GetBufferOffset() & 3)
402 void emitBlob(StringRef Bytes
, bool ShouldEmitSize
= true) {
403 emitBlob(makeArrayRef((const uint8_t *)Bytes
.data(), Bytes
.size()),
407 /// EmitRecord - Emit the specified record to the stream, using an abbrev if
408 /// we have one to compress the output.
409 template <typename Container
>
410 void EmitRecord(unsigned Code
, const Container
&Vals
, unsigned Abbrev
= 0) {
412 // If we don't have an abbrev to use, emit this in its fully unabbreviated
414 auto Count
= static_cast<uint32_t>(makeArrayRef(Vals
).size());
415 EmitCode(bitc::UNABBREV_RECORD
);
418 for (unsigned i
= 0, e
= Count
; i
!= e
; ++i
)
419 EmitVBR64(Vals
[i
], 6);
423 EmitRecordWithAbbrevImpl(Abbrev
, makeArrayRef(Vals
), StringRef(), Code
);
426 /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
427 /// Unlike EmitRecord, the code for the record should be included in Vals as
429 template <typename Container
>
430 void EmitRecordWithAbbrev(unsigned Abbrev
, const Container
&Vals
) {
431 EmitRecordWithAbbrevImpl(Abbrev
, makeArrayRef(Vals
), StringRef(), None
);
434 /// EmitRecordWithBlob - Emit the specified record to the stream, using an
435 /// abbrev that includes a blob at the end. The blob data to emit is
436 /// specified by the pointer and length specified at the end. In contrast to
437 /// EmitRecord, this routine expects that the first entry in Vals is the code
439 template <typename Container
>
440 void EmitRecordWithBlob(unsigned Abbrev
, const Container
&Vals
,
442 EmitRecordWithAbbrevImpl(Abbrev
, makeArrayRef(Vals
), Blob
, None
);
444 template <typename Container
>
445 void EmitRecordWithBlob(unsigned Abbrev
, const Container
&Vals
,
446 const char *BlobData
, unsigned BlobLen
) {
447 return EmitRecordWithAbbrevImpl(Abbrev
, makeArrayRef(Vals
),
448 StringRef(BlobData
, BlobLen
), None
);
451 /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
452 /// that end with an array.
453 template <typename Container
>
454 void EmitRecordWithArray(unsigned Abbrev
, const Container
&Vals
,
456 EmitRecordWithAbbrevImpl(Abbrev
, makeArrayRef(Vals
), Array
, None
);
458 template <typename Container
>
459 void EmitRecordWithArray(unsigned Abbrev
, const Container
&Vals
,
460 const char *ArrayData
, unsigned ArrayLen
) {
461 return EmitRecordWithAbbrevImpl(Abbrev
, makeArrayRef(Vals
),
462 StringRef(ArrayData
, ArrayLen
), None
);
465 //===--------------------------------------------------------------------===//
467 //===--------------------------------------------------------------------===//
470 // Emit the abbreviation as a DEFINE_ABBREV record.
471 void EncodeAbbrev(const BitCodeAbbrev
&Abbv
) {
472 EmitCode(bitc::DEFINE_ABBREV
);
473 EmitVBR(Abbv
.getNumOperandInfos(), 5);
474 for (unsigned i
= 0, e
= static_cast<unsigned>(Abbv
.getNumOperandInfos());
476 const BitCodeAbbrevOp
&Op
= Abbv
.getOperandInfo(i
);
477 Emit(Op
.isLiteral(), 1);
478 if (Op
.isLiteral()) {
479 EmitVBR64(Op
.getLiteralValue(), 8);
481 Emit(Op
.getEncoding(), 3);
482 if (Op
.hasEncodingData())
483 EmitVBR64(Op
.getEncodingData(), 5);
489 /// EmitAbbrev - This emits an abbreviation to the stream. Note that this
490 /// method takes ownership of the specified abbrev.
491 unsigned EmitAbbrev(std::shared_ptr
<BitCodeAbbrev
> Abbv
) {
492 // Emit the abbreviation as a record.
494 CurAbbrevs
.push_back(std::move(Abbv
));
495 return static_cast<unsigned>(CurAbbrevs
.size())-1 +
496 bitc::FIRST_APPLICATION_ABBREV
;
499 //===--------------------------------------------------------------------===//
500 // BlockInfo Block Emission
501 //===--------------------------------------------------------------------===//
503 /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
504 void EnterBlockInfoBlock() {
505 EnterSubblock(bitc::BLOCKINFO_BLOCK_ID
, 2);
506 BlockInfoCurBID
= ~0U;
507 BlockInfoRecords
.clear();
510 /// SwitchToBlockID - If we aren't already talking about the specified block
511 /// ID, emit a BLOCKINFO_CODE_SETBID record.
512 void SwitchToBlockID(unsigned BlockID
) {
513 if (BlockInfoCurBID
== BlockID
) return;
514 SmallVector
<unsigned, 2> V
;
515 V
.push_back(BlockID
);
516 EmitRecord(bitc::BLOCKINFO_CODE_SETBID
, V
);
517 BlockInfoCurBID
= BlockID
;
520 BlockInfo
&getOrCreateBlockInfo(unsigned BlockID
) {
521 if (BlockInfo
*BI
= getBlockInfo(BlockID
))
524 // Otherwise, add a new record.
525 BlockInfoRecords
.emplace_back();
526 BlockInfoRecords
.back().BlockID
= BlockID
;
527 return BlockInfoRecords
.back();
532 /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
534 unsigned EmitBlockInfoAbbrev(unsigned BlockID
, std::shared_ptr
<BitCodeAbbrev
> Abbv
) {
535 SwitchToBlockID(BlockID
);
538 // Add the abbrev to the specified block record.
539 BlockInfo
&Info
= getOrCreateBlockInfo(BlockID
);
540 Info
.Abbrevs
.push_back(std::move(Abbv
));
542 return Info
.Abbrevs
.size()-1+bitc::FIRST_APPLICATION_ABBREV
;
547 } // End llvm namespace