1 //===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Reid Spencer and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the llvm::Compressor class, an abstraction for memory
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Config/config.h"
16 #include "llvm/Support/Compressor.h"
17 #include "llvm/ADT/StringExtras.h"
21 #include "bzip2/bzlib.h"
/// Type tags for compressed data. In this file COMP_TYPE_BZIP2 and
/// COMP_TYPE_NONE are stored as the first output byte by
/// Compressor::compress and appear as the case labels in
/// Compressor::decompress.
24 enum CompressionTypes
{
/// getdata - Ask the caller-supplied callback for an output buffer.
/// The callback is invoked as (*cb)(buffer, size, context); \p buffer and
/// \p size are passed by reference so the callback can fill them in, and its
/// integer status is captured in `result`.
/// Both assertions treat a null buffer or a zero size coming back from the
/// callback as a contract violation.
29 static int getdata(char*& buffer
, size_t &size
,
30 llvm::Compressor::OutputDataCallback
* cb
, void* context
) {
33 int result
= (*cb
)(buffer
, size
, context
);
34 assert(buffer
!= 0 && "Invalid result from Compressor callback");
35 assert(size
!= 0 && "Invalid result from Compressor callback");
/// getdata_uns - Variant of getdata for callers whose size field is an
/// `unsigned` rather than a size_t; Compressor::compress and
/// Compressor::decompress pass bzdata.avail_out here. The request is forwarded
/// to getdata through SizeOut and the status is kept in Res.
/// NOTE(review): presumably SizeOut is a size_t temporary whose value is copied
/// back into \p size afterwards -- confirm.
39 static int getdata_uns(char*& buffer
, unsigned &size
,
40 llvm::Compressor::OutputDataCallback
* cb
, void* context
)
43 int Res
= getdata(buffer
, SizeOut
, cb
, context
);
48 //===----------------------------------------------------------------------===//
49 //=== NULLCOMP - a compression like set of routines that just copies data
50 //=== without doing any compression. This is provided so that if the
51 //=== configured environment doesn't have a compression library the
52 //=== program can still work, albeit using more data/memory.
53 //===----------------------------------------------------------------------===//
/// State shared by the NULLCOMP_* routines. The routines in this file use
/// next_in/avail_in as the input cursor and remaining input byte count,
/// next_out/avail_out as the output cursor and remaining output space, and
/// output_count as the running total of bytes copied.
55 struct NULLCOMP_stream
{
56 // User provided fields
63 size_t output_count
; // Total count of output bytes
/// NULLCOMP_init - Prepare \p s for use. Compressor::compress and
/// Compressor::decompress call this after assigning next_in and avail_in and
/// before requesting the first output buffer.
/// NOTE(review): presumably this zeroes the output-side fields (output_count is
/// only ever incremented by the copy routines) -- confirm.
66 static void NULLCOMP_init(NULLCOMP_stream
* s
) {
/// NULLCOMP_compress - Copy input bytes to the output buffer unchanged.
/// Preconditions (asserted): \p s is non-null, both cursors are non-null, and
/// at least one byte of input and one byte of output space are available.
/// Compressor::compress loops while this returns false, fetching a fresh
/// output buffer on each iteration.
70 static bool NULLCOMP_compress(NULLCOMP_stream
* s
) {
71 assert(s
&& "Invalid NULLCOMP_stream");
72 assert(s
->next_in
!= 0);
73 assert(s
->next_out
!= 0);
74 assert(s
->avail_in
>= 1);
75 assert(s
->avail_out
>= 1);
// The output has room for all remaining input: copy avail_in bytes in one
// shot and account for them in output_count and avail_out.
77 if (s
->avail_out
>= s
->avail_in
) {
78 ::memcpy(s
->next_out
, s
->next_in
, s
->avail_in
);
79 s
->output_count
+= s
->avail_in
;
80 s
->avail_out
-= s
->avail_in
;
81 s
->next_in
+= s
->avail_in
;
// Partial copy of avail_out bytes: the input cursor advances past the copied
// bytes and avail_in shrinks by the same amount, leaving the rest pending.
85 ::memcpy(s
->next_out
, s
->next_in
, s
->avail_out
);
86 s
->output_count
+= s
->avail_out
;
87 s
->avail_in
-= s
->avail_out
;
88 s
->next_in
+= s
->avail_out
;
/// NULLCOMP_decompress - Copy input bytes to the output buffer unchanged; the
/// visible statements are the same as those of NULLCOMP_compress.
/// Preconditions (asserted): \p s is non-null, both cursors are non-null, and
/// at least one byte of input and one byte of output space are available.
/// Compressor::decompress loops while this returns false, fetching a fresh
/// output buffer on each iteration.
94 static bool NULLCOMP_decompress(NULLCOMP_stream
* s
) {
95 assert(s
&& "Invalid NULLCOMP_stream");
96 assert(s
->next_in
!= 0);
97 assert(s
->next_out
!= 0);
98 assert(s
->avail_in
>= 1);
99 assert(s
->avail_out
>= 1);
// The output has room for all remaining input: copy avail_in bytes in one
// shot and account for them in output_count and avail_out.
101 if (s
->avail_out
>= s
->avail_in
) {
102 ::memcpy(s
->next_out
, s
->next_in
, s
->avail_in
);
103 s
->output_count
+= s
->avail_in
;
104 s
->avail_out
-= s
->avail_in
;
105 s
->next_in
+= s
->avail_in
;
// Partial copy of avail_out bytes: the input cursor advances past the copied
// bytes and avail_in shrinks by the same amount, leaving the rest pending.
109 ::memcpy(s
->next_out
, s
->next_in
, s
->avail_out
);
110 s
->output_count
+= s
->avail_out
;
111 s
->avail_in
-= s
->avail_out
;
112 s
->next_in
+= s
->avail_out
;
/// NULLCOMP_end - Counterpart of NULLCOMP_init. Compressor::compress and
/// Compressor::decompress call it on the stream once they have read the final
/// output_count.
118 static void NULLCOMP_end(NULLCOMP_stream
* strm
) {
123 /// This structure is only used when a bytecode file is compressed.
124 /// As bytecode is being decompressed, the memory buffer might need
125 /// to be reallocated. The buffer allocation is handled in a callback
126 /// and this structure is needed to retain information across calls
128 /// @brief An internal buffer object used for handling decompression
129 struct BufferContext
{
132 BufferContext(size_t compressedSize
) {
133 // Null to indicate malloc of a new block
136 // Compute the initial length of the uncompression buffer. Note that this
137 // is twice the length of the compressed buffer and will be doubled again
138 // in the callback for an initial allocation of 4x compressedSize. This
139 // calculation is based on the typical compression ratio of bzip2 on LLVM
140 // bytecode files which typically ranges in the 50%-75% range. Since we
141 // typically get at least 50%, doubling is insufficient. By using a 4x
142 // multiplier on the first allocation, we minimize the impact of having to
143 // copy the buffer on reallocation.
144 size
= compressedSize
*2;
147 /// trimTo - Reduce the size of the buffer down to the specified amount. This
148 /// is useful after having read in the bytecode file to discard extra unused
// NOTE(review): the realloc result is stored straight into buff; if realloc
// were to return null the previous block would remain allocated but its
// pointer would be overwritten here.
151 void trimTo(size_t NewSize
) {
152 buff
= (char*)::realloc(buff
, NewSize
);
156 /// This function handles allocation of the buffer used for decompression of
157 /// compressed bytecode files. It is called by Compressor::decompress which is
158 /// called by BytecodeReader::ParseBytecode.
159 static size_t callback(char*&buff
, size_t &sz
, void* ctxt
){
160 // Cast the context variable to our BufferContext
161 BufferContext
* bc
= reinterpret_cast<BufferContext
*>(ctxt
);
163 // Compute the new, doubled, size of the block
164 size_t new_size
= bc
->size
* 2;
166 // Extend or allocate the block (realloc(0,n) == malloc(n))
167 char* new_buff
= (char*) ::realloc(bc
->buff
, new_size
);
169 // Figure out what to return to the Compressor. If this is the first call,
170 // then bc->buff will be null. In this case we want to return the entire
171 // buffer because there was no previous allocation. Otherwise, when the
172 // buffer is reallocated, we save the new base pointer in the
173 // BufferContext.buff field but return the address of only the extension,
174 // mid-way through the buffer (since its size was doubled). Furthermore,
175 // the sz result must be 1/2 the total size of the buffer.
176 if (bc
->buff
== 0 ) {
177 buff
= bc
->buff
= new_buff
;
// Not the first call: hand back the region that starts bc->size bytes into
// the reallocated block, i.e. just past the previously filled portion.
181 buff
= new_buff
+ bc
->size
;
185 // Retain the size of the allocated block
188 // Make sure we fail (return 1) if we didn't get any memory.
189 return (bc
->buff
== 0 ? 1 : 0);
193 } // end anonymous namespace
198 // This structure retains the context when compressing the bytecode file. The
199 // WriteCompressedData function below uses it to keep track of the previously
200 // filled chunk of memory (which it writes) and how many bytes have been
// The same context type is also used by Compressor::decompressToStream.
202 struct WriterContext
{
203 // Initialize the context
// Starts with no chunk allocated (chunk == 0, sz == 0) and nothing written;
// CS is stored in compSize and OS in Out.
204 WriterContext(std::ostream
*OS
, size_t CS
)
205 : chunk(0), sz(0), written(0), compSize(CS
), Out(OS
) {}
207 // Make sure we clean up memory
// Write the current chunk to the output stream. With the default argument
// (size == 0) the full chunk of sz bytes is written; otherwise exactly `size`
// bytes are written. The amount written is added to `written`.
214 void write(size_t size
= 0) {
215 size_t write_size
= (size
== 0 ? sz
: size
);
216 Out
->write(chunk
,write_size
);
217 written
+= write_size
;
223 // This function is a callback used by the Compressor::compress function to
224 // allocate memory for the compression buffer. This function fulfills that
225 // responsibility but also writes the previous (now filled) buffer out to the
227 static size_t callback(char*& buffer
, size_t &size
, void* context
) {
228 // Cast the context to the structure it must point to.
229 WriterContext
* ctxt
= reinterpret_cast<WriterContext
*>(context
);
231 // If there's a previously allocated chunk, it must now be filled with
232 // compressed data, so we write it out and deallocate it.
233 if (ctxt
->chunk
!= 0 && ctxt
->sz
> 0 ) {
237 // Compute the size of the next chunk to allocate. We attempt to allocate
238 // enough memory to handle the compression in a single memory allocation. In
239 // general, the worst we do on compression of bytecode is about 50% so we
240 // conservatively estimate compSize / 2 as the size needed for the
241 // compression buffer. compSize is the size of the compressed data, provided
242 // by WriteBytecodeToFile.
243 size
= ctxt
->sz
= ctxt
->compSize
/ 2;
245 // Allocate the chunks
246 buffer
= ctxt
->chunk
= new char [size
];
248 // We must return 1 if the allocation failed so that the Compressor knows
249 // not to use the buffer pointer.
// NOTE(review): a plain `new char[size]` reports failure by throwing rather
// than by returning null, so this check can only fire if the build uses a
// non-throwing allocator -- confirm.
250 return (ctxt
->chunk
== 0 ? 1 : 0);
253 char* chunk
; // pointer to the chunk of memory filled by compression
254 size_t sz
; // size of chunk
255 size_t written
; // aggregate total of bytes written in all chunks
// NOTE(review): the comment in callback() calls compSize "the size of the
// compressed data" while the member comment below says "uncompressed buffer";
// the callers in this file pass size / 2 -- confirm which wording is intended.
256 size_t compSize
; // size of the uncompressed buffer
257 std::ostream
* Out
; // The stream we write the data to.
260 } // end anonymous namespace
262 // Compress in one of three ways
// NOTE(review): only two strategies are visible in this function -- bzip2 for
// inputs larger than 64KB and a plain NULLCOMP copy otherwise -- so "three"
// may be stale; confirm.
//
// Output memory is never allocated here: every buffer is obtained from \p cb
// (through getdata / getdata_uns) with \p context passed along. The first
// byte of the output is a CompressionTypes tag. Failure descriptions are
// stored through \p error.
263 size_t Compressor::compress(const char* in
, size_t size
,
264 OutputDataCallback
* cb
, void* context
,
265 std::string
* error
) {
266 assert(in
&& "Can't compress null buffer");
267 assert(size
&& "Can't compress empty buffer");
268 assert(cb
&& "Can't compress without a callback function");
272 // For small files, we just don't bother compressing. bzip2 isn't very good
273 // with tiny files and can actually make the file larger, so we just avoid
275 if (size
> 64*1024) {
276 // Set up the bz_stream
281 bzdata
.next_in
= (char*)in
;
// NOTE(review): bz_stream::avail_in is an unsigned int in the bzlib API, so
// a size above UINT_MAX would be narrowed here -- confirm callers cannot
// exceed that.
282 bzdata
.avail_in
= size
;
284 bzdata
.avail_out
= 0;
// Per the bzlib API the arguments after the stream are blockSize100k = 5,
// verbosity = 0 and workFactor = 100.
285 switch ( BZ2_bzCompressInit(&bzdata
, 5, 0, 100) ) {
286 case BZ_CONFIG_ERROR
:
288 *error
= "bzip2 library mis-compiled";
292 *error
= "Compressor internal error";
296 *error
= "Out of memory";
303 // Get a block of memory
304 if (0 != getdata_uns(bzdata
.next_out
, bzdata
.avail_out
,cb
,context
)) {
305 BZ2_bzCompressEnd(&bzdata
);
307 *error
= "Can't allocate output buffer";
311 // Put compression code in first byte
312 (*bzdata
.next_out
++) = COMP_TYPE_BZIP2
;
// Drive the compressor in BZ_FINISH mode; each BZ_FINISH_OK result means
// more output is pending, so another buffer is requested from the callback.
316 int bzerr
= BZ_FINISH_OK
;
317 while (BZ_FINISH_OK
== (bzerr
= BZ2_bzCompress(&bzdata
, BZ_FINISH
))) {
318 if (0 != getdata_uns(bzdata
.next_out
, bzdata
.avail_out
,cb
,context
)) {
319 BZ2_bzCompressEnd(&bzdata
);
321 *error
= "Can't allocate output buffer";
326 case BZ_SEQUENCE_ERROR
:
329 *error
= "Param/Sequence error";
332 case BZ_STREAM_END
: break;
335 *error
= "BZip2 Error: " + utostr(unsigned(bzerr
));
// Total output is bzip2's byte count plus one (presumably for the leading
// type byte written above). The high 32 bits of bzip2's counter are merged
// in only when size_t is as wide as uint64_t.
340 result
= bzdata
.total_out_lo32
+ 1;
341 if (sizeof(size_t) == sizeof(uint64_t))
342 result
|= static_cast<uint64_t>(bzdata
.total_out_hi32
) << 32;
344 BZ2_bzCompressEnd(&bzdata
);
346 // Do null compression, for small files
347 NULLCOMP_stream sdata
;
348 sdata
.next_in
= (char*)in
;
349 sdata
.avail_in
= size
;
350 NULLCOMP_init(&sdata
);
352 if (0 != getdata(sdata
.next_out
, sdata
.avail_out
,cb
,context
)) {
354 *error
= "Can't allocate output buffer";
// Tag the output as uncompressed.
358 *(sdata
.next_out
++) = COMP_TYPE_NONE
;
// Keep copying; whenever NULLCOMP_compress returns false, ask the callback
// for another output buffer.
361 while (!NULLCOMP_compress(&sdata
)) {
362 if (0 != getdata(sdata
.next_out
, sdata
.avail_out
,cb
,context
)) {
364 *error
= "Can't allocate output buffer";
// Bytes copied plus one (presumably for the leading type byte).
369 result
= sdata
.output_count
+ 1;
370 NULLCOMP_end(&sdata
);
/// compressToNewBuffer - Compress \p in using a BufferContext as the output
/// allocator. The context is seeded with the input size, and
/// BufferContext::callback is handed to compress() together with the context's
/// address; BufferContext doubles its stored size (2 * size) on each callback,
/// so the first request is for 4 * size bytes.
375 size_t Compressor::compressToNewBuffer(const char* in
, size_t size
, char*&out
,
376 std::string
* error
) {
377 BufferContext
bc(size
);
378 size_t result
= compress(in
,size
,BufferContext::callback
,(void*)&bc
,error
);
/// compressToStream - Compress \p in and write the result to \p out through a
/// WriterContext, whose callback supplies chunk buffers to compress().
/// NOTE(review): the context is built with size / 2 and WriterContext::callback
/// halves that again for the chunk size, so for size < 4 the chunk size is 0,
/// which getdata's size assertion rejects -- confirm such small inputs are
/// never passed.
385 Compressor::compressToStream(const char*in
, size_t size
, std::ostream
& out
,
386 std::string
* error
) {
387 // Set up the context and writer
388 WriterContext
ctxt(&out
, size
/ 2);
390 // Compress everything after the magic number (which we'll alter).
391 size_t zipSize
= Compressor::compress(in
,size
,
392 WriterContext::callback
, (void*)&ctxt
,error
);
// On success with a chunk still held by the context, write out the bytes of
// the result that are not yet counted in ctxt.written.
394 if (zipSize
&& ctxt
.chunk
) {
395 ctxt
.write(zipSize
- ctxt
.written
);
400 // Decompress in one of three ways
// NOTE(review): only two cases are visible in this function -- COMP_TYPE_BZIP2
// and COMP_TYPE_NONE -- so "three" may be stale; confirm.
//
// Output memory is never allocated here: every buffer is obtained from \p cb
// (through getdata / getdata_uns) with \p context passed along. Failure
// descriptions are stored through \p error. The input must be longer than one
// byte (asserted).
401 size_t Compressor::decompress(const char *in
, size_t size
,
402 OutputDataCallback
* cb
, void* context
,
403 std::string
* error
) {
404 assert(in
&& "Can't decompress null buffer");
405 assert(size
> 1 && "Can't decompress empty buffer");
406 assert(cb
&& "Can't decompress without a callback function");
411 case COMP_TYPE_BZIP2
: {
412 // Set up the bz_stream
417 bzdata
.next_in
= (char*)in
;
// One byte fewer than the input length, presumably to exclude the leading
// type byte -- confirm.
418 bzdata
.avail_in
= size
- 1;
420 bzdata
.avail_out
= 0;
421 switch ( BZ2_bzDecompressInit(&bzdata
, 0, 0) ) {
422 case BZ_CONFIG_ERROR
:
424 *error
= "bzip2 library mis-compiled";
428 *error
= "Compressor internal error";
432 *error
= "Out of memory";
439 // Get a block of memory
440 if (0 != getdata_uns(bzdata
.next_out
, bzdata
.avail_out
,cb
,context
)) {
441 BZ2_bzDecompressEnd(&bzdata
);
443 *error
= "Can't allocate output buffer";
// Keep decompressing while bzip2 reports BZ_OK and unread input remains;
// each iteration requests another output buffer from the callback.
449 while ( BZ_OK
== (bzerr
= BZ2_bzDecompress(&bzdata
)) &&
450 bzdata
.avail_in
!= 0 ) {
451 if (0 != getdata_uns(bzdata
.next_out
, bzdata
.avail_out
,cb
,context
)) {
452 BZ2_bzDecompressEnd(&bzdata
);
454 *error
= "Can't allocate output buffer";
// Each failure path below releases the bzip2 stream before recording its
// message.
460 BZ2_bzDecompressEnd(&bzdata
);
463 *error
= "Compressor internal error";
466 BZ2_bzDecompressEnd(&bzdata
);
468 *error
= "Out of memory";
471 BZ2_bzDecompressEnd(&bzdata
);
473 *error
= "Data integrity error";
475 case BZ_DATA_ERROR_MAGIC
:
476 BZ2_bzDecompressEnd(&bzdata
);
478 *error
= "Data is not BZIP2";
481 BZ2_bzDecompressEnd(&bzdata
);
483 *error
= "Insufficient input for bzip2";
485 case BZ_STREAM_END
: break;
487 BZ2_bzDecompressEnd(&bzdata
);
489 *error
= "Unknown result code from bzDecompress";
// The result is bzip2's output byte count; the high 32 bits are merged in
// only when size_t is as wide as uint64_t.
494 result
= bzdata
.total_out_lo32
;
495 if (sizeof(size_t) == sizeof(uint64_t))
496 result
|= (static_cast<uint64_t>(bzdata
.total_out_hi32
) << 32);
497 BZ2_bzDecompressEnd(&bzdata
);
501 case COMP_TYPE_NONE
: {
502 NULLCOMP_stream sdata
;
503 sdata
.next_in
= (char*)in
;
// One byte fewer than the input length, presumably to exclude the leading
// type byte -- confirm.
504 sdata
.avail_in
= size
- 1;
505 NULLCOMP_init(&sdata
);
507 if (0 != getdata(sdata
.next_out
, sdata
.avail_out
,cb
,context
)) {
509 *error
= "Can't allocate output buffer";
// Keep copying; whenever NULLCOMP_decompress returns false, ask the callback
// for another output buffer.
513 while (!NULLCOMP_decompress(&sdata
)) {
514 if (0 != getdata(sdata
.next_out
, sdata
.avail_out
,cb
,context
)) {
516 *error
= "Can't allocate output buffer";
521 result
= sdata
.output_count
;
522 NULLCOMP_end(&sdata
);
528 *error
= "Unknown type of compressed data";
/// decompressToNewBuffer - Decompress \p in using a BufferContext as the
/// output allocator. The context is seeded with the compressed size, and
/// BufferContext::callback is handed to decompress() together with the
/// context's address; the callback grows the buffer as decompression proceeds.
536 Compressor::decompressToNewBuffer(const char* in
, size_t size
, char*&out
,
537 std::string
* error
) {
538 BufferContext
bc(size
);
539 size_t result
= decompress(in
,size
,BufferContext::callback
,(void*)&bc
,error
);
/// decompressToStream - Decompress \p in and write the result to \p out
/// through a WriterContext, whose callback supplies chunk buffers to
/// decompress(). The context is constructed with size / 2.
545 Compressor::decompressToStream(const char*in
, size_t size
, std::ostream
& out
,
546 std::string
* error
) {
547 // Set up the context and writer
548 WriterContext
ctxt(&out
,size
/ 2);
550 // Decompress everything after the magic number (which we'll alter)
// Despite its name, zipSize holds the value returned by decompress().
551 size_t zipSize
= Compressor::decompress(in
,size
,
552 WriterContext::callback
, (void*)&ctxt
,error
);
// On success with a chunk still held by the context, write out the bytes of
// the result that are not yet counted in ctxt.written.
554 if (zipSize
&& ctxt
.chunk
) {
555 ctxt
.write(zipSize
- ctxt
.written
);