1 /* zlib.c --- interface to the zlib compression library
2 Ian Lance Taylor <ian@cygnus.com>
4 This file is part of GNU CVS.
6 GNU CVS is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details. */
16 /* The routines in this file are the interface between the CVS
17 client/server support and the zlib compression library. */
21 #include "pagealign_alloc.h"
23 #if defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT)
31 /* OS/2 doesn't have EIO. FIXME: this whole notion of turning
32 a different error into EIO strikes me as pretty dubious. */
37 /* The compression interface is built upon the buffer data structure.
38 We provide a buffer type which compresses or decompresses the data
39 which passes through it. An input buffer decompresses the data
40 read from an underlying buffer, and an output buffer compresses the
41 data before writing it to an underlying buffer. */
43 /* This structure is the closure field of the buffer. */
/* Every callback in this file receives it as CLOSURE (or reaches it
   through BUF->closure) and reads three members through it: buf, the
   underlying buffer; zstr, the z_stream handed to the zlib calls; and
   level, which compress_buffer_output compares against gzip_level.
   NOTE(review): the member declarations themselves are not visible in
   this listing; confirm the member types against the full source.  */
45 struct compress_buffer
47 /* The underlying buffer. */
50 /* The compression information. */
/* Forward declarations for the static routines defined below.
   compress_error is the shared zlib error reporter; the others are
   the callbacks that compress_buffer_initialize passes to
   buf_initialize.  */
55 static void compress_error (int, int, z_stream
*, const char *);
56 static int compress_buffer_input (void *, char *, size_t, size_t, size_t *);
57 static int compress_buffer_output (void *, const char *, size_t, size_t *);
58 static int compress_buffer_flush (void *);
59 static int compress_buffer_block (void *, bool);
60 static int compress_buffer_get_fd (void *);
61 static int compress_buffer_shutdown_input (struct buffer
*);
62 static int compress_buffer_shutdown_output (struct buffer
*);
64 /* Report an error from one of the zlib functions. */
/* STATUS and MSG are supplied by the caller; ZSTATUS is the zlib
   return code and ZSTR the stream it came from.  The visible code
   formats "error %d" from ZSTATUS into a local buffer and supplies
   hold_errno as the errno value only when ZSTATUS is Z_ERRNO (0
   otherwise).  In this file the calls that follow inflateInit or
   deflateInit in compress_buffer_initialize, and inflateInit2 in
   gunzip_and_write, pass 1 for STATUS; every other call passes 0.
   NOTE(review): the final reporting call and the declarations of buf
   and hold_errno are not visible in this listing; presumably STATUS
   and the errno value are forwarded to error() - confirm.  Also
   confirm that buf is large enough for "error %d" with any int.  */
67 compress_error (int status
, int zstatus
, z_stream
*zstr
, const char *msg
)
78 sprintf (buf
, "error %d", zstatus
);
83 zstatus
== Z_ERRNO
? hold_errno
: 0,
89 /* Create a compression buffer. */
/* BUF is the buffer to wrap, INPUT is nonzero for a decompressing
   (input) buffer, LEVEL is passed to deflateInit, and MEMORY is a
   callback taking a struct buffer pointer.
   A zero-filled struct compress_buffer is allocated with xmalloc and
   its z_stream is set up with inflateInit or deflateInit;
   compress_error is called with status 1 and the message
   "compression initialization" for that step.  The result is
   whatever buf_initialize returns: INPUT selects either
   compress_buffer_input alone, or compress_buffer_output together
   with compress_buffer_flush; the block and get_fd callbacks are
   installed in both cases, followed by one of the two shutdown
   routines.
   NOTE(review): the test that picks inflateInit versus deflateInit,
   the condition selecting the shutdown routine (presumably INPUT),
   the stores into the new closure and the trailing arguments of
   buf_initialize are not visible in this listing.  */
91 compress_buffer_initialize (struct buffer
*buf
, int input
, int level
,
92 void (*memory
) (struct buffer
*))
94 struct compress_buffer
*n
;
97 n
= xmalloc (sizeof *n
);
98 memset (n
, 0, sizeof *n
);
104 zstatus
= inflateInit (&n
->zstr
);
106 zstatus
= deflateInit (&n
->zstr
, level
);
108 compress_error (1, zstatus
, &n
->zstr
, "compression initialization");
110 /* There may already be data buffered on BUF. For an output
111 buffer, this is OK, because these routines will just use the
112 buffer routines to append data to the (uncompressed) data
113 already on BUF. An input buffer expects to handle a single
114 buffer_data of buffered input to be uncompressed, so that is OK
115 provided there is only one buffer. At present that is all
116 there ever will be; if this changes, compress_buffer_input must
117 be modified to handle multiple input buffers. */
118 assert (! input
|| buf
->data
== NULL
|| buf
->data
->next
== NULL
);
120 return buf_initialize (input
? compress_buffer_input
: NULL
,
121 input
? NULL
: compress_buffer_output
,
122 input
? NULL
: compress_buffer_flush
,
123 compress_buffer_block
, compress_buffer_get_fd
,
125 ? compress_buffer_shutdown_input
126 : compress_buffer_shutdown_output
),
133 /* Input data from a compression buffer. */
/* CLOSURE is the struct compress_buffer.  Up to SIZE bytes of
   inflated data are stored at DATA, and *GOT is set to SIZE minus the
   z_stream's remaining avail_out, i.e. the number of bytes produced.
   NEED is the minimum wanted: the test after each inflate pass looks
   for at least NEED bytes (and at least one), and NEED > 0 is what is
   passed as the NEED argument of the underlying buffer's input
   routine, which is asked for up to BUFFER_DATA_SIZE bytes into the
   staging buffer_data's text area.
   NOTE(review): several lines of this function (loop headers, return
   statements, the declaration of nread, the remaining parameter) are
   not visible in this listing, so return values are not described
   here.  */
135 compress_buffer_input (void *closure
, char *data
, size_t need
, size_t size
,
138 struct compress_buffer
*cb
= closure
;
139 struct buffer_data
*bd
;
141 assert (cb
->buf
->input
);
143 /* We use a single buffer_data structure to buffer up data which
144 the z_stream structure won't use yet. We can safely store this
145 on cb->buf->data, because we never call the buffer routines on
146 cb->buf; we only call the buffer input routine, since that
147 gives us the semantics we want. As noted in
148 compress_buffer_initialize, the buffer_data structure may
149 already exist, and hold data which was already read and
150 buffered before the decompression began. */
154 bd
= xmalloc (sizeof (struct buffer_data
));
157 bd
->text
= pagealign_xalloc (BUFFER_DATA_SIZE
);
158 if (bd
->text
== NULL
)
168 cb
->zstr
.avail_out
= size
;
169 cb
->zstr
.next_out
= (Bytef
*) data
;
173 int zstatus
, sofar
, status
;
176 /* First try to inflate any data we already have buffered up.
177 This is useful even if we don't have any buffered data,
178 because there may be data buffered inside the z_stream
181 cb
->zstr
.avail_in
= bd
->size
;
182 cb
->zstr
.next_in
= (Bytef
*) bd
->bufp
;
186 zstatus
= inflate (&cb
->zstr
, Z_NO_FLUSH
);
187 if (zstatus
== Z_STREAM_END
)
189 if (zstatus
!= Z_OK
&& zstatus
!= Z_BUF_ERROR
)
191 compress_error (0, zstatus
, &cb
->zstr
, "inflate");
194 } while (cb
->zstr
.avail_in
> 0
195 && cb
->zstr
.avail_out
> 0);
197 bd
->size
= cb
->zstr
.avail_in
;
198 bd
->bufp
= (char *) cb
->zstr
.next_in
;
200 sofar
= size
- cb
->zstr
.avail_out
;
202 if (zstatus
== Z_STREAM_END
)
204 /* If we read any data, then return it, relying on the fact that
205 * we will get Z_STREAM_END on the next read too.
207 if (sofar
> 0) break;
209 /* Otherwise, return EOF. */
213 /* If we have obtained NEED bytes, then return, unless NEED is
214 zero and we haven't obtained anything at all. If NEED is
215 zero, we will attempt at least one nonblocking read and see if
216 we can inflate anything then. */
217 if (sofar
> 0 && sofar
>= need
)
220 /* All our buffered data should have been processed at this
222 assert (bd
->size
== 0);
224 /* This will work well in the server, because this call will
225 do an unblocked read and fetch all the available data. In
226 the client, this will read a single byte from the stdio
227 stream, which will cause us to call inflate once per byte.
228 It would be more efficient if we could make a call which
229 would fetch all the available bytes, and at least one byte. */
231 status
= (*cb
->buf
->input
) (cb
->buf
->closure
, bd
->text
,
232 need
> 0, BUFFER_DATA_SIZE
, &nread
);
235 /* Don't try to recover from memory allocation errors. */
240 /* If we read any data, then return it, relying on the fact that
241 * we will get the same error reading the underlying buffer
242 * on the next read too.
244 if (sofar
> 0) break;
246 /* Otherwise, return EOF. */
250 /* If we didn't read anything, then presumably the buffer is
251 in nonblocking mode, and we should just get out now with
252 whatever we've inflated. */
263 *got
= size
- cb
->zstr
.avail_out
;
/* Compression level, declared extern here.  compress_buffer_output
   compares it with the closure's level member and calls deflateParams
   when the two differ.  */
270 extern int gzip_level
;
/* compress_buffer_output: deflate HAVE bytes from DATA and queue the
   compressed bytes on the underlying buffer.
   The visible code brings the stream's level in line with gzip_level
   via deflateParams (Z_DEFAULT_STRATEGY), then calls deflate with
   Z_NO_FLUSH while avail_in is nonzero, using a BUFFER_DATA_SIZE
   scratch buffer held in a function-static pointer; whatever deflate
   produced is passed to buf_output on the underlying buffer.  The
   return value is that of buf_send_output on the underlying buffer.
   NOTE(review): the fourth parameter, the guard around the scratch
   allocation and the check on deflate's status are not visible in
   this listing.  */
272 /* Output data to a compression buffer.
275 * gzip_level If GZIP_LEVEL has changed to a value different from
276 * CLOSURE->level, then set the compression level on the
277 * stream to the new value.
280 compress_buffer_output (void *closure
, const char *data
, size_t have
,
283 struct compress_buffer
*cb
= closure
;
285 /* This is only used within the while loop below, but allocated here for
288 static char *buffer
= NULL
;
290 buffer
= pagealign_xalloc (BUFFER_DATA_SIZE
);
292 if (cb
->level
!= gzip_level
)
294 cb
->level
= gzip_level
;
295 deflateParams (&cb
->zstr
, gzip_level
, Z_DEFAULT_STRATEGY
);
298 cb
->zstr
.avail_in
= have
;
299 cb
->zstr
.next_in
= (unsigned char *) data
;
301 while (cb
->zstr
.avail_in
> 0)
305 cb
->zstr
.avail_out
= BUFFER_DATA_SIZE
;
306 cb
->zstr
.next_out
= (unsigned char *) buffer
;
308 zstatus
= deflate (&cb
->zstr
, Z_NO_FLUSH
);
311 compress_error (0, zstatus
, &cb
->zstr
, "deflate");
315 if (cb
->zstr
.avail_out
!= BUFFER_DATA_SIZE
)
316 buf_output (cb
->buf
, buffer
,
317 BUFFER_DATA_SIZE
- cb
->zstr
.avail_out
);
322 /* We will only be here because buf_send_output was called on the
323 compression buffer. That means that we should now call
324 buf_send_output on the underlying buffer. */
325 return buf_send_output (cb
->buf
);
330 /* Flush a compression buffer. */
/* CLOSURE is the struct compress_buffer.  With no new input
   (avail_in 0, next_in NULL) deflate is called with Z_SYNC_FLUSH into
   a BUFFER_DATA_SIZE scratch buffer held in a function-static pointer;
   produced bytes are handed to buf_output on the underlying buffer,
   and Z_BUF_ERROR from deflate is tested for separately.  The return
   value is that of buf_flush on the underlying buffer with 0 as the
   second argument.
   NOTE(review): the loop construct and the statements guarded by the
   visible conditions are not present in this listing.  */
332 compress_buffer_flush (void *closure
)
334 struct compress_buffer
*cb
= closure
;
336 /* This is only used within the while loop below, but allocated here for
339 static char *buffer
= NULL
;
341 buffer
= pagealign_xalloc (BUFFER_DATA_SIZE
);
343 cb
->zstr
.avail_in
= 0;
344 cb
->zstr
.next_in
= NULL
;
350 cb
->zstr
.avail_out
= BUFFER_DATA_SIZE
;
351 cb
->zstr
.next_out
= (unsigned char *) buffer
;
353 zstatus
= deflate (&cb
->zstr
, Z_SYNC_FLUSH
);
355 /* The deflate function will return Z_BUF_ERROR if it can't do
356 anything, which in this case means that all data has been
358 if (zstatus
== Z_BUF_ERROR
)
363 compress_error (0, zstatus
, &cb
->zstr
, "deflate flush");
367 if (cb
->zstr
.avail_out
!= BUFFER_DATA_SIZE
)
368 buf_output (cb
->buf
, buffer
,
369 BUFFER_DATA_SIZE
- cb
->zstr
.avail_out
);
371 /* If the deflate function did not fill the output buffer,
372 then all data has been flushed. */
373 if (cb
->zstr
.avail_out
> 0)
377 /* Now flush the underlying buffer. Note that if the original
378 call to buf_flush passed 1 for the BLOCK argument, then the
379 buffer will already have been set into blocking mode, so we
380 should always pass 0 here. */
381 return buf_flush (cb
->buf
, 0);
386 /* The block routine for a compression buffer. */
/* CLOSURE is the struct compress_buffer; BLOCK is the requested mode.
   The request is forwarded to the underlying buffer: the result of
   set_block or of set_nonblock on it is returned.
   NOTE(review): the condition choosing between the two calls is not
   visible in this listing; presumably it tests BLOCK.  */
388 compress_buffer_block (void *closure
, bool block
)
390 struct compress_buffer
*cb
= closure
;
393 return set_block (cb
->buf
);
395 return set_nonblock (cb
->buf
);
400 /* Return the file descriptor underlying any child buffers. */
/* CLOSURE is the struct compress_buffer; the result is whatever
   buf_get_fd returns for its underlying buffer.  */
402 compress_buffer_get_fd (void *closure
)
404 struct compress_buffer
*cb
= closure
;
405 return buf_get_fd (cb
->buf
);
410 /* Shut down an input buffer. */
/* BUF is the compression buffer itself; its closure is the struct
   compress_buffer.  The z_stream is released with inflateEnd,
   compress_error is called for that step with status 0 and the
   message "inflateEnd", and the result of buf_shutdown on the
   underlying buffer is returned.
   NOTE(review): the test on inflateEnd's status is not visible in
   this listing; presumably compress_error runs only on failure.  */
412 compress_buffer_shutdown_input (struct buffer
*buf
)
414 struct compress_buffer
*cb
= buf
->closure
;
417 /* Don't make any attempt to pick up trailing data since we are shutting
418 * down. If the client doesn't know we are shutting down, we might not
419 * see the EOF we are expecting.
422 zstatus
= inflateEnd (&cb
->zstr
);
425 compress_error (0, zstatus
, &cb
->zstr
, "inflateEnd");
429 return buf_shutdown (cb
->buf
);
434 /* Shut down an output buffer. */
/* BUF is the compression buffer itself; its closure is the struct
   compress_buffer.  deflate is called with Z_FINISH, repeatedly until
   it returns Z_STREAM_END, each time into a BUFFER_DATA_SIZE scratch
   buffer whose contents are passed to buf_output on the underlying
   buffer; a status other than Z_OK or Z_STREAM_END leads to
   compress_error with the message "deflate finish".  deflateEnd is
   then called, the underlying buffer is flushed with buf_flush with 1
   as the second argument, and the result of buf_shutdown on it is
   returned.
   NOTE(review): the statements that act on the status of deflateEnd
   and of buf_flush are not visible in this listing.  */
436 compress_buffer_shutdown_output (struct buffer
*buf
)
438 struct compress_buffer
*cb
= buf
->closure
;
441 /* This is only used within the while loop below, but allocated here for
444 static char *buffer
= NULL
;
446 buffer
= pagealign_xalloc (BUFFER_DATA_SIZE
);
450 cb
->zstr
.avail_out
= BUFFER_DATA_SIZE
;
451 cb
->zstr
.next_out
= (unsigned char *) buffer
;
453 zstatus
= deflate (&cb
->zstr
, Z_FINISH
);
454 if (zstatus
!= Z_OK
&& zstatus
!= Z_STREAM_END
)
456 compress_error (0, zstatus
, &cb
->zstr
, "deflate finish");
460 if (cb
->zstr
.avail_out
!= BUFFER_DATA_SIZE
)
461 buf_output (cb
->buf
, buffer
,
462 BUFFER_DATA_SIZE
- cb
->zstr
.avail_out
);
463 } while (zstatus
!= Z_STREAM_END
);
465 zstatus
= deflateEnd (&cb
->zstr
);
468 compress_error (0, zstatus
, &cb
->zstr
, "deflateEnd");
472 status
= buf_flush (cb
->buf
, 1);
476 return buf_shutdown (cb
->buf
);
481 /* Here is our librarified gzip implementation. It is very minimal
482 but attempts to be RFC1952 compliant. */
/* Only some of the gzip constants are visible in this listing.
   GZIP_CDEFLATE is compared with byte 2 of the header, and
   GZIP_FEXTRA and GZIP_FCOMMENT are bit masks tested against byte 3.
   GZIP_ID1, GZIP_ID2, GZIP_FNAME and GZIP_FHCRC are used by the code
   below but their definitions do not appear here.  */
484 /* GZIP ID byte values */
488 /* Compression methods */
489 #define GZIP_CDEFLATE 8
494 #define GZIP_FEXTRA 4
496 #define GZIP_FCOMMENT 16
498 /* BUF should contain SIZE bytes of gzipped data (RFC1952/RFC1951).
499 We are to uncompress the data and write the result to the file
500 descriptor FD. If something goes wrong, give a nonfatal error message
501 mentioning FULLNAME as the name of the file for FD. Return 1 if
502 it is an error we can't recover from. */
/* Steps visible in the code: check the two identification bytes and
   the compression method; skip the optional extra, file name, comment
   and header-CRC fields selected by the flag bits in BUF[3]; inflate
   the remainder with a stream set up by inflateInit2 with -15, in
   chunks of at most 32768 bytes, writing each chunk to FD and folding
   it into a running crc32; finally compare the little-endian CRC32
   and ISIZE values stored after the compressed data with the computed
   CRC and with total_out.
   NOTE(review): the result of write is only tested for being
   negative, so a short write would go unnoticed - worth confirming
   against the full source.  The return statements and the remaining
   parameter are not visible in this listing.  */
505 gunzip_and_write (int fd
, const char *fullname
, unsigned char *buf
,
511 unsigned char outbuf
[32768];
516 error (0, 0, "gzipped data too small - lacks complete header");
519 if (buf
[0] != GZIP_ID1
|| buf
[1] != GZIP_ID2
)
521 error (0, 0, "gzipped data does not start with gzip identification");
524 if (buf
[2] != GZIP_CDEFLATE
)
526 error (0, 0, "only the deflate compression method is supported");
530 /* Skip over the fixed header, and then skip any of the variable-length
531 fields. As we skip each field, we keep pos <= size. The checks
532 on positions and lengths are really checks for malformed or
533 incomplete gzip data. */
535 if (buf
[3] & GZIP_FEXTRA
)
539 error (0, 0, "%s lacks proper gzip XLEN field", fullname
);
542 pos
+= buf
[pos
] + (buf
[pos
+ 1] << 8) + 2;
545 error (0, 0, "%s lacks proper gzip \"extra field\"", fullname
);
550 if (buf
[3] & GZIP_FNAME
)
552 unsigned char *p
= memchr(buf
+ pos
, '\0', size
- pos
);
555 error (0, 0, "%s has bad gzip filename field", fullname
);
560 if (buf
[3] & GZIP_FCOMMENT
)
562 unsigned char *p
= memchr(buf
+ pos
, '\0', size
- pos
);
565 error (0, 0, "%s has bad gzip comment field", fullname
);
570 if (buf
[3] & GZIP_FHCRC
)
575 error (0, 0, "%s has bad gzip CRC16 field", fullname
);
580 /* There could be no data to decompress - check and short circuit. */
583 error (0, 0, "gzip data incomplete for %s (no data)", fullname
);
587 memset (&zstr
, 0, sizeof zstr
);
588 /* Passing a negative argument tells zlib not to look for a zlib
589 (RFC1950) header. This is an undocumented feature; I suppose if
590 we wanted to be anal we could synthesize a header instead,
592 zstatus
= inflateInit2 (&zstr
, -15);
595 compress_error (1, zstatus
, &zstr
, fullname
);
597 /* I don't see why we should have to include the 8 byte trailer in
598 avail_in. But I see that zlib/gzio.c does, and it seemed to fix
599 a fairly rare bug in which we'd get a Z_BUF_ERROR for no obvious
601 zstr
.avail_in
= size
- pos
;
602 zstr
.next_in
= buf
+ pos
;
604 crc
= crc32 (0, NULL
, 0);
608 zstr
.avail_out
= sizeof (outbuf
);
609 zstr
.next_out
= outbuf
;
610 zstatus
= inflate (&zstr
, Z_NO_FLUSH
);
611 if (zstatus
!= Z_STREAM_END
&& zstatus
!= Z_OK
)
613 compress_error (0, zstatus
, &zstr
, fullname
);
616 if (write (fd
, outbuf
, sizeof (outbuf
) - zstr
.avail_out
) < 0)
618 error (0, errno
, "writing decompressed file %s", fullname
);
621 crc
= crc32 (crc
, outbuf
, sizeof (outbuf
) - zstr
.avail_out
);
622 } while (zstatus
!= Z_STREAM_END
);
623 zstatus
= inflateEnd (&zstr
);
625 compress_error (0, zstatus
, &zstr
, fullname
);
627 /* Check that there is still 8 trailer bytes remaining (CRC32
628 and ISIZE). Check total decomp. data, plus header len (pos)
629 against input buffer total size. */
630 pos
+= zstr
.total_in
;
633 error (0, 0, "gzip data incomplete for %s (no trailer)", fullname
);
637 if (crc
!= ((unsigned long)buf
[pos
]
638 + ((unsigned long)buf
[pos
+ 1] << 8)
639 + ((unsigned long)buf
[pos
+ 2] << 16)
640 + ((unsigned long)buf
[pos
+ 3] << 24)))
642 error (0, 0, "CRC error uncompressing %s", fullname
);
646 if (zstr
.total_out
!= ((unsigned long)buf
[pos
+ 4]
647 + ((unsigned long)buf
[pos
+ 5] << 8)
648 + ((unsigned long)buf
[pos
+ 6] << 16)
649 + ((unsigned long)buf
[pos
+ 7] << 24)))
651 error (0, 0, "invalid length uncompressing %s", fullname
);
658 /* Read all of FD and put the gzipped data (RFC1952/RFC1951) into *BUF,
659 replacing previous contents of *BUF. *BUF is xmalloc'd and *SIZE is
660 its allocated size. Put the actual number of bytes of data in
661 *LEN. If something goes wrong, give a nonfatal error mentioning
662 FULLNAME as the name of the file for FD, and return 1 if we can't
663 recover from it. LEVEL is the compression level (1-9). */
/* Steps visible in the code: fill in the fixed gzip header at the
   start of *BUF (identification bytes, deflate method, four zero
   bytes at offsets 4-7), set up the stream with deflateInit2 (LEVEL,
   Z_DEFLATED, -15, 8), then read FD in chunks of up to 8192 bytes,
   feeding each chunk to crc32 and deflate.  *BUF is reallocated when
   fewer than 4096 output bytes remain, and again if fewer than 8
   remain for the trailer, which holds the CRC and total_in, each as
   four little-endian bytes.  *LEN receives total_out before
   deflateEnd is called.
   NOTE(review): two of the three reallocations use xrealloc but the
   trailer one calls plain realloc; the lines around them are not
   visible in this listing, so confirm whether that is intentional.
   The return statements are likewise not visible.  */
666 read_and_gzip (int fd
, const char *fullname
, unsigned char **buf
, size_t *size
,
667 size_t *len
, int level
)
671 unsigned char inbuf
[8192];
677 unsigned char *newbuf
;
680 newbuf
= xrealloc (*buf
, *size
);
683 error (0, 0, "out of memory");
688 (*buf
)[0] = GZIP_ID1
;
689 (*buf
)[1] = GZIP_ID2
;
690 (*buf
)[2] = GZIP_CDEFLATE
;
692 (*buf
)[4] = (*buf
)[5] = (*buf
)[6] = (*buf
)[7] = 0;
693 /* Could set this based on level, but why bother? */
697 memset (&zstr
, 0, sizeof zstr
);
698 zstatus
= deflateInit2 (&zstr
, level
, Z_DEFLATED
, -15, 8,
700 crc
= crc32 (0, NULL
, 0);
703 compress_error (0, zstatus
, &zstr
, fullname
);
707 /* Adjust for 10-byte output header (filled in above) */
709 zstr
.avail_out
= *size
- 10;
710 zstr
.next_out
= *buf
+ 10;
716 nread
= read (fd
, inbuf
, sizeof inbuf
);
719 error (0, errno
, "cannot read %s", fullname
);
725 crc
= crc32 (crc
, inbuf
, nread
);
726 zstr
.next_in
= inbuf
;
727 zstr
.avail_in
= nread
;
731 /* I don't see this documented anywhere, but deflate seems
732 to tend to dump core sometimes if we pass it Z_FINISH and
733 a small (e.g. 2147 byte) avail_out. So we insist on at
734 least 4096 bytes (that is what zlib/gzio.c uses). */
736 if (zstr
.avail_out
< 4096)
738 unsigned char *newbuf
;
740 assert(zstr
.avail_out
+ zstr
.total_out
== *size
);
741 assert(zstr
.next_out
== *buf
+ zstr
.total_out
);
743 newbuf
= xrealloc (*buf
, *size
);
746 error (0, 0, "out of memory");
750 zstr
.next_out
= *buf
+ zstr
.total_out
;
751 zstr
.avail_out
= *size
- zstr
.total_out
;
752 assert(zstr
.avail_out
+ zstr
.total_out
== *size
);
753 assert(zstr
.next_out
== *buf
+ zstr
.total_out
);
756 zstatus
= deflate (&zstr
, finish
? Z_FINISH
: 0);
757 if (zstatus
== Z_STREAM_END
)
759 else if (zstatus
!= Z_OK
)
760 compress_error (0, zstatus
, &zstr
, fullname
);
761 } while (zstr
.avail_out
== 0);
764 /* Need to add the CRC information (8 bytes)
765 to the end of the gzip'd output.
766 Ensure there is enough space in the output buffer
768 if (zstr
.avail_out
< 8)
770 unsigned char *newbuf
;
772 assert(zstr
.avail_out
+ zstr
.total_out
== *size
);
773 assert(zstr
.next_out
== *buf
+ zstr
.total_out
);
774 *size
+= 8 - zstr
.avail_out
;
775 newbuf
= realloc (*buf
, *size
);
778 error (0, 0, "out of memory");
782 zstr
.next_out
= *buf
+ zstr
.total_out
;
783 zstr
.avail_out
= *size
- zstr
.total_out
;
784 assert(zstr
.avail_out
+ zstr
.total_out
== *size
);
785 assert(zstr
.next_out
== *buf
+ zstr
.total_out
);
787 *zstr
.next_out
++ = (unsigned char)(crc
& 0xff);
788 *zstr
.next_out
++ = (unsigned char)((crc
>> 8) & 0xff);
789 *zstr
.next_out
++ = (unsigned char)((crc
>> 16) & 0xff);
790 *zstr
.next_out
++ = (unsigned char)((crc
>> 24) & 0xff);
792 *zstr
.next_out
++ = (unsigned char)(zstr
.total_in
& 0xff);
793 *zstr
.next_out
++ = (unsigned char)((zstr
.total_in
>> 8) & 0xff);
794 *zstr
.next_out
++ = (unsigned char)((zstr
.total_in
>> 16) & 0xff);
795 *zstr
.next_out
++ = (unsigned char)((zstr
.total_in
>> 24) & 0xff);
799 assert(zstr
.avail_out
+ zstr
.total_out
== *size
);
800 assert(zstr
.next_out
== *buf
+ zstr
.total_out
);
802 *len
= zstr
.total_out
;
804 zstatus
= deflateEnd (&zstr
);
806 compress_error (0, zstatus
, &zstr
, fullname
);
810 #endif /* defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT) */