3 /* gzappend -- command to append to a gzip file
5 Copyright (C) 2003 Mark Adler, all rights reserved
6 version 1.1, 4 Nov 2003
8 This software is provided 'as-is', without any express or implied
9 warranty. In no event will the author be held liable for any damages
10 arising from the use of this software.
12 Permission is granted to anyone to use this software for any purpose,
13 including commercial applications, and to alter it and redistribute it
14 freely, subject to the following restrictions:
16 1. The origin of this software must not be misrepresented; you must not
17 claim that you wrote the original software. If you use this software
18 in a product, an acknowledgment in the product documentation would be
19 appreciated but is not required.
20 2. Altered source versions must be plainly marked as such, and must not be
21 misrepresented as being the original software.
22 3. This notice may not be removed or altered from any source distribution.
24 Mark Adler madler@alumni.caltech.edu
30 * 1.0 19 Oct 2003 - First version
31 * 1.1 4 Nov 2003 - Expand and clarify some comments and notes
32 * - Add version and copyright to help
33 * - Send help to stdout instead of stderr
34 * - Add some preemptive typecasts
35 * - Add L to constants in lseek() calls
36 * - Remove some debugging information in error messages
37 * - Use new data_type definition for zlib 1.2.1
38 * - Simplify and unify file operations
39 * - Finish off gzip file in gztack()
40 * - Use deflatePrime() instead of adding empty blocks
41 * - Keep gzip file clean on appended file read errors
42 * - Use in-place rotate instead of auxiliary buffer
43 * (Why you ask? Because it was fun to write!)
47 gzappend takes a gzip file and appends to it, compressing files from the
48 command line or data from stdin. The gzip file is written to directly, to
49 avoid copying that file, in case it's large. Note that this results in the
50 unfriendly behavior that if gzappend fails, the gzip file is corrupted.
52 This program was written to illustrate the use of the new Z_BLOCK option of
53 zlib 1.2.x's inflate() function. This option returns from inflate() at each
54 block boundary to facilitate locating and modifying the last block bit at
55 the start of the final deflate block. Also whether using Z_BLOCK or not,
56 another required feature of zlib 1.2.x is that inflate() now provides the
57 number of unused bits in the last input byte used. gzappend will not work
58 with versions of zlib earlier than 1.2.1.
60 gzappend first decompresses the gzip file internally, discarding all but
61 the last 32K of uncompressed data, and noting the location of the last block
62 bit and the number of unused bits in the last byte of the compressed data.
63 The gzip trailer containing the CRC-32 and length of the uncompressed data
64 is verified. This trailer will be later overwritten.
66 Then the last block bit is cleared by seeking back in the file and rewriting
67 the byte that contains it. Seeking forward, the last byte of the compressed
68 data is saved along with the number of unused bits to initialize deflate.
70 A deflate process is initialized, using the last 32K of the uncompressed
71 data from the gzip file to initialize the dictionary. If the total
72 uncompressed data was less than 32K, then all of it is used to initialize
73 the dictionary. The deflate output bit buffer is also initialized with the
74 last bits from the original deflate stream. From here on, the data to
75 append is simply compressed using deflate, and written to the gzip file.
76 When that is complete, the new CRC-32 and uncompressed length are written
77 as the trailer of the gzip file.
89 #define CHUNK (1U << LGCHUNK)
/* print "gzappend error: " followed by msg1 and msg2 on stderr, then
   terminate the process with a failure status -- this function does not
   return, so callers use it as the end of an error path */
static void bye(char *msg1, char *msg2)
{
    fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
    exit(1);
}
/* return the greatest common divisor of a and b using Euclid's algorithm,
   modified to be fast when one argument is much greater than the other, and
   coded to avoid unnecessary swapping -- gcd(x, 0) == gcd(0, x) == x */
static unsigned gcd(unsigned a, unsigned b)
{
    unsigned c;

    while (a && b)
        if (a > b) {
            /* subtract the largest power-of-two multiple of b that fits;
               the test a - c >= c guarantees c << 1 cannot overflow */
            c = b;
            while (a - c >= c)
                c <<= 1;
            a -= c;
        }
        else {
            c = a;
            while (b - c >= c)
                c <<= 1;
            b -= c;
        }
    return a + b;               /* one of them is zero here */
}

/* rotate list[0..len-1] left by rot positions, in place -- afterwards the
   entry that was at index (i + rot) % len is at index i; rot may be any
   value (it is reduced modulo len), and lists shorter than two entries are
   left untouched */
static void rotate(unsigned char *list, unsigned len, unsigned rot)
{
    unsigned char tmp;
    unsigned cycles, start, from, to;

    /* normalize rot and handle degenerate cases */
    if (len < 2) return;
    if (rot >= len) rot %= len;
    if (rot == 0) return;

    /* do simple left shift by one -- source and destination overlap, so
       memmove() is required here (memcpy() would be undefined behavior) */
    if (rot == 1) {
        tmp = list[0];
        memmove(list, list + 1, len - 1);
        list[len - 1] = tmp;
        return;
    }

    /* do simple right shift by one */
    if (rot == len - 1) {
        tmp = list[len - 1];
        memmove(list + 1, list, len - 1);
        list[0] = tmp;
        return;
    }

    /* otherwise do rotate as a set of cycles in place -- indices are used
       instead of pointers so that no out-of-range pointer is ever formed */
    cycles = gcd(len, rot);             /* number of cycles */
    do {
        start = from = cycles;          /* start index is arbitrary */
        tmp = list[from];               /* save entry to be overwritten */
        for (;;) {
            to = from;                  /* next step in cycle */
            /* go right rot positions, wrapping around the end of the list */
            from = from < len - rot ? from + rot : from - (len - rot);
            if (from == start) break;   /* all but one shifted */
            list[to] = list[from];      /* shift left */
        }
        list[to] = tmp;                 /* complete the circle */
    } while (--cycles);
}
/* structure for gzip file read operations -- buf holds 1 << size bytes, of
   which the left bytes starting at next have not been consumed yet */
typedef struct {
    int fd;                     /* file descriptor */
    int size;                   /* 1 << size is bytes in buf */
    unsigned left;              /* bytes available at next */
    unsigned char *buf;         /* buffer */
    unsigned char *next;        /* next byte in buffer */
    char *name;                 /* file name for error messages */
} file;
180 local
int readin(file
*in
)
184 len
= read(in
->fd
, in
->buf
, 1 << in
->size
);
185 if (len
== -1) bye("error reading ", in
->name
);
186 in
->left
= (unsigned)len
;
191 /* read from file in, exit if end-of-file */
192 local
int readmore(file
*in
)
194 if (readin(in
) == 0) bye("unexpected end of ", in
->name
);
/* return the next byte of in, refilling the buffer first when it is empty
   (readmore() terminates the program at end of file) -- the argument is
   evaluated more than once, so it must be free of side effects */
#define read1(in) ((in)->left == 0 ? readmore(in) : 0, \
                   (in)->left--, *((in)->next)++)
201 /* skip over n bytes of in */
202 local
void skip(file
*in
, unsigned n
)
208 bypass
= n
& ~((1U << in
->size
) - 1);
210 if (lseek(in
->fd
, (off_t
)bypass
, SEEK_CUR
) == -1)
211 bye("seeking ", in
->name
);
216 bye("unexpected end of ", in
->name
);
222 /* read a four-byte unsigned integer, little-endian, from in */
223 unsigned long read4(file
*in
)
228 val
+= (unsigned)read1(in
) << 8;
229 val
+= (unsigned long)read1(in
) << 16;
230 val
+= (unsigned long)read1(in
) << 24;
234 /* skip over gzip header */
235 local
void gzheader(file
*in
)
240 if (read1(in
) != 31 || read1(in
) != 139) bye(in
->name
, " not a gzip file");
241 if (read1(in
) != 8) bye("unknown compression method in", in
->name
);
243 if (flags
& 0xe0) bye("unknown header flags set in", in
->name
);
247 n
+= (unsigned)(read1(in
)) << 8;
250 if (flags
& 8) while (read1(in
) != 0) ;
251 if (flags
& 16) while (read1(in
) != 0) ;
252 if (flags
& 2) skip(in
, 2);
255 /* decompress gzip file "name", return strm with a deflate stream ready to
256 continue compression of the data in the gzip file, and return a file
257 descriptor pointing to where to write the compressed data -- the deflate
258 stream is initialized to compress using level "level" */
259 local
int gzscan(char *name
, z_stream
*strm
, int level
)
261 int ret
, lastbit
, left
, full
;
263 unsigned long crc
, tot
;
264 unsigned char *window
;
270 gz
.fd
= open(name
, O_RDWR
, 0);
271 if (gz
.fd
== -1) bye("cannot open ", name
);
272 gz
.buf
= malloc(CHUNK
);
273 if (gz
.buf
== NULL
) bye("out of memory", "");
277 /* skip gzip header */
280 /* prepare to decompress */
281 window
= malloc(DSIZE
);
282 if (window
== NULL
) bye("out of memory", "");
283 strm
->zalloc
= Z_NULL
;
284 strm
->zfree
= Z_NULL
;
285 strm
->opaque
= Z_NULL
;
286 ret
= inflateInit2(strm
, -15);
287 if (ret
!= Z_OK
) bye("out of memory", " or library mismatch");
289 /* decompress the deflate stream, saving append information */
291 lastoff
= lseek(gz
.fd
, 0L, SEEK_CUR
) - gz
.left
;
293 strm
->avail_in
= gz
.left
;
294 strm
->next_in
= gz
.next
;
295 crc
= crc32(0L, Z_NULL
, 0);
298 /* if needed, get more input */
299 if (strm
->avail_in
== 0) {
301 strm
->avail_in
= gz
.left
;
302 strm
->next_in
= gz
.next
;
305 /* set up output to next available section of sliding window */
306 strm
->avail_out
= DSIZE
- have
;
307 strm
->next_out
= window
+ have
;
309 /* inflate and check for errors */
310 ret
= inflate(strm
, Z_BLOCK
);
311 if (ret
== Z_STREAM_ERROR
) bye("internal stream error!", "");
312 if (ret
== Z_MEM_ERROR
) bye("out of memory", "");
313 if (ret
== Z_DATA_ERROR
)
314 bye("invalid compressed data--format violated in", name
);
316 /* update crc and sliding window pointer */
317 crc
= crc32(crc
, window
+ have
, DSIZE
- have
- strm
->avail_out
);
319 have
= DSIZE
- strm
->avail_out
;
325 /* process end of block */
326 if (strm
->data_type
& 128) {
327 if (strm
->data_type
& 64)
328 left
= strm
->data_type
& 0x1f;
330 lastbit
= strm
->data_type
& 0x1f;
331 lastoff
= lseek(gz
.fd
, 0L, SEEK_CUR
) - strm
->avail_in
;
334 } while (ret
!= Z_STREAM_END
);
336 gz
.left
= strm
->avail_in
;
337 gz
.next
= strm
->next_in
;
339 /* save the location of the end of the compressed data */
340 end
= lseek(gz
.fd
, 0L, SEEK_CUR
) - gz
.left
;
342 /* check gzip trailer and save total for deflate */
343 if (crc
!= read4(&gz
))
344 bye("invalid compressed data--crc mismatch in ", name
);
345 tot
= strm
->total_out
;
346 if ((tot
& 0xffffffffUL
) != read4(&gz
))
347 bye("invalid compressed data--length mismatch in", name
);
349 /* if not at end of file, warn */
350 if (gz
.left
|| readin(&gz
))
352 "gzappend warning: junk at end of gzip file overwritten\n");
354 /* clear last block bit */
355 lseek(gz
.fd
, lastoff
- (lastbit
!= 0), SEEK_SET
);
356 if (read(gz
.fd
, gz
.buf
, 1) != 1) bye("reading after seek on ", name
);
357 *gz
.buf
= (unsigned char)(*gz
.buf
^ (1 << ((8 - lastbit
) & 7)));
358 lseek(gz
.fd
, -1L, SEEK_CUR
);
359 if (write(gz
.fd
, gz
.buf
, 1) != 1) bye("writing after seek to ", name
);
361 /* if window wrapped, build dictionary from window by rotating */
363 rotate(window
, DSIZE
, have
);
367 /* set up deflate stream with window, crc, total_in, and leftover bits */
368 ret
= deflateInit2(strm
, level
, Z_DEFLATED
, -15, 8, Z_DEFAULT_STRATEGY
);
369 if (ret
!= Z_OK
) bye("out of memory", "");
370 deflateSetDictionary(strm
, window
, have
);
372 strm
->total_in
= tot
;
374 lseek(gz
.fd
, --end
, SEEK_SET
);
375 if (read(gz
.fd
, gz
.buf
, 1) != 1) bye("reading after seek on ", name
);
376 deflatePrime(strm
, 8 - left
, *gz
.buf
);
378 lseek(gz
.fd
, end
, SEEK_SET
);
380 /* clean up and return */
386 /* append file "name" to gzip file gd using deflate stream strm -- if last
387 is true, then finish off the deflate stream at the end */
388 local
void gztack(char *name
, int gd
, z_stream
*strm
, int last
)
392 unsigned char *in
, *out
;
394 /* open file to compress and append */
397 fd
= open(name
, O_RDONLY
, 0);
399 fprintf(stderr
, "gzappend warning: %s not found, skipping ...\n",
403 /* allocate buffers */
404 in
= fd
== -1 ? NULL
: malloc(CHUNK
);
406 if (out
== NULL
) bye("out of memory", "");
408 /* compress input file and append to gzip file */
411 len
= fd
== -1 ? 0 : read(fd
, in
, CHUNK
);
414 "gzappend warning: error reading %s, skipping rest ...\n",
418 strm
->avail_in
= (unsigned)len
;
420 if (len
) strm
->adler
= crc32(strm
->adler
, in
, (unsigned)len
);
422 /* compress and write all available output */
424 strm
->avail_out
= CHUNK
;
425 strm
->next_out
= out
;
426 ret
= deflate(strm
, last
&& len
== 0 ? Z_FINISH
: Z_NO_FLUSH
);
427 left
= CHUNK
- strm
->avail_out
;
429 len
= write(gd
, out
+ CHUNK
- strm
->avail_out
- left
, left
);
430 if (len
== -1) bye("writing gzip file", "");
431 left
-= (unsigned)len
;
433 } while (strm
->avail_out
== 0 && ret
!= Z_STREAM_END
);
436 /* write trailer after last entry */
439 out
[0] = (unsigned char)(strm
->adler
);
440 out
[1] = (unsigned char)(strm
->adler
>> 8);
441 out
[2] = (unsigned char)(strm
->adler
>> 16);
442 out
[3] = (unsigned char)(strm
->adler
>> 24);
443 out
[4] = (unsigned char)(strm
->total_in
);
444 out
[5] = (unsigned char)(strm
->total_in
>> 8);
445 out
[6] = (unsigned char)(strm
->total_in
>> 16);
446 out
[7] = (unsigned char)(strm
->total_in
>> 24);
449 ret
= write(gd
, out
+ 8 - len
, len
);
450 if (ret
== -1) bye("writing gzip file", "");
456 /* clean up and return */
458 if (in
!= NULL
) free(in
);
459 if (fd
> 0) close(fd
);
462 /* process the compression level option if present, scan the gzip file, and
463 append the specified files, or append the data from stdin if no other file
464 names are provided on the command line -- the gzip file must be writable
466 int main(int argc
, char **argv
)
471 /* ignore command name */
474 /* provide usage if no arguments */
476 printf("gzappend 1.1 (4 Nov 2003) Copyright (C) 2003 Mark Adler\n");
478 "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n");
482 /* set compression level */
483 level
= Z_DEFAULT_COMPRESSION
;
484 if (argv
[0][0] == '-') {
485 if (argv
[0][1] < '0' || argv
[0][1] > '9' || argv
[0][2] != 0)
486 bye("invalid compression level", "");
487 level
= argv
[0][1] - '0';
488 if (*++argv
== NULL
) bye("no gzip file name after options", "");
491 /* prepare to append to gzip file */
492 gd
= gzscan(*argv
++, &strm
, level
);
494 /* append files on command line, or from stdin if none */
496 gztack(NULL
, gd
, &strm
, 1);
499 gztack(*argv
, gd
, &strm
, argv
[1] == NULL
);
500 } while (*++argv
!= NULL
);