4 * Copyright (c) 1998 by Gilbert Ramirez <gram@alumni.rice.edu>
6 * SPDX-License-Identifier: GPL-2.0-or-later
9 /* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib
10 * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
13 * SPDX-License-Identifier: Zlib
18 #include "file_wrappers.h"
25 #include <wsutil/file_util.h>
30 #endif /* HAVE_ZLIB */
39 #if LZ4_VERSION_NUMBER >= 10703
48 * https://tools.ietf.org/html/rfc1952
50 * for a description of the gzip file format.
52 * Some other compressed file formats we might want to support:
54 * XZ format: https://tukaani.org/xz/
56 * Bzip2 format: https://www.sourceware.org/bzip2/
58 * Lzip format: https://www.nongnu.org/lzip/
62 * List of compression types supported.
64 static struct compression_type
{
65 wtap_compression_type type
;
66 const char *extension
;
67 const char *description
;
68 } compression_types
[] = {
70 { WTAP_GZIP_COMPRESSED
, "gz", "gzip compressed" },
73 { WTAP_ZSTD_COMPRESSED
, "zst", "zstd compressed" },
76 { WTAP_LZ4_COMPRESSED
, "lz4", "lz4 compressed" },
78 { WTAP_UNCOMPRESSED
, NULL
, NULL
}
81 static wtap_compression_type
file_get_compression_type(FILE_T stream
);
84 wtap_get_compression_type(wtap
*wth
)
86 return file_get_compression_type((wth
->fh
== NULL
) ? wth
->random_fh
: wth
->fh
);
90 wtap_compression_type_description(wtap_compression_type compression_type
)
92 for (struct compression_type
*p
= compression_types
;
93 p
->type
!= WTAP_UNCOMPRESSED
; p
++) {
94 if (p
->type
== compression_type
)
95 return p
->description
;
101 wtap_compression_type_extension(wtap_compression_type compression_type
)
103 for (struct compression_type
*p
= compression_types
;
104 p
->type
!= WTAP_UNCOMPRESSED
; p
++) {
105 if (p
->type
== compression_type
)
112 wtap_get_all_compression_type_extensions_list(void)
116 extensions
= NULL
; /* empty list, to start with */
118 for (struct compression_type
*p
= compression_types
;
119 p
->type
!= WTAP_UNCOMPRESSED
; p
++)
120 extensions
= g_slist_prepend(extensions
, (void *)p
->extension
);
125 /* #define GZBUFSIZE 8192 */
/* Default buffer size, in bytes.  It is the starting value of "want",
   the buffer size requested when a reader is set up; "want" may be
   raised later, before the buffers are allocated. */
126 #define GZBUFSIZE 4096
128 /* values for wtap_reader compression */
130 UNKNOWN
, /* unknown - look for a compression header */
131 UNCOMPRESSED
, /* uncompressed - copy input directly */
132 ZLIB
, /* decompress a zlib stream */
139 * We limit the size of our input and output buffers to 2^30 bytes,
142 * 1) on Windows with MSVC, the return value of _read() is int,
143 * so the biggest read you can do is INT_MAX, and the biggest
144 * power of 2 below that is 2^30;
146 * 2) the "avail_in" and "avail_out" values in a z_stream structure
147 * in zlib are uInts, and those are unsigned ints, and that
148 * imposes a limit on the buffer size when we're reading a
151 * Thus, we use unsigned for the buffer sizes, offsets, amount available
152 * from the buffer, etc.
154 * If we want an even bigger buffer for uncompressed data, or for
155 * some other form of compression, then the unsigned-sized values should
156 * be in structure values used only for reading gzipped files, and
157 * other values should be used for uncompressed data or data
158 * compressed using other algorithms (e.g., in a union).
160 #define MAX_READ_BUF_SIZE (1U << 30)
162 struct wtap_reader_buf
{
163 uint8_t *buf
; /* buffer */
164 uint8_t *next
; /* next byte to deliver from buffer */
165 unsigned avail
; /* number of bytes available to deliver at next */
169 int fd
; /* file descriptor */
170 int64_t raw_pos
; /* current position in file (just to not call lseek()) */
171 int64_t pos
; /* current position in uncompressed data */
172 unsigned size
; /* buffer size */
174 struct wtap_reader_buf in
; /* input buffer, containing compressed data */
175 struct wtap_reader_buf out
; /* output buffer, containing uncompressed data */
177 bool eof
; /* true if end of input file reached */
178 int64_t start
; /* where the gzip data started, for rewinding */
179 int64_t raw
; /* where the raw data started, for seeking */
180 compression_t compression
; /* type of compression, if any */
181 compression_t last_compression
; /* last known compression type */
182 bool is_compressed
; /* false if completely uncompressed, true otherwise */
185 int64_t skip
; /* amount to skip (already rewound if backwards) */
186 bool seek_pending
; /* true if seek request pending */
188 /* error information */
189 int err
; /* error code */
190 const char *err_info
; /* additional error information string for some errors */
193 /* zlib inflate stream */
194 z_stream strm
; /* stream structure in-place (not a pointer) */
195 bool dont_check_crc
; /* true if we aren't supposed to check the CRC */
198 GPtrArray
*fast_seek
;
201 ZSTD_DCtx
*zstd_dctx
;
208 /* Current read offset within a buffer. */
210 offset_in_buffer(struct wtap_reader_buf
*buf
)
212 /* buf->next points to the next byte to read, and buf->buf points
213 to the first byte in the buffer, so the difference between them
216 This will fit in an unsigned int, because it can't be bigger
217 than the size of the buffer, which is an unsigned int. */
218 return (unsigned)(buf
->next
- buf
->buf
);
221 /* Number of bytes of data that are in a buffer. */
223 bytes_in_buffer(struct wtap_reader_buf
*buf
)
225 /* buf->next + buf->avail points just past the last byte of data in
227 Thus, (buf->next + buf->avail) - buf->buf is the number of bytes
228 of data in the buffer.
230 This will fit in an unsigned, because it can't be bigger
231 than the size of the buffer, which is an unsigned. */
232 return (unsigned)((buf
->next
+ buf
->avail
) - buf
->buf
);
235 /* Reset a buffer, discarding all data in the buffer, so we read into
236 it starting at the beginning. */
238 buf_reset(struct wtap_reader_buf
*buf
)
240 buf
->next
= buf
->buf
;
245 buf_read(FILE_T state
, struct wtap_reader_buf
*buf
)
247 unsigned space_left
, to_read
;
248 unsigned char *read_ptr
;
251 /* How much space is left at the end of the buffer?
252 XXX - the output buffer actually has state->size * 2 bytes. */
253 space_left
= state
->size
- bytes_in_buffer(buf
);
254 if (space_left
== 0) {
255 /* There's no space left, so we start fresh at the beginning
260 to_read
= state
->size
;
262 /* There's some space left; try to read as much data as we
263 can into that space. We may get less than that if we're
264 reading from a pipe or if we're near the end of the file. */
265 read_ptr
= buf
->next
+ buf
->avail
;
266 to_read
= space_left
;
269 ret
= ws_read(state
->fd
, read_ptr
, to_read
);
272 state
->err_info
= NULL
;
277 state
->raw_pos
+= ret
;
278 buf
->avail
+= (unsigned)ret
;
282 static int /* gz_avail */
283 fill_in_buffer(FILE_T state
)
288 if (buf_read(state
, &state
->in
) < 0)
/* Size, in bytes, of the window of preceding uncompressed data (32K)
   kept in the "window" arrays of the zlib seek-point structures. */
294 #define ZLIB_WINSIZE 32768
296 struct fast_seek_point
{
297 int64_t out
; /* corresponding offset in uncompressed data */
298 int64_t in
; /* offset in input file of first full byte */
300 compression_t compression
;
303 #ifdef HAVE_INFLATEPRIME
304 int bits
; /* number of bits (1-7) from byte at in - 1, or 0 */
306 unsigned char window
[ZLIB_WINSIZE
]; /* preceding 32K of uncompressed data */
308 /* be gentle with Z_STREAM_END, 8 bytes more... Another solution would be to comment checks out */
315 struct zlib_cur_seek_point
{
316 unsigned char window
[ZLIB_WINSIZE
]; /* preceding 32K of uncompressed data */
/* Spacing, in bytes of uncompressed data, used for zlib fast-seek points:
   a new point is added only when the output position is more than SPAN
   past the last recorded point, and a forward seek consults the
   fast-seek data only when the offset is greater than SPAN (or the
   target point is uncompressed). */
321 #define SPAN INT64_C(1048576)
322 static struct fast_seek_point
*
323 fast_seek_find(FILE_T file
, int64_t pos
)
325 struct fast_seek_point
*smallest
= NULL
;
326 struct fast_seek_point
*item
;
327 unsigned low
, i
, max
;
329 if (!file
->fast_seek
)
332 for (low
= 0, max
= file
->fast_seek
->len
; low
< max
; ) {
334 item
= (struct fast_seek_point
*)file
->fast_seek
->pdata
[i
];
338 else if (pos
> item
->out
) {
349 fast_seek_header(FILE_T file
, int64_t in_pos
, int64_t out_pos
,
350 compression_t compression
)
352 struct fast_seek_point
*item
= NULL
;
354 if (file
->fast_seek
->len
!= 0)
355 item
= (struct fast_seek_point
*)file
->fast_seek
->pdata
[file
->fast_seek
->len
- 1];
357 if (!item
|| item
->out
< out_pos
) {
358 struct fast_seek_point
*val
= g_new(struct fast_seek_point
,1);
361 val
->compression
= compression
;
363 g_ptr_array_add(file
->fast_seek
, val
);
376 if (state
->compression
== ZLIB
&& state
->fast_seek_cur
!= NULL
) {
377 struct zlib_cur_seek_point
*cur
= (struct zlib_cur_seek_point
*) state
->fast_seek_cur
;
386 /* Get next byte from input, or -1 if end or error.
390 * 1) errors from buf_read(), and thus from fill_in_buffer(), are
391 * "sticky", and fill_in_buffer() won't do any reading if there's
394 * 2) GZ_GETC() returns -1 on an EOF;
396 * so it's safe to make multiple GZ_GETC() calls and only check the
397 * last one for an error. */
398 #define GZ_GETC() ((state->in.avail == 0 && fill_in_buffer(state) == -1) ? -1 : \
399 (state->in.avail == 0 ? -1 : \
400 (state->in.avail--, *(state->in.next)++)))
402 /* Get a one-byte integer and return 0 on success and the value in *ret.
403 Otherwise -1 is returned, state->err is set, and *ret is not modified. */
405 gz_next1(FILE_T state
, uint8_t *ret
)
411 if (state
->err
== 0) {
413 state
->err
= WTAP_ERR_SHORT_READ
;
414 state
->err_info
= NULL
;
422 /* Get a two-byte little-endian integer and return 0 on success and the value
423 in *ret. Otherwise -1 is returned, state->err is set, and *ret is not
426 gz_next2(FILE_T state
, uint16_t *ret
)
434 if (state
->err
== 0) {
436 state
->err
= WTAP_ERR_SHORT_READ
;
437 state
->err_info
= NULL
;
441 val
+= (uint16_t)ch
<< 8;
446 /* Get a four-byte little-endian integer and return 0 on success and the value
447 in *ret. Otherwise -1 is returned, state->err is set, and *ret is not
450 gz_next4(FILE_T state
, uint32_t *ret
)
456 val
+= (unsigned)GZ_GETC() << 8;
457 val
+= (uint32_t)GZ_GETC() << 16;
460 if (state
->err
== 0) {
462 state
->err
= WTAP_ERR_SHORT_READ
;
463 state
->err_info
= NULL
;
467 val
+= (uint32_t)ch
<< 24;
472 /* Skip the specified number of bytes and return 0 on success. Otherwise -1
475 gz_skipn(FILE_T state
, size_t n
)
478 if (GZ_GETC() == -1) {
479 if (state
->err
== 0) {
481 state
->err
= WTAP_ERR_SHORT_READ
;
482 state
->err_info
= NULL
;
491 /* Skip a null-terminated string and return 0 on success. Otherwise -1
494 gz_skipzstr(FILE_T state
)
498 /* It's null-terminated, so scan until we read a byte with
499 the value 0 or get an error. */
500 while ((ch
= GZ_GETC()) > 0)
503 if (state
->err
== 0) {
505 state
->err
= WTAP_ERR_SHORT_READ
;
506 state
->err_info
= NULL
;
514 zlib_fast_seek_add(FILE_T file
, struct zlib_cur_seek_point
*point
, int bits
, int64_t in_pos
, int64_t out_pos
)
516 /* it's for sure after gzip header, so file->fast_seek->len != 0 */
517 struct fast_seek_point
*item
= (struct fast_seek_point
*)file
->fast_seek
->pdata
[file
->fast_seek
->len
- 1];
519 #ifndef HAVE_INFLATEPRIME
524 /* GLib has Balanced Binary Trees (GTree), but I couldn't find a way to do a quick search for the nearest (and smaller) value to seek to (which is what fast_seek_find() does).
525 * Inserting a value in the middle of a sorted array is expensive, so we want to add only at the end.
526 * It's not a big deal, because the first read doesn't usually invoke seeking
528 if (item
->out
+ SPAN
< out_pos
) {
529 struct fast_seek_point
*val
= g_new(struct fast_seek_point
,1);
532 val
->compression
= ZLIB
;
533 #ifdef HAVE_INFLATEPRIME
534 val
->data
.zlib
.bits
= bits
;
536 if (point
->pos
!= 0) {
537 unsigned int left
= ZLIB_WINSIZE
- point
->pos
;
539 memcpy(val
->data
.zlib
.window
, point
->window
+ point
->pos
, left
);
540 memcpy(val
->data
.zlib
.window
+ left
, point
->window
, point
->pos
);
542 memcpy(val
->data
.zlib
.window
, point
->window
, ZLIB_WINSIZE
);
545 * XXX - strm.adler is a uLong in at least some versions
546 * of zlib, and uLong is an unsigned long in at least
547 * some of those versions, which means it's 64-bit
548 * on LP64 platforms, even though the checksum is
549 * 32-bit. We assume the actual Adler checksum
550 * is in the lower 32 bits of strm.adler; as the
551 * checksum in the file is only 32 bits, we save only
552 * those lower 32 bits, and cast away any additional
553 * bits to squelch warnings.
555 * The same applies to strm.total_out.
557 val
->data
.zlib
.adler
= (uint32_t) file
->strm
.adler
;
558 val
->data
.zlib
.total_out
= (uint32_t) file
->strm
.total_out
;
559 g_ptr_array_add(file
->fast_seek
, val
);
563 static void /* gz_decomp */
564 zlib_read(FILE_T state
, unsigned char *buf
, unsigned int count
)
566 int ret
= 0; /* XXX */
568 z_streamp strm
= &(state
->strm
);
570 unsigned char *buf2
= buf
;
571 unsigned int count2
= count
;
573 strm
->avail_out
= count
;
574 strm
->next_out
= buf
;
576 /* fill output buffer up to end of deflate stream or error */
578 /* get more input for inflate() */
579 if (state
->in
.avail
== 0 && fill_in_buffer(state
) == -1)
581 if (state
->in
.avail
== 0) {
583 state
->err
= WTAP_ERR_SHORT_READ
;
584 state
->err_info
= NULL
;
588 strm
->avail_in
= state
->in
.avail
;
589 strm
->next_in
= state
->in
.next
;
590 /* decompress and handle errors */
592 ret
= inflate(strm
, Z_BLOCK
);
594 ret
= inflate(strm
, Z_NO_FLUSH
);
596 state
->in
.avail
= strm
->avail_in
;
599 state
->in
.next
= (unsigned char *)strm
->next_in
;
602 state
->in
.next
= strm
->next_in
;
604 if (ret
== Z_STREAM_ERROR
) {
605 state
->err
= WTAP_ERR_DECOMPRESS
;
606 state
->err_info
= strm
->msg
;
609 if (ret
== Z_NEED_DICT
) {
610 state
->err
= WTAP_ERR_DECOMPRESS
;
611 state
->err_info
= "preset dictionary needed";
614 if (ret
== Z_MEM_ERROR
) {
615 /* This means "not enough memory". */
617 state
->err_info
= NULL
;
620 if (ret
== Z_DATA_ERROR
) { /* deflate stream invalid */
621 state
->err
= WTAP_ERR_DECOMPRESS
;
622 state
->err_info
= strm
->msg
;
629 strm
->adler
= crc32(strm
->adler
, buf2
, count2
- strm
->avail_out
);
631 if (state
->fast_seek_cur
!= NULL
) {
632 struct zlib_cur_seek_point
*cur
= (struct zlib_cur_seek_point
*) state
->fast_seek_cur
;
633 unsigned int ready
= count2
- strm
->avail_out
;
635 if (ready
< ZLIB_WINSIZE
) {
636 unsigned left
= ZLIB_WINSIZE
- cur
->pos
;
639 memcpy(cur
->window
+ cur
->pos
, buf2
, left
);
641 memcpy(cur
->window
, buf2
+ left
, ready
- left
);
643 cur
->pos
= ready
- left
;
646 memcpy(cur
->window
+ cur
->pos
, buf2
, ready
);
651 if (cur
->have
>= ZLIB_WINSIZE
)
652 cur
->have
= ZLIB_WINSIZE
;
655 memcpy(cur
->window
, buf2
+ (ready
- ZLIB_WINSIZE
), ZLIB_WINSIZE
);
657 cur
->have
= ZLIB_WINSIZE
;
660 if (cur
->have
>= ZLIB_WINSIZE
&& ret
!= Z_STREAM_END
&& (strm
->data_type
& 128) && !(strm
->data_type
& 64))
661 zlib_fast_seek_add(state
, cur
, (strm
->data_type
& 7), state
->raw_pos
- strm
->avail_in
, state
->pos
+ (count
- strm
->avail_out
));
664 buf2
= (buf2
+ count2
- strm
->avail_out
);
665 count2
= strm
->avail_out
;
667 } while (strm
->avail_out
&& ret
!= Z_STREAM_END
);
669 /* update available output and crc check value */
670 state
->out
.next
= buf
;
671 state
->out
.avail
= count
- strm
->avail_out
;
673 /* Check gzip trailer if at end of deflate stream.
674 We don't fail immediately here, we just set an error
675 indication, so that we try to process what data we
676 got before the error. The next attempt to read
677 something past that data will get the error. */
678 if (ret
== Z_STREAM_END
) {
679 if (gz_next4(state
, &crc
) != -1 &&
680 gz_next4(state
, &len
) != -1) {
681 if (crc
!= strm
->adler
&& !state
->dont_check_crc
) {
682 state
->err
= WTAP_ERR_DECOMPRESS
;
683 state
->err_info
= "bad CRC";
684 } else if (len
!= (strm
->total_out
& 0xffffffffUL
)) {
685 state
->err
= WTAP_ERR_DECOMPRESS
;
686 state
->err_info
= "length field wrong";
689 state
->last_compression
= state
->compression
;
690 state
->compression
= UNKNOWN
; /* ready for next stream, once have is 0 */
691 g_free(state
->fast_seek_cur
);
692 state
->fast_seek_cur
= NULL
;
698 gz_head(FILE_T state
)
700 unsigned already_read
;
702 /* get some data in the input buffer */
703 if (state
->in
.avail
== 0) {
704 if (fill_in_buffer(state
) == -1)
706 if (state
->in
.avail
== 0)
710 /* look for the gzip magic header bytes 31 and 139 */
711 if (state
->in
.next
[0] == 31) {
715 /* Make sure the byte after the first byte is present */
716 if (state
->in
.avail
== 0 && fill_in_buffer(state
) == -1) {
720 if (state
->in
.avail
!= 0) {
721 if (state
->in
.next
[0] == 139) {
723 * We have what looks like the ID1 and ID2 bytes of a gzip
725 * Continue processing the file.
727 * XXX - some capture file formats (I'M LOOKING AT YOU,
728 * ENDACE!) can have 31 in the first byte of the file
729 * and 139 in the second byte of the file. For now, in
730 * those cases, you lose.
741 /* read rest of header */
743 /* compression method (CM) */
744 if (gz_next1(state
, &cm
) == -1)
747 state
->err
= WTAP_ERR_DECOMPRESS
;
748 state
->err_info
= "unknown compression method";
753 if (gz_next1(state
, &flags
) == -1) {
757 if (flags
& 0xe0) { /* reserved flag bits */
758 state
->err
= WTAP_ERR_DECOMPRESS
;
759 state
->err_info
= "reserved flag bits set";
763 /* modification time (MTIME) */
764 if (gz_skipn(state
, 4) == -1) {
769 /* extra flags (XFL) */
770 if (gz_skipn(state
, 1) == -1) {
775 /* operating system (OS) */
776 if (gz_skipn(state
, 1) == -1) {
782 /* extra field - get XLEN */
783 if (gz_next2(state
, &len
) == -1) {
788 /* skip the extra field */
789 if (gz_skipn(state
, len
) == -1) {
796 if (gz_skipzstr(state
) == -1) {
803 if (gz_skipzstr(state
) == -1) {
810 if (gz_next2(state
, &hcrc
) == -1) {
814 /* XXX - check the CRC? */
817 /* set up for decompression */
818 inflateReset(&(state
->strm
));
819 state
->strm
.adler
= crc32(0L, Z_NULL
, 0);
820 state
->compression
= ZLIB
;
821 state
->is_compressed
= true;
823 if (state
->fast_seek
) {
824 struct zlib_cur_seek_point
*cur
= g_new(struct zlib_cur_seek_point
,1);
826 cur
->pos
= cur
->have
= 0;
827 g_free(state
->fast_seek_cur
);
828 state
->fast_seek_cur
= cur
;
829 fast_seek_header(state
, state
->raw_pos
- state
->in
.avail
, state
->pos
, GZIP_AFTER_HEADER
);
833 #else /* HAVE_ZLIB */
834 state
->err
= WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED
;
835 state
->err_info
= "reading gzip-compressed files isn't supported";
837 #endif /* HAVE_ZLIB */
841 * Not a gzip file. "Unget" the first character; either:
843 * 1) we read both of the first two bytes into the
844 * buffer with the first ws_read, so we can just back
847 * 2) we only read the first byte into the buffer with
848 * the first ws_read (e.g., because we're reading from
849 * a pipe and only the first byte had been written to
850 * the pipe at that point), and read the second byte
851 * into the buffer after the first byte in the
852 * fill_in_buffer call, so we now have two bytes in
853 * the buffer, and can just back up by one byte.
860 /* { 0xFD, '7', 'z', 'X', 'Z', 0x00 } */
861 /* FD 37 7A 58 5A 00 */
864 if (state
->in
.avail
>= 4
865 && state
->in
.buf
[0] == 0x28 && state
->in
.buf
[1] == 0xb5
866 && state
->in
.buf
[2] == 0x2f && state
->in
.buf
[3] == 0xfd) {
868 const size_t ret
= ZSTD_initDStream(state
->zstd_dctx
);
869 if (ZSTD_isError(ret
)) {
870 state
->err
= WTAP_ERR_DECOMPRESS
;
871 state
->err_info
= ZSTD_getErrorName(ret
);
875 state
->compression
= ZSTD
;
876 state
->is_compressed
= true;
879 state
->err
= WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED
;
880 state
->err_info
= "reading zstd-compressed files isn't supported";
885 if (state
->in
.avail
>= 4
886 && state
->in
.buf
[0] == 0x04 && state
->in
.buf
[1] == 0x22
887 && state
->in
.buf
[2] == 0x4d && state
->in
.buf
[3] == 0x18) {
889 #if LZ4_VERSION_NUMBER >= 10800
890 LZ4F_resetDecompressionContext(state
->lz4_dctx
);
892 LZ4F_freeDecompressionContext(state
->lz4_dctx
);
893 const LZ4F_errorCode_t ret
= LZ4F_createDecompressionContext(&state
->lz4_dctx
, LZ4F_VERSION
);
894 if (LZ4F_isError(ret
)) {
895 state
->err
= WTAP_ERR_INTERNAL
;
896 state
->err_info
= LZ4F_getErrorName(ret
);
900 state
->compression
= LZ4
;
901 state
->is_compressed
= true;
904 state
->err
= WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED
;
905 state
->err_info
= "reading lz4-compressed files isn't supported";
910 if (state
->fast_seek
)
911 fast_seek_header(state
, state
->raw_pos
- state
->in
.avail
- state
->out
.avail
, state
->pos
, UNCOMPRESSED
);
913 /* doing raw i/o, save start of raw data for seeking, copy any leftover
914 input to output -- this assumes that the output buffer is larger than
915 the input buffer, which also assures space for gzungetc() */
916 state
->raw
= state
->pos
;
917 state
->out
.next
= state
->out
.buf
;
918 /* not a compressed file -- copy everything we've read into the
919 input buffer to the output buffer and fall to raw i/o */
920 already_read
= bytes_in_buffer(&state
->in
);
921 if (already_read
!= 0) {
922 memcpy(state
->out
.buf
, state
->in
.buf
, already_read
);
923 state
->out
.avail
= already_read
;
925 /* Now discard everything in the input buffer */
926 buf_reset(&state
->in
);
928 state
->compression
= UNCOMPRESSED
;
932 static int /* gz_make */
933 fill_out_buffer(FILE_T state
)
935 if (state
->compression
== UNKNOWN
) { /* look for compression header */
936 if (gz_head(state
) == -1)
938 if (state
->out
.avail
!= 0) /* got some data from gz_head() */
941 if (state
->compression
== UNCOMPRESSED
) { /* straight copy */
942 if (buf_read(state
, &state
->out
) < 0)
946 else if (state
->compression
== ZLIB
) { /* decompress */
947 zlib_read(state
, state
->out
.buf
, state
->size
<< 1);
951 else if (state
->compression
== ZSTD
) {
952 ws_assert(state
->out
.avail
== 0);
954 if (state
->in
.avail
== 0 && fill_in_buffer(state
) == -1)
957 ZSTD_outBuffer output
= {state
->out
.buf
, state
->size
<< 1, 0};
958 ZSTD_inBuffer input
= {state
->in
.next
, state
->in
.avail
, 0};
959 const size_t ret
= ZSTD_decompressStream(state
->zstd_dctx
, &output
, &input
);
960 if (ZSTD_isError(ret
)) {
961 state
->err
= WTAP_ERR_DECOMPRESS
;
962 state
->err_info
= ZSTD_getErrorName(ret
);
966 state
->in
.next
= state
->in
.next
+ input
.pos
;
967 state
->in
.avail
-= (unsigned)input
.pos
;
969 state
->out
.next
= output
.dst
;
970 state
->out
.avail
= (unsigned)output
.pos
;
973 state
->last_compression
= state
->compression
;
974 state
->compression
= UNKNOWN
;
979 else if (state
->compression
== LZ4
) {
980 ws_assert(state
->out
.avail
== 0);
982 if (state
->in
.avail
== 0 && fill_in_buffer(state
) == -1)
985 size_t outBufSize
= state
->size
<< 1;
986 size_t inBufSize
= state
->in
.avail
;
987 const size_t ret
= LZ4F_decompress(state
->lz4_dctx
, state
->out
.buf
, &outBufSize
, state
->in
.next
, &inBufSize
, NULL
);
988 if (LZ4F_isError(ret
)) {
989 state
->err
= WTAP_ERR_DECOMPRESS
;
990 state
->err_info
= LZ4F_getErrorName(ret
);
995 * We assume LZ4F_decompress() will not set inBufSize to a
996 * value > state->in.avail.
998 state
->in
.next
= state
->in
.next
+ inBufSize
;
999 state
->in
.avail
-= (unsigned)inBufSize
;
1001 state
->out
.next
= state
->out
.buf
;
1002 state
->out
.avail
= (unsigned)outBufSize
;
1005 state
->last_compression
= state
->compression
;
1006 state
->compression
= UNKNOWN
;
1014 gz_skip(FILE_T state
, int64_t len
)
1018 /* skip over len bytes or reach end-of-file, whichever comes first */
1020 if (state
->out
.avail
!= 0) {
1021 /* We have stuff in the output buffer; skip over
1023 n
= (int64_t)state
->out
.avail
> len
? (unsigned)len
: state
->out
.avail
;
1024 state
->out
.avail
-= n
;
1025 state
->out
.next
+= n
;
1028 } else if (state
->err
!= 0) {
1029 /* We have nothing in the output buffer, and
1030 we have an error that may not have been
1031 reported yet; that means we can't generate
1032 any more data into the output buffer, so
1033 return an error indication. */
1035 } else if (state
->eof
&& state
->in
.avail
== 0) {
1036 /* We have nothing in the output buffer, and
1037 we're at the end of the input; just return. */
1040 /* We have nothing in the output buffer, and
1041 we can generate more data; get more output,
1042 looking for header if required. */
1043 if (fill_out_buffer(state
) == -1)
1050 gz_reset(FILE_T state
)
1052 buf_reset(&state
->out
); /* no output data available */
1053 state
->eof
= false; /* not at end of file */
1054 state
->compression
= UNKNOWN
; /* look for compression header */
1056 state
->seek_pending
= false; /* no seek request pending */
1057 state
->err
= 0; /* clear error */
1058 state
->err_info
= NULL
;
1059 state
->pos
= 0; /* no uncompressed data yet */
1060 buf_reset(&state
->in
); /* no input data yet */
1067 * XXX - we now check whether we have st_blksize in struct stat;
1068 * it's not available on all platforms.
1070 * I'm not sure why we're testing _STATBUF_ST_BLKSIZE; it's not
1071 * set on all platforms that have st_blksize in struct stat.
1072 * (Not all platforms have st_blksize in struct stat.)
1074 * Is there some reason *not* to make the buffer size the maximum
1075 * of GZBUFSIZE and st_blksize? On most UN*Xes, the standard I/O
1076 * library does I/O with st_blksize as the buffer size; on others,
1077 * and on Windows, it's a 4K buffer size. If st_blksize is bigger
1078 * than GZBUFSIZE (which is currently 4KB), that's probably a
1079 * hint that reading in st_blksize chunks is considered a good
1080 * idea (e.g., an 8K/1K Berkeley fast file system with st_blksize
1081 * being 8K, or APFS, where st_blksize is big on at least some
1082 * versions of macOS).
1084 #ifdef _STATBUF_ST_BLKSIZE
1088 size_t zstd_buf_size
;
1090 unsigned want
= GZBUFSIZE
;
1099 /* allocate FILE_T structure to return */
1100 state
= (FILE_T
)g_try_malloc0(sizeof *state
);
1104 state
->fast_seek_cur
= NULL
;
1105 state
->fast_seek
= NULL
;
1107 /* open the file with the appropriate mode (or just use fd) */
1110 /* we don't yet know whether it's compressed */
1111 state
->is_compressed
= false;
1112 state
->last_compression
= UNKNOWN
;
1114 /* save the current position for rewinding (only if reading) */
1115 state
->start
= ws_lseek64(state
->fd
, 0, SEEK_CUR
);
1116 if (state
->start
== -1) state
->start
= 0;
1117 state
->raw_pos
= state
->start
;
1119 /* initialize stream */
1122 #ifdef _STATBUF_ST_BLKSIZE
1124 * See what I/O size the file system recommends using, and if
1125 * it's bigger than what we're using and isn't too big, use
1128 if (ws_fstat64(fd
, &st
) >= 0) {
1130 * Yes, st_blksize can be bigger than an int; apparently,
1131 * it's a long on LP64 Linux, for example.
1133 * If the value is too big to fit into an unsigned,
1134 * just use the maximum read buffer size.
1136 * On top of that, the Single UNIX Specification says that
1137 * st_blksize is of type blksize_t, which is a *signed*
1138 * integer type, and, at minimum, macOS 11.6 and Linux 5.14.11's
1139 * include/uapi/asm-generic/stat.h define it as such.
1141 * However, other OSes might make it unsigned, and older versions
1142 * of OSes that currently make it signed might make it unsigned,
1143 * so we try to avoid warnings from that.
1145 * We cast MAX_READ_BUF_SIZE to long in order to avoid the
1146 * warning, although it might introduce warnings on platforms
1147 * where st_blksize is unsigned; we'll deal with that if
1148 * it ever shows up as an issue.
1150 * MAX_READ_BUF_SIZE is < the largest *signed* 32-bit integer,
1151 * so casting it to long won't turn it into a negative number.
1152 * (We only support 32-bit and 64-bit 2's-complement platforms.)
1154 if (st
.st_blksize
<= (long)MAX_READ_BUF_SIZE
)
1155 want
= (unsigned)st
.st_blksize
;
1157 want
= MAX_READ_BUF_SIZE
;
1158 /* XXX, verify result? */
1162 /* we should have separate input and output buf sizes */
1163 zstd_buf_size
= ZSTD_DStreamInSize();
1164 if (zstd_buf_size
> want
) {
1165 if (zstd_buf_size
<= MAX_READ_BUF_SIZE
)
1166 want
= (unsigned)zstd_buf_size
;
1168 want
= MAX_READ_BUF_SIZE
;
1170 zstd_buf_size
= ZSTD_DStreamOutSize();
1171 if (zstd_buf_size
> want
) {
1172 if (zstd_buf_size
<= MAX_READ_BUF_SIZE
)
1173 want
= (unsigned)zstd_buf_size
;
1175 want
= MAX_READ_BUF_SIZE
;
1178 /* allocate buffers */
1179 state
->in
.buf
= (unsigned char *)g_try_malloc(want
);
1180 state
->in
.next
= state
->in
.buf
;
1181 state
->in
.avail
= 0;
1182 state
->out
.buf
= (unsigned char *)g_try_malloc(want
<< 1);
1183 state
->out
.next
= state
->out
.buf
;
1184 state
->out
.avail
= 0;
1186 if (state
->in
.buf
== NULL
|| state
->out
.buf
== NULL
) {
1191 /* allocate inflate memory */
1192 state
->strm
.zalloc
= Z_NULL
;
1193 state
->strm
.zfree
= Z_NULL
;
1194 state
->strm
.opaque
= Z_NULL
;
1195 state
->strm
.avail_in
= 0;
1196 state
->strm
.next_in
= Z_NULL
;
1197 if (inflateInit2(&(state
->strm
), -15) != Z_OK
) { /* raw inflate */
1201 /* for now, assume we should check the crc */
1202 state
->dont_check_crc
= false;
1206 state
->zstd_dctx
= ZSTD_createDCtx();
1207 if (state
->zstd_dctx
== NULL
) {
1213 ret
= LZ4F_createDecompressionContext(&state
->lz4_dctx
, LZ4F_VERSION
);
1214 if (LZ4F_isError(ret
)) {
1224 inflateEnd(&state
->strm
);
1227 ZSTD_freeDCtx(state
->zstd_dctx
);
1230 LZ4F_freeDecompressionContext(state
->lz4_dctx
);
1232 g_free(state
->out
.buf
);
1233 g_free(state
->in
.buf
);
1240 file_open(const char *path
)
1245 const char *suffixp
;
1248 /* open file and do correct filename conversions.
1250 XXX - do we need O_LARGEFILE? On UN*X, if we need to do
1251 something special to get large file support, the configure
1252 script should have set us up with the appropriate #defines,
1253 so we should be getting a large-file-enabled file descriptor
1254 here. Pre-Large File Summit UN*Xes, and possibly even some
1255 post-LFS UN*Xes, might require O_LARGEFILE here, though.
1256 If so, we should probably handle that in ws_open(). */
1257 if ((fd
= ws_open(path
, O_RDONLY
|O_BINARY
, 0000)) == -1)
1260 /* open file handle */
1261 ft
= file_fdopen(fd
);
1269 * If this file's name ends in ".caz", it's probably a compressed
1270 * Windows Sniffer file. The compression is gzip, but if we
1271 * process the CRC as specified by RFC 1952, the computed CRC
1272 * doesn't match the stored CRC.
1274 * Compressed Windows Sniffer files don't all have the same CRC
1275 * value; is it just random crap, or are they running the CRC on
1276 * a different set of data than you're supposed to (e.g., not
1277 * CRCing some of the data), or something such as that?
1279 * For now, we just set a flag to ignore CRC errors.
1281 suffixp
= strrchr(path
, '.');
1282 if (suffixp
!= NULL
) {
1283 if (g_ascii_strcasecmp(suffixp
, ".caz") == 0)
1284 ft
->dont_check_crc
= true;
1292 file_set_random_access(FILE_T stream
, bool random_flag _U_
, GPtrArray
*seek
)
1294 stream
->fast_seek
= seek
;
1298 file_seek(FILE_T file
, int64_t offset
, int whence
, int *err
)
1300 struct fast_seek_point
*here
;
1303 if (whence
!= SEEK_SET
&& whence
!= SEEK_CUR
&& whence
!= SEEK_END
) {
1304 ws_assert_not_reached();
1311 /* Normalize offset to a SEEK_CUR specification */
1312 if (whence
== SEEK_END
) {
1313 /* Seek relative to the end of the file; given that we might be
1314 reading from a compressed file, we do that by seeking to the
1315 end of the file, making an offset relative to the end of
1316 the file an offset relative to the current position.
1318 XXX - we don't actually use this yet, but, for uncompressed
1319 files, we could optimize it, if desired, by directly using
1321 if (gz_skip(file
, INT64_MAX
) == -1) {
1329 } else if (whence
== SEEK_SET
)
1330 offset
-= file
->pos
;
1331 else if (file
->seek_pending
) {
1332 /* There's a forward-skip pending, so file->pos doesn't reflect
1333 the actual file position, it represents the position from
1334 which we're skipping; update the offset to include that. */
1335 offset
+= file
->skip
;
1337 file
->seek_pending
= false;
1340 * Are we moving at all?
1343 /* No. Just return the current position. */
1348 * Are we seeking backwards?
1354 * Do we have enough data before the current position in the
1355 * buffer that we can seek backwards within the buffer?
1357 if (-offset
<= offset_in_buffer(&file
->out
)) {
1359 * Yes. Adjust appropriately.
1361 * offset is negative, so -offset is non-negative, and
1362 * -offset is <= an unsigned and thus fits in an unsigned.
1363 * Get that value and adjust appropriately.
1365 * (Casting offset to unsigned makes it positive, which
1366 * is not what we would want, so we cast -offset instead.)
1368 * XXX - this won't work with -offset = 2^63, as its
1369 * negative isn't a valid 64-bit integer, but we are
1370 * not at all likely to see files big enough to ever
1371 * see a negative offset that large.
1373 unsigned adjustment
= (unsigned)(-offset
);
1375 file
->out
.avail
+= adjustment
;
1376 file
->out
.next
-= adjustment
;
1377 file
->pos
-= adjustment
;
1382 * No. Offset is positive; we're seeking forwards.
1384 * Do we have enough data after the current position in the
1385 * buffer that we can seek forwards within the buffer?
1387 if (offset
< file
->out
.avail
) {
1389 * Yes. Adjust appropriately.
1391 * offset is < an unsigned and thus fits in an unsigned,
1392 * so we can cast it to unsigned safely.
1394 file
->out
.avail
-= (unsigned)offset
;
1395 file
->out
.next
+= offset
;
1396 file
->pos
+= offset
;
1402 * We're not seeking within the buffer. Do we have "fast seek" data
1403 * for the location to which we will be seeking, and is the offset
1404 * outside the span for compressed files or is this an uncompressed
1409 if ((here
= fast_seek_find(file
, file
->pos
+ offset
)) &&
1410 (offset
< 0 || offset
> SPAN
|| here
->compression
== UNCOMPRESSED
)) {
1414 * Yes. Use that data to do the seek.
1415 * Note that this will be true only if file_set_random_access()
1416 * has been called on this file, which should never be the case
1420 if (here
->compression
== ZLIB
) {
1421 #ifdef HAVE_INFLATEPRIME
1422 off
= here
->in
- (here
->data
.zlib
.bits
? 1 : 0);
1427 } else if (here
->compression
== GZIP_AFTER_HEADER
) {
1433 off2
= (file
->pos
+ offset
);
1434 off
= here
->in
+ (off2
- here
->out
);
1437 if (ws_lseek64(file
->fd
, off
, SEEK_SET
) == -1) {
1441 fast_seek_reset(file
);
1443 file
->raw_pos
= off
;
1444 buf_reset(&file
->out
);
1446 file
->seek_pending
= false;
1448 file
->err_info
= NULL
;
1449 buf_reset(&file
->in
);
1452 if (here
->compression
== ZLIB
) {
1453 z_stream
*strm
= &file
->strm
;
1456 strm
->adler
= here
->data
.zlib
.adler
;
1457 strm
->total_out
= here
->data
.zlib
.total_out
;
1458 #ifdef HAVE_INFLATEPRIME
1459 if (here
->data
.zlib
.bits
) {
1460 FILE_T state
= file
;
1461 int ret
= GZ_GETC();
1464 if (state
->err
== 0) {
1466 *err
= WTAP_ERR_SHORT_READ
;
1471 (void)inflatePrime(strm
, here
->data
.zlib
.bits
, ret
>> (8 - here
->data
.zlib
.bits
));
1474 (void)inflateSetDictionary(strm
, here
->data
.zlib
.window
, ZLIB_WINSIZE
);
1475 file
->compression
= ZLIB
;
1476 } else if (here
->compression
== GZIP_AFTER_HEADER
) {
1477 z_stream
*strm
= &file
->strm
;
1480 strm
->adler
= crc32(0L, Z_NULL
, 0);
1481 file
->compression
= ZLIB
;
1484 file
->compression
= here
->compression
;
1486 offset
= (file
->pos
+ offset
) - off2
;
1488 /* g_print("OK! %ld\n", offset); */
1491 /* Don't skip forward yet, wait until we want to read from
1492 the file; that way, if we do multiple seeks in a row,
1493 all involving forward skips, they will be combined. */
1494 file
->seek_pending
= true;
1495 file
->skip
= offset
;
1497 return file
->pos
+ offset
;
1501 * Is this an uncompressed file, are we within the raw area,
1502 * are we either seeking backwards or seeking past the end
1503 * of the buffer, and are we set up for random access with
1504 * file_set_random_access()?
1506 * Again, note that this will never be true on a pipe, as
1507 * file_set_random_access() should never be called if we're
1508 * reading from a pipe.
1510 if (file
->compression
== UNCOMPRESSED
&& file
->pos
+ offset
>= file
->raw
1511 && (offset
< 0 || offset
>= file
->out
.avail
)
1512 && (file
->fast_seek
!= NULL
))
1515 * Yes. Just seek there within the file.
1517 if (ws_lseek64(file
->fd
, offset
- file
->out
.avail
, SEEK_CUR
) == -1) {
1521 file
->raw_pos
+= (offset
- file
->out
.avail
);
1522 buf_reset(&file
->out
);
1524 file
->seek_pending
= false;
1526 file
->err_info
= NULL
;
1527 buf_reset(&file
->in
);
1528 file
->pos
+= offset
;
1533 * Are we seeking backwards?
1537 * Yes. We have no fast seek data, so we have to rewind and
1539 * XXX - true only for compressed files.
1541 * Calculate the amount to skip forward after rewinding.
1543 offset
+= file
->pos
;
1544 if (offset
< 0) { /* before start of file! */
1548 /* rewind, then skip to offset */
1550 /* back up and start over */
1551 if (ws_lseek64(file
->fd
, file
->start
, SEEK_SET
) == -1) {
1555 fast_seek_reset(file
);
1556 file
->raw_pos
= file
->start
;
1561 * Either we're seeking backwards, but have rewound and now need to
1562 * skip forwards, or we're seeking forwards.
1564 * Skip what's in output buffer (one less gzgetc() check).
1566 n
= (int64_t)file
->out
.avail
> offset
? (unsigned)offset
: file
->out
.avail
;
1567 file
->out
.avail
-= n
;
1568 file
->out
.next
+= n
;
1572 /* request skip (if not zero) */
1574 /* Don't skip forward yet, wait until we want to read from
1575 the file; that way, if we do multiple seeks in a row,
1576 all involving forward skips, they will be combined. */
1577 file
->seek_pending
= true;
1578 file
->skip
= offset
;
1580 return file
->pos
+ offset
;
1584 file_tell(FILE_T stream
)
1586 /* return position */
1587 return stream
->pos
+ (stream
->seek_pending
? stream
->skip
: 0);
1591 file_tell_raw(FILE_T stream
)
1593 return stream
->raw_pos
;
1597 file_fstat(FILE_T stream
, ws_statb64
*statb
, int *err
)
1599 if (ws_fstat64(stream
->fd
, statb
) == -1) {
1608 file_iscompressed(FILE_T stream
)
1610 return stream
->is_compressed
;
1613 /* Returns a wtap compression type. If we don't know the compression type,
1614 * return WTAP_UNCOMPRESSED, but if our compression state is temporarily
1615 * UNKNOWN because we need to reread compression headers, return the last
1616 * known compression type.
1618 static wtap_compression_type
1619 file_get_compression_type(FILE_T stream
)
1621 if (stream
->is_compressed
) {
1622 switch ((stream
->compression
== UNKNOWN
) ? stream
->last_compression
: stream
->compression
) {
1625 case GZIP_AFTER_HEADER
:
1626 return WTAP_GZIP_COMPRESSED
;
1629 return WTAP_ZSTD_COMPRESSED
;
1632 return WTAP_LZ4_COMPRESSED
;
1635 return WTAP_UNCOMPRESSED
;
1637 default: /* UNKNOWN, should never happen if is_compressed is set */
1638 ws_assert_not_reached();
1639 return WTAP_UNCOMPRESSED
;
1642 return WTAP_UNCOMPRESSED
;
1646 file_read(void *buf
, unsigned int len
, FILE_T file
)
1650 /* if len is zero, avoid unnecessary operations */
1654 /* process a skip request */
1655 if (file
->seek_pending
) {
1656 file
->seek_pending
= false;
1657 if (gz_skip(file
, file
->skip
) == -1)
1662 * Get len bytes to buf, or less than len if at the end;
1663 * if buf is null, just throw the bytes away.
1667 if (file
->out
.avail
!= 0) {
1668 /* We have stuff in the output buffer; copy
1670 n
= file
->out
.avail
> len
? len
: file
->out
.avail
;
1672 memcpy(buf
, file
->out
.next
, n
);
1673 buf
= (char *)buf
+ n
;
1675 file
->out
.next
+= n
;
1676 file
->out
.avail
-= n
;
1680 } else if (file
->err
!= 0) {
1681 /* We have nothing in the output buffer, and
1682 we have an error that may not have been
1683 reported yet; that means we can't generate
1684 any more data into the output buffer, so
1685 return an error indication. */
1687 } else if (file
->eof
&& file
->in
.avail
== 0) {
1688 /* We have nothing in the output buffer, and
1689 we're at the end of the input; just return
1690 with what we've gotten so far. */
1693 /* We have nothing in the output buffer, and
1694 we can generate more data; get more output,
1695 looking for header if required, and
1696 keep looping to process the new stuff
1697 in the output buffer. */
1698 if (fill_out_buffer(file
) == -1)
1707 * XXX - this *peeks* at next byte, not a character.
1710 file_peekc(FILE_T file
)
1714 /* check that we're reading and that there's no error */
1718 /* try output buffer (no need to check for skip request) */
1719 if (file
->out
.avail
!= 0) {
1720 return *(file
->out
.next
);
1723 /* process a skip request */
1724 if (file
->seek_pending
) {
1725 file
->seek_pending
= false;
1726 if (gz_skip(file
, file
->skip
) == -1)
1729 /* if we processed a skip request, there may be data in the buffer,
1730 * or an error could have occurred; likewise if we didn't do seek but
1731 * now call fill_out_buffer, the errors can occur. So we do this while
1732 * loop to check before and after - this is basically the logic from
1733 * file_read() but only for peeking not consuming a byte
1736 if (file
->out
.avail
!= 0) {
1737 return *(file
->out
.next
);
1739 else if (file
->err
!= 0) {
1742 else if (file
->eof
&& file
->in
.avail
== 0) {
1745 else if (fill_out_buffer(file
) == -1) {
1749 /* it's actually impossible to get here */
1754 * XXX - this gets a byte, not a character.
1757 file_getc(FILE_T file
)
1759 unsigned char buf
[1];
1762 /* check that we're reading and that there's no error */
1766 /* try output buffer (no need to check for skip request) */
1767 if (file
->out
.avail
!= 0) {
1770 return *(file
->out
.next
)++;
1773 ret
= file_read(buf
, 1, file
);
1774 return ret
< 1 ? -1 : buf
[0];
1777 /* Like file_gets, but returns a pointer to the terminating NUL. */
1779 file_getsp(char *buf
, int len
, FILE_T file
)
1785 /* check parameters */
1786 if (buf
== NULL
|| len
< 1)
1789 /* check that there's no error */
1793 /* process a skip request */
1794 if (file
->seek_pending
) {
1795 file
->seek_pending
= false;
1796 if (gz_skip(file
, file
->skip
) == -1)
1800 /* copy output bytes up to new line or len - 1, whichever comes first --
1801 append a terminating zero to the string (we don't check for a zero in
1802 the contents, let the user worry about that) */
1804 left
= (unsigned)len
- 1;
1806 /* assure that something is in the output buffer */
1807 if (file
->out
.avail
== 0) {
1808 /* We have nothing in the output buffer. */
1809 if (file
->err
!= 0) {
1810 /* We have an error that may not have
1811 been reported yet; that means we
1812 can't generate any more data into
1813 the output buffer, so return an
1814 error indication. */
1817 if (fill_out_buffer(file
) == -1)
1818 return NULL
; /* error */
1819 if (file
->out
.avail
== 0) { /* end of file */
1820 if (buf
== str
) /* got bupkus */
1822 break; /* got something -- return it */
1826 /* look for end-of-line in current output buffer */
1827 n
= file
->out
.avail
> left
? left
: file
->out
.avail
;
1828 eol
= (unsigned char *)memchr(file
->out
.next
, '\n', n
);
1830 n
= (unsigned)(eol
- file
->out
.next
) + 1;
1832 /* copy through end-of-line, or remainder if not found */
1833 memcpy(buf
, file
->out
.next
, n
);
1834 file
->out
.avail
-= n
;
1835 file
->out
.next
+= n
;
1839 } while (left
&& eol
== NULL
);
1841 /* found end-of-line or out of space -- add a terminator and return
1848 file_gets(char *buf
, int len
, FILE_T file
)
1850 if (!file_getsp(buf
, len
, file
)) return NULL
;
1855 file_eof(FILE_T file
)
1857 /* return end-of-file state */
1858 return (file
->eof
&& file
->in
.avail
== 0 && file
->out
.avail
== 0);
1862 * Routine to return a Wiretap error code (0 for no error, an errno
1863 * for a file error, or a WTAP_ERR_ code for other errors) for an
1864 * I/O stream. Also returns an error string for some errors.
1867 file_error(FILE_T fh
, char **err_info
)
1869 if (fh
->err
!=0 && err_info
) {
1870 /* g_strdup() returns NULL for NULL argument */
1871 *err_info
= g_strdup(fh
->err_info
);
1877 file_clearerr(FILE_T stream
)
1879 /* clear error and end-of-file */
1881 stream
->err_info
= NULL
;
1882 stream
->eof
= false;
1886 file_fdclose(FILE_T file
)
1894 file_fdreopen(FILE_T file
, const char *path
)
1898 if ((fd
= ws_open(path
, O_RDONLY
|O_BINARY
, 0000)) == -1)
1905 file_close(FILE_T file
)
1909 /* free memory and close file */
1912 inflateEnd(&(file
->strm
));
1915 ZSTD_freeDCtx(file
->zstd_dctx
);
1918 LZ4F_freeDecompressionContext(file
->lz4_dctx
);
1920 g_free(file
->out
.buf
);
1921 g_free(file
->in
.buf
);
1923 g_free(file
->fast_seek_cur
);
1925 file
->err_info
= NULL
;
1928 * If fd is -1, somebody's done a file_closefd() on us, so
1929 * we don't need to close the FD itself, and shouldn't do
1937 /* internal gzip file state data structure for writing */
1938 struct wtap_writer
{
1939 int fd
; /* file descriptor */
1940 int64_t pos
; /* current position in uncompressed data */
1941 unsigned size
; /* buffer size, zero if not allocated yet */
1942 unsigned want
; /* requested buffer size, default is GZBUFSIZE */
1943 unsigned char *in
; /* input buffer */
1944 unsigned char *out
; /* output buffer (double-sized when reading) */
1945 unsigned char *next
; /* next output data to deliver or write */
1946 int level
; /* compression level */
1947 int strategy
; /* compression strategy */
1948 int err
; /* error code */
1949 const char *err_info
; /* additional error information string for some errors */
1950 /* zlib deflate stream */
1951 z_stream strm
; /* stream structure in-place (not a pointer) */
1955 gzwfile_open(const char *path
)
1961 fd
= ws_open(path
, O_BINARY
|O_WRONLY
|O_CREAT
|O_TRUNC
, 0666);
1964 state
= gzwfile_fdopen(fd
);
1965 if (state
== NULL
) {
1974 gzwfile_fdopen(int fd
)
1978 /* allocate wtap_writer structure to return */
1979 state
= (GZWFILE_T
)g_try_malloc(sizeof *state
);
1983 state
->size
= 0; /* no buffers allocated yet */
1984 state
->want
= GZBUFSIZE
; /* requested buffer size */
1986 state
->level
= Z_DEFAULT_COMPRESSION
;
1987 state
->strategy
= Z_DEFAULT_STRATEGY
;
1989 /* initialize stream */
1990 state
->err
= Z_OK
; /* clear error */
1991 state
->err_info
= NULL
; /* clear additional error information */
1992 state
->pos
= 0; /* no uncompressed data yet */
1993 state
->strm
.avail_in
= 0; /* no input data yet */
1999 /* Initialize state for writing a gzip file. Mark initialization by setting
2000 state->size to non-zero. Return -1, and set state->err and possibly
2001 state->err_info, on failure; return 0 on success. */
2003 gz_init(GZWFILE_T state
)
2006 z_streamp strm
= &(state
->strm
);
2008 /* allocate input and output buffers */
2009 state
->in
= (unsigned char *)g_try_malloc(state
->want
);
2010 state
->out
= (unsigned char *)g_try_malloc(state
->want
);
2011 if (state
->in
== NULL
|| state
->out
== NULL
) {
2014 state
->err
= ENOMEM
;
2018 /* allocate deflate memory, set up for gzip compression */
2019 strm
->zalloc
= Z_NULL
;
2020 strm
->zfree
= Z_NULL
;
2021 strm
->opaque
= Z_NULL
;
2022 ret
= deflateInit2(strm
, state
->level
, Z_DEFLATED
,
2023 15 + 16, 8, state
->strategy
);
2027 if (ret
== Z_MEM_ERROR
) {
2028 /* This means "not enough memory". */
2029 state
->err
= ENOMEM
;
2031 /* This "shouldn't happen". */
2032 state
->err
= WTAP_ERR_INTERNAL
;
2033 state
->err_info
= "Unknown error from deflateInit2()";
2038 /* mark state as initialized */
2039 state
->size
= state
->want
;
2041 /* initialize write buffer */
2042 strm
->avail_out
= state
->size
;
2043 strm
->next_out
= state
->out
;
2044 state
->next
= strm
->next_out
;
2048 /* Compress whatever is at avail_in and next_in and write to the output file.
2049 Return -1, and set state->err and possibly state->err_info, if there is
2050 an error writing to the output file; return 0 on success.
2051 flush is assumed to be a valid deflate() flush value. If flush is Z_FINISH,
2052 then the deflate() state is reset to start a new gzip stream. */
2054 gz_comp(GZWFILE_T state
, int flush
)
2059 z_streamp strm
= &(state
->strm
);
2061 /* allocate memory if this is the first time through */
2062 if (state
->size
== 0 && gz_init(state
) == -1)
2065 /* run deflate() on provided input until it produces no more output */
2068 /* write out current buffer contents if full, or if flushing, but if
2069 doing Z_FINISH then don't write until we get to Z_STREAM_END */
2070 if (strm
->avail_out
== 0 || (flush
!= Z_NO_FLUSH
&&
2071 (flush
!= Z_FINISH
|| ret
== Z_STREAM_END
))) {
2072 have
= strm
->next_out
- state
->next
;
2074 got
= ws_write(state
->fd
, state
->next
, (unsigned int)have
);
2079 if ((ptrdiff_t)got
!= have
) {
2080 state
->err
= WTAP_ERR_SHORT_WRITE
;
2084 if (strm
->avail_out
== 0) {
2085 strm
->avail_out
= state
->size
;
2086 strm
->next_out
= state
->out
;
2088 state
->next
= strm
->next_out
;
2092 have
= strm
->avail_out
;
2093 ret
= deflate(strm
, flush
);
2094 if (ret
== Z_STREAM_ERROR
) {
2095 /* This "shouldn't happen". */
2096 state
->err
= WTAP_ERR_INTERNAL
;
2097 state
->err_info
= "Z_STREAM_ERROR from deflate()";
2100 have
-= strm
->avail_out
;
2103 /* if that completed a deflate stream, allow another to start */
2104 if (flush
== Z_FINISH
)
2107 /* all done, no errors */
2111 /* Write out len bytes from buf. Return 0, and set state->err, on
2112 failure or on an attempt to write 0 bytes (in which case state->err
2113 is Z_OK); return the number of bytes written on success. */
2115 gzwfile_write(GZWFILE_T state
, const void *buf
, unsigned len
)
2121 strm
= &(state
->strm
);
2123 /* check that there's no error */
2124 if (state
->err
!= Z_OK
)
2127 /* if len is zero, avoid unnecessary operations */
2131 /* allocate memory if this is the first time through */
2132 if (state
->size
== 0 && gz_init(state
) == -1)
2135 /* for small len, copy to input buffer, otherwise compress directly */
2136 if (len
< state
->size
) {
2137 /* copy to input buffer, compress when full */
2139 if (strm
->avail_in
== 0)
2140 strm
->next_in
= state
->in
;
2141 n
= state
->size
- strm
->avail_in
;
2146 memcpy((Bytef
*)strm
->next_in
+ strm
->avail_in
, buf
, n
);
2149 memcpy(strm
->next_in
+ strm
->avail_in
, buf
, n
);
2151 strm
->avail_in
+= n
;
2153 buf
= (const char *)buf
+ n
;
2155 if (len
&& gz_comp(state
, Z_NO_FLUSH
) == -1)
2160 /* consume whatever's left in the input buffer */
2161 if (strm
->avail_in
!= 0 && gz_comp(state
, Z_NO_FLUSH
) == -1)
2164 /* directly compress user buffer to file */
2165 strm
->avail_in
= len
;
2167 strm
->next_in
= (z_const Bytef
*)buf
;
2170 strm
->next_in
= (Bytef
*)buf
;
2174 if (gz_comp(state
, Z_NO_FLUSH
) == -1)
2178 /* input was all buffered or compressed (put will fit in int) */
2182 /* Flush out what we've written so far. Returns -1, and sets state->err,
2183 on failure; returns 0 on success. */
2185 gzwfile_flush(GZWFILE_T state
)
2187 /* check that there's no error */
2188 if (state
->err
!= Z_OK
)
2191 /* compress remaining data with Z_SYNC_FLUSH */
2192 gz_comp(state
, Z_SYNC_FLUSH
);
2193 if (state
->err
!= Z_OK
)
2198 /* Flush out all data written, and close the file. Returns a Wiretap
2199 error on failure; returns 0 on success. */
2201 gzwfile_close(GZWFILE_T state
)
2205 /* flush, free memory, and close file */
2206 if (gz_comp(state
, Z_FINISH
) == -1)
2208 (void)deflateEnd(&(state
->strm
));
2212 if (ws_close(state
->fd
) == -1 && ret
== 0)
2219 gzwfile_geterr(GZWFILE_T state
)
2226 * Editor modelines - https://www.wireshark.org/tools/modelines.html
2231 * indent-tabs-mode: nil
2234 * vi: set shiftwidth=4 tabstop=8 expandtab:
2235 * :indentSize=4:tabSize=8:noTabs=true: