4 * Copyright (c) 1998 by Gilbert Ramirez <gram@alumni.rice.edu>
6 * SPDX-License-Identifier: GPL-2.0-or-later
9 /* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib
10 * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
13 * SPDX-License-Identifier: Zlib
19 #define WS_LOG_DOMAIN LOG_DOMAIN_WIRETAP
21 #include "file_wrappers.h"
28 #include <wsutil/file_util.h>
30 #if defined(HAVE_ZLIB) && !defined(HAVE_ZLIBNG)
31 #define USE_ZLIB_OR_ZLIBNG
33 #define ZLIB_PREFIX(x) x
35 typedef z_stream zlib_stream
;
36 #endif /* defined(HAVE_ZLIB) && !defined(HAVE_ZLIBNG) */
39 #define USE_ZLIB_OR_ZLIBNG
40 #define HAVE_INFLATEPRIME 1
41 #define ZLIB_PREFIX(x) zng_ ## x
43 typedef zng_stream zlib_stream
;
44 #endif /* HAVE_ZLIBNG */
48 #endif /* HAVE_ZSTD */
53 #if LZ4_VERSION_NUMBER >= 10703
56 #ifndef LZ4F_BLOCK_HEADER_SIZE /* Added in LZ4_VERSION_NUMBER 10902 */
57 #define LZ4F_BLOCK_HEADER_SIZE 4
58 #endif /* LZ4F_BLOCK_HEADER_SIZE */
59 #endif /* LZ4_VERSION_NUMBER >= 10703 */
63 * List of compression types supported.
65 static struct compression_type
{
66 wtap_compression_type type
;
67 const char *extension
;
68 const char *description
;
70 const bool can_write_compressed
;
71 } compression_types
[] = {
72 #ifdef USE_ZLIB_OR_ZLIBNG
73 { WTAP_GZIP_COMPRESSED
, "gz", "gzip compressed", "gzip", true },
74 #endif /* USE_ZLIB_OR_ZLIBNG */
76 { WTAP_ZSTD_COMPRESSED
, "zst", "zstd compressed", "zstd", false },
77 #endif /* HAVE_ZSTD */
79 { WTAP_LZ4_COMPRESSED
, "lz4", "lz4 compressed", "lz4", true },
81 { WTAP_UNCOMPRESSED
, NULL
, NULL
, "none", true },
82 { WTAP_UNKNOWN_COMPRESSION
, NULL
, NULL
, NULL
, false },
85 static wtap_compression_type
file_get_compression_type(FILE_T stream
);
88 wtap_name_to_compression_type(const char *name
)
90 for (struct compression_type
*p
= compression_types
;
91 p
->type
!= WTAP_UNKNOWN_COMPRESSION
; p
++) {
92 if (!g_strcmp0(name
, p
->name
))
95 return WTAP_UNKNOWN_COMPRESSION
;
99 wtap_extension_to_compression_type(const char *ext
)
101 for (struct compression_type
*p
= compression_types
;
102 p
->type
!= WTAP_UNKNOWN_COMPRESSION
; p
++) {
103 if (!g_strcmp0(ext
, p
->extension
))
106 return WTAP_UNKNOWN_COMPRESSION
;
110 wtap_can_write_compression_type(wtap_compression_type compression_type
)
112 for (struct compression_type
*p
= compression_types
; p
->type
!= WTAP_UNKNOWN_COMPRESSION
; p
++) {
113 if (compression_type
== p
->type
)
114 return p
->can_write_compressed
;
120 wtap_compression_type
121 wtap_get_compression_type(wtap
*wth
)
123 return file_get_compression_type((wth
->fh
== NULL
) ? wth
->random_fh
: wth
->fh
);
127 wtap_compression_type_description(wtap_compression_type compression_type
)
129 for (struct compression_type
*p
= compression_types
;
130 p
->type
!= WTAP_UNCOMPRESSED
; p
++) {
131 if (p
->type
== compression_type
)
132 return p
->description
;
138 wtap_compression_type_extension(wtap_compression_type compression_type
)
140 for (struct compression_type
*p
= compression_types
;
141 p
->type
!= WTAP_UNCOMPRESSED
; p
++) {
142 if (p
->type
== compression_type
)
149 wtap_get_all_compression_type_extensions_list(void)
153 extensions
= NULL
; /* empty list, to start with */
155 for (struct compression_type
*p
= compression_types
;
156 p
->type
!= WTAP_UNCOMPRESSED
; p
++)
157 extensions
= g_slist_prepend(extensions
, (void *)p
->extension
);
163 wtap_get_all_output_compression_type_names_list(void)
167 names
= NULL
; /* empty list, to start with */
169 for (struct compression_type
*p
= compression_types
;
170 p
->type
!= WTAP_UNCOMPRESSED
; p
++) {
171 if (p
->can_write_compressed
)
172 names
= g_slist_prepend(names
, (void *)p
->name
);
178 /* #define GZBUFSIZE 8192 */
179 #define GZBUFSIZE 4096
180 #define LZ4BUFSIZE 4194304 // 4MiB, maximum block size
182 /* values for wtap_reader compression */
184 UNKNOWN
, /* unknown - look for a compression header */
185 UNCOMPRESSED
, /* uncompressed - copy input directly */
186 ZLIB
, /* decompress a zlib stream */
193 * We limit the size of our input and output buffers to 2^30 bytes,
196 * 1) on Windows with MSVC, the return value of _read() is int,
197 * so the biggest read you can do is INT_MAX, and the biggest
198 * power of 2 below that is 2^30;
200 * 2) the "avail_in" and "avail_out" values in a z_stream structure
201 * in zlib are uInts, and those are unsigned ints, and that
202 * imposes a limit on the buffer size when we're reading a
205 * Thus, we use unsigned for the buffer sizes, offsets, amount available
206 * from the buffer, etc.
208 * If we want an even bigger buffer for uncompressed data, or for
209 * some other form of compression, then the unsigned-sized values should
210 * be in structure values used only for reading gzipped files, and
211 * other values should be used for uncompressed data or data
212 * compressed using other algorithms (e.g., in a union).
214 #define MAX_READ_BUF_SIZE (1U << 30)
216 struct wtap_reader_buf
{
217 uint8_t *buf
; /* buffer */
218 uint8_t *next
; /* next byte to deliver from buffer */
219 unsigned avail
; /* number of bytes available to deliver at next */
223 int fd
; /* file descriptor */
224 int64_t raw_pos
; /* current position in file (just to not call lseek()) */
225 int64_t pos
; /* current position in uncompressed data */
226 unsigned size
; /* buffer size */
228 struct wtap_reader_buf in
; /* input buffer, containing compressed data */
229 struct wtap_reader_buf out
; /* output buffer, containing uncompressed data */
231 bool eof
; /* true if end of input file reached */
232 int64_t start
; /* where the gzip data started, for rewinding */
233 int64_t raw
; /* where the raw data started, for seeking */
234 compression_t compression
; /* type of compression, if any */
235 compression_t last_compression
; /* last known compression type */
236 bool is_compressed
; /* false if completely uncompressed, true otherwise */
239 int64_t skip
; /* amount to skip (already rewound if backwards) */
240 bool seek_pending
; /* true if seek request pending */
242 /* error information */
243 int err
; /* error code */
244 const char *err_info
; /* additional error information string for some errors */
247 * Decompression stream information.
249 * XXX - should this be a union?
251 #ifdef USE_ZLIB_OR_ZLIBNG
252 /* zlib inflate stream */
253 zlib_stream strm
; /* stream structure in-place (not a pointer) */
254 bool dont_check_crc
; /* true if we aren't supposed to check the CRC */
255 #endif /* USE_ZLIB_OR_ZLIBNG */
257 ZSTD_DCtx
*zstd_dctx
;
258 #endif /* HAVE_ZSTD */
261 LZ4F_frameInfo_t lz4_info
;
262 unsigned char lz4_hdr
[LZ4F_HEADER_SIZE_MAX
];
266 GPtrArray
*fast_seek
;
270 /* Current read offset within a buffer. */
272 offset_in_buffer(struct wtap_reader_buf
*buf
)
274 /* buf->next points to the next byte to read, and buf->buf points
275 to the first byte in the buffer, so the difference between them
278 This will fit in an unsigned int, because it can't be bigger
279 than the size of the buffer, which is an unsigned int. */
280 return (unsigned)(buf
->next
- buf
->buf
);
283 /* Number of bytes of data that are in a buffer. */
285 bytes_in_buffer(struct wtap_reader_buf
*buf
)
287 /* buf->next + buf->avail points just past the last byte of data in
289 Thus, (buf->next + buf->avail) - buf->buf is the number of bytes
290 of data in the buffer.
292 This will fit in an unsigned, because it can't be bigger
293 than the size of the buffer, which is an unsigned. */
294 return (unsigned)((buf
->next
+ buf
->avail
) - buf
->buf
);
297 /* Reset a buffer, discarding all data in the buffer, so we read into
298 it starting at the beginning. */
300 buf_reset(struct wtap_reader_buf
*buf
)
302 buf
->next
= buf
->buf
;
307 buf_read(FILE_T state
, struct wtap_reader_buf
*buf
)
309 unsigned space_left
, to_read
;
310 unsigned char *read_ptr
;
313 /* How much space is left at the end of the buffer?
314 XXX - the output buffer actually has state->size * 2 bytes. */
315 space_left
= state
->size
- bytes_in_buffer(buf
);
316 if (space_left
== 0) {
317 /* There's no space left, so we start fresh at the beginning
322 to_read
= state
->size
;
324 /* There's some space left; try to read as much data as we
325 can into that space. We may get less than that if we're
326 reading from a pipe or if we're near the end of the file. */
327 read_ptr
= buf
->next
+ buf
->avail
;
328 to_read
= space_left
;
331 ret
= ws_read(state
->fd
, read_ptr
, to_read
);
334 state
->err_info
= NULL
;
339 state
->raw_pos
+= ret
;
340 buf
->avail
+= (unsigned)ret
;
344 static int /* gz_avail */
345 fill_in_buffer(FILE_T state
)
350 if (buf_read(state
, &state
->in
) < 0)
356 #define ZLIB_WINSIZE 32768
357 #define LZ4_WINSIZE 65536
359 struct fast_seek_point
{
360 int64_t out
; /* corresponding offset in uncompressed data */
361 int64_t in
; /* offset in input file of first full byte */
363 compression_t compression
;
366 #ifdef HAVE_INFLATEPRIME
367 int bits
; /* number of bits (1-7) from byte at in - 1, or 0 */
368 #endif /* HAVE_INFLATEPRIME */
369 unsigned char window
[ZLIB_WINSIZE
]; /* preceding 32K of uncompressed data */
371 /* be gentle with Z_STREAM_END, 8 bytes more... Another solution would be to comment checks out */
377 LZ4F_frameInfo_t lz4_info
;
378 unsigned char lz4_hdr
[LZ4F_HEADER_SIZE_MAX
];
380 unsigned char window
[LZ4_WINSIZE
]; /* preceding 64K of uncompressed data */
387 struct zlib_cur_seek_point
{
388 unsigned char window
[ZLIB_WINSIZE
]; /* preceding 32K of uncompressed data */
393 #define SPAN INT64_C(1048576)
394 static struct fast_seek_point
*
395 fast_seek_find(FILE_T file
, int64_t pos
)
397 struct fast_seek_point
*smallest
= NULL
;
398 struct fast_seek_point
*item
;
399 unsigned low
, i
, max
;
401 if (!file
->fast_seek
)
404 for (low
= 0, max
= file
->fast_seek
->len
; low
< max
; ) {
406 item
= (struct fast_seek_point
*)file
->fast_seek
->pdata
[i
];
410 else if (pos
> item
->out
) {
421 fast_seek_header(FILE_T file
, int64_t in_pos
, int64_t out_pos
,
422 compression_t compression
)
424 struct fast_seek_point
*item
= NULL
;
426 if (!file
->fast_seek
) {
430 if (file
->fast_seek
->len
!= 0)
431 item
= (struct fast_seek_point
*)file
->fast_seek
->pdata
[file
->fast_seek
->len
- 1];
433 /* fast_seek_header always adds a fast seek point, even if less than
434 * SPAN from the last one. That is because it is used for new streams
435 * (including concatenated streams) where the compression type
436 * or, for LZ4, compression options, may change.
438 if (!item
|| item
->out
< out_pos
) {
439 struct fast_seek_point
*val
= g_new(struct fast_seek_point
,1);
442 val
->compression
= compression
;
445 if (compression
== LZ4
) {
446 val
->data
.lz4
.lz4_info
= file
->lz4_info
;
447 memcpy(val
->data
.lz4
.lz4_hdr
, file
->lz4_hdr
, LZ4F_HEADER_SIZE_MAX
);
450 g_ptr_array_add(file
->fast_seek
, val
);
455 fast_seek_reset(FILE_T state
)
457 switch (state
->compression
) {
467 #ifdef USE_ZLIB_OR_ZLIBNG
468 if (state
->fast_seek_cur
!= NULL
) {
469 struct zlib_cur_seek_point
*cur
= (struct zlib_cur_seek_point
*) state
->fast_seek_cur
;
474 /* This "cannot happen" */
475 ws_assert_not_reached();
476 #endif /* USE_ZLIB_OR_ZLIBNG */
479 case GZIP_AFTER_HEADER
:
484 /* Anything to do? */
486 /* This "cannot happen" */
487 ws_assert_not_reached();
488 #endif /* HAVE_ZSTD */
493 /* Anything to do? */
495 /* This "cannot happen" */
496 ws_assert_not_reached();
497 #endif /* HAVE_LZ4 */
500 /* Add other compression types here */
503 /* This "cannot happen" */
504 ws_assert_not_reached();
510 uncompressed_fill_out_buffer(FILE_T state
)
512 if (buf_read(state
, &state
->out
) < 0)
517 /* Get next byte from input, or -1 if end or error.
521 * 1) errors from buf_read(), and thus from fill_in_buffer(), are
522 * "sticky", and fill_in_buffer() won't do any reading if there's
525 * 2) GZ_GETC() returns -1 on an EOF;
527 * so it's safe to make multiple GZ_GETC() calls and only check the
528 * last one for an error. */
529 #define GZ_GETC() ((state->in.avail == 0 && fill_in_buffer(state) == -1) ? -1 : \
530 (state->in.avail == 0 ? -1 : \
531 (state->in.avail--, *(state->in.next)++)))
535 * Gzipped files, using compression from zlib or zlib-ng.
537 * https://tools.ietf.org/html/rfc1952 (RFC 1952)
539 #ifdef USE_ZLIB_OR_ZLIBNG
541 /* Get a one-byte integer and return 0 on success and the value in *ret.
542 Otherwise -1 is returned, state->err is set, and *ret is not modified. */
544 gz_next1(FILE_T state
, uint8_t *ret
)
550 if (state
->err
== 0) {
552 state
->err
= WTAP_ERR_SHORT_READ
;
553 state
->err_info
= NULL
;
561 /* Get a two-byte little-endian integer and return 0 on success and the value
562 in *ret. Otherwise -1 is returned, state->err is set, and *ret is not
565 gz_next2(FILE_T state
, uint16_t *ret
)
573 if (state
->err
== 0) {
575 state
->err
= WTAP_ERR_SHORT_READ
;
576 state
->err_info
= NULL
;
580 val
+= (uint16_t)ch
<< 8;
585 /* Get a four-byte little-endian integer and return 0 on success and the value
586 in *ret. Otherwise -1 is returned, state->err is set, and *ret is not
589 gz_next4(FILE_T state
, uint32_t *ret
)
595 val
+= (unsigned)GZ_GETC() << 8;
596 val
+= (uint32_t)GZ_GETC() << 16;
599 if (state
->err
== 0) {
601 state
->err
= WTAP_ERR_SHORT_READ
;
602 state
->err_info
= NULL
;
606 val
+= (uint32_t)ch
<< 24;
611 /* Skip the specified number of bytes and return 0 on success. Otherwise -1
614 gz_skipn(FILE_T state
, size_t n
)
617 if (GZ_GETC() == -1) {
618 if (state
->err
== 0) {
620 state
->err
= WTAP_ERR_SHORT_READ
;
621 state
->err_info
= NULL
;
630 /* Skip a null-terminated string and return 0 on success. Otherwise -1
633 gz_skipzstr(FILE_T state
)
637 /* It's null-terminated, so scan until we read a byte with
638 the value 0 or get an error. */
639 while ((ch
= GZ_GETC()) > 0)
642 if (state
->err
== 0) {
644 state
->err
= WTAP_ERR_SHORT_READ
;
645 state
->err_info
= NULL
;
653 zlib_fast_seek_add(FILE_T file
, struct zlib_cur_seek_point
*point
, int bits
, int64_t in_pos
, int64_t out_pos
)
655 /* it's for sure after gzip header, so file->fast_seek->len != 0 */
656 struct fast_seek_point
*item
= (struct fast_seek_point
*)file
->fast_seek
->pdata
[file
->fast_seek
->len
- 1];
658 #ifndef HAVE_INFLATEPRIME
661 #endif /* HAVE_INFLATEPRIME */
663 /* GLib has balanced binary trees (GTree), but I couldn't find a way to do a quick search for the nearest (and smaller) value to seek to (which is what fast_seek_find() does).
664 * Inserting a value in the middle of a sorted array is expensive, so we only want to add at the end.
665 * It's not a big deal, because the first read doesn't usually involve seeking.
667 if (item
->out
+ SPAN
< out_pos
) {
668 struct fast_seek_point
*val
= g_new(struct fast_seek_point
,1);
671 val
->compression
= ZLIB
;
672 #ifdef HAVE_INFLATEPRIME
673 val
->data
.zlib
.bits
= bits
;
674 #endif /* HAVE_INFLATEPRIME */
675 if (point
->pos
!= 0) {
676 unsigned int left
= ZLIB_WINSIZE
- point
->pos
;
678 memcpy(val
->data
.zlib
.window
, point
->window
+ point
->pos
, left
);
679 memcpy(val
->data
.zlib
.window
+ left
, point
->window
, point
->pos
);
681 memcpy(val
->data
.zlib
.window
, point
->window
, ZLIB_WINSIZE
);
684 * XXX - strm.adler is a uLong in at least some versions
685 * of zlib, and uLong is an unsigned long in at least
686 * some of those versions, which means it's 64-bit
687 * on LP64 platforms, even though the checksum is
688 * 32-bit. We assume the actual Adler checksum
689 * is in the lower 32 bits of strm.adler; as the
690 * checksum in the file is only 32 bits, we save only
691 * those lower 32 bits, and cast away any additional
692 * bits to squelch warnings.
694 * The same applies to strm.total_out.
696 val
->data
.zlib
.adler
= (uint32_t) file
->strm
.adler
;
697 val
->data
.zlib
.total_out
= (uint32_t) file
->strm
.total_out
;
698 g_ptr_array_add(file
->fast_seek
, val
);
703 * Based on what gz_decomp() in zlib does.
706 zlib_fill_out_buffer(FILE_T state
)
708 int ret
= 0; /* XXX */
711 zng_streamp strm
= &(state
->strm
);
712 #else /* HAVE_ZLIBNG */
713 z_streamp strm
= &(state
->strm
);
714 #endif /* HAVE_ZLIBNG */
715 unsigned char *buf
= state
->out
.buf
;
716 unsigned int count
= state
->size
<< 1;
718 unsigned char *buf2
= buf
;
719 unsigned int count2
= count
;
721 strm
->avail_out
= count
;
722 strm
->next_out
= buf
;
724 /* fill output buffer up to end of deflate stream or error */
726 /* get more input for inflate() */
727 if (state
->in
.avail
== 0 && fill_in_buffer(state
) == -1)
729 if (state
->in
.avail
== 0) {
731 state
->err
= WTAP_ERR_SHORT_READ
;
732 state
->err_info
= NULL
;
736 strm
->avail_in
= state
->in
.avail
;
737 strm
->next_in
= state
->in
.next
;
738 /* decompress and handle errors */
740 ret
= ZLIB_PREFIX(inflate
)(strm
, Z_BLOCK
);
742 ret
= ZLIB_PREFIX(inflate
)(strm
, Z_NO_FLUSH
);
744 state
->in
.avail
= strm
->avail_in
;
747 state
->in
.next
= (unsigned char *)strm
->next_in
;
750 state
->in
.next
= strm
->next_in
;
752 if (ret
== Z_STREAM_ERROR
) {
753 state
->err
= WTAP_ERR_DECOMPRESS
;
754 state
->err_info
= strm
->msg
;
757 if (ret
== Z_NEED_DICT
) {
758 state
->err
= WTAP_ERR_DECOMPRESS
;
759 state
->err_info
= "preset dictionary needed";
762 if (ret
== Z_MEM_ERROR
) {
763 /* This means "not enough memory". */
765 state
->err_info
= NULL
;
768 if (ret
== Z_DATA_ERROR
) { /* deflate stream invalid */
769 state
->err
= WTAP_ERR_DECOMPRESS
;
770 state
->err_info
= strm
->msg
;
777 strm
->adler
= ZLIB_PREFIX(crc32
)(strm
->adler
, buf2
, count2
- strm
->avail_out
);
779 if (state
->fast_seek_cur
!= NULL
) {
780 struct zlib_cur_seek_point
*cur
= (struct zlib_cur_seek_point
*) state
->fast_seek_cur
;
781 unsigned int ready
= count2
- strm
->avail_out
;
783 if (ready
< ZLIB_WINSIZE
) {
784 unsigned left
= ZLIB_WINSIZE
- cur
->pos
;
787 memcpy(cur
->window
+ cur
->pos
, buf2
, left
);
789 memcpy(cur
->window
, buf2
+ left
, ready
- left
);
791 cur
->pos
= ready
- left
;
794 memcpy(cur
->window
+ cur
->pos
, buf2
, ready
);
799 if (cur
->have
>= ZLIB_WINSIZE
)
800 cur
->have
= ZLIB_WINSIZE
;
803 memcpy(cur
->window
, buf2
+ (ready
- ZLIB_WINSIZE
), ZLIB_WINSIZE
);
805 cur
->have
= ZLIB_WINSIZE
;
808 if (cur
->have
>= ZLIB_WINSIZE
&& ret
!= Z_STREAM_END
&& (strm
->data_type
& 128) && !(strm
->data_type
& 64))
809 zlib_fast_seek_add(state
, cur
, (strm
->data_type
& 7), state
->raw_pos
- strm
->avail_in
, state
->pos
+ (count
- strm
->avail_out
));
812 buf2
= (buf2
+ count2
- strm
->avail_out
);
813 count2
= strm
->avail_out
;
815 } while (strm
->avail_out
&& ret
!= Z_STREAM_END
);
817 /* update available output and crc check value */
818 state
->out
.next
= buf
;
819 state
->out
.avail
= count
- strm
->avail_out
;
821 /* Check gzip trailer if at end of deflate stream.
822 We don't fail immediately here, we just set an error
823 indication, so that we try to process what data we
824 got before the error. The next attempt to read
825 something past that data will get the error. */
826 if (ret
== Z_STREAM_END
) {
827 if (gz_next4(state
, &crc
) != -1 &&
828 gz_next4(state
, &len
) != -1) {
829 if (crc
!= strm
->adler
&& !state
->dont_check_crc
) {
830 state
->err
= WTAP_ERR_DECOMPRESS
;
831 state
->err_info
= "bad CRC";
832 } else if (len
!= (strm
->total_out
& 0xffffffffUL
)) {
833 state
->err
= WTAP_ERR_DECOMPRESS
;
834 state
->err_info
= "length field wrong";
837 state
->last_compression
= state
->compression
;
838 state
->compression
= UNKNOWN
; /* ready for next stream, once have is 0 */
839 g_free(state
->fast_seek_cur
);
840 state
->fast_seek_cur
= NULL
;
843 #endif /* USE_ZLIB_OR_ZLIBNG */
846 * Check for a gzip header.
848 * Based on the gzip-specific stuff gz_head() from zlib does.
851 check_for_zlib_compression(FILE_T state
)
854 * Look for the gzip header. The first two bytes are 31 and 139,
855 * and if we find it, return success if we support gzip and an
858 if (state
->in
.next
[0] == 31) {
862 /* Make sure the byte after the first byte is present */
863 if (state
->in
.avail
== 0 && fill_in_buffer(state
) == -1) {
867 if (state
->in
.avail
!= 0) {
868 if (state
->in
.next
[0] == 139) {
870 * We have what looks like the ID1 and ID2 bytes of a gzip
872 * Continue processing the file.
874 * XXX - some capture file formats (I'M LOOKING AT YOU,
875 * ENDACE!) can have 31 in the first byte of the file
876 * and 139 in the second byte of the file. For now, in
877 * those cases, you lose.
879 #ifdef USE_ZLIB_OR_ZLIBNG
888 /* read rest of header */
890 /* compression method (CM) */
891 if (gz_next1(state
, &cm
) == -1)
894 state
->err
= WTAP_ERR_DECOMPRESS
;
895 state
->err_info
= "unknown compression method";
900 if (gz_next1(state
, &flags
) == -1) {
904 if (flags
& 0xe0) { /* reserved flag bits */
905 state
->err
= WTAP_ERR_DECOMPRESS
;
906 state
->err_info
= "reserved flag bits set";
910 /* modification time (MTIME) */
911 if (gz_skipn(state
, 4) == -1) {
916 /* extra flags (XFL) */
917 if (gz_skipn(state
, 1) == -1) {
922 /* operating system (OS) */
923 if (gz_skipn(state
, 1) == -1) {
929 /* extra field - get XLEN */
930 if (gz_next2(state
, &len
) == -1) {
935 /* skip the extra field */
936 if (gz_skipn(state
, len
) == -1) {
943 if (gz_skipzstr(state
) == -1) {
950 if (gz_skipzstr(state
) == -1) {
957 if (gz_next2(state
, &hcrc
) == -1) {
961 /* XXX - check the CRC? */
964 /* set up for decompression */
965 ZLIB_PREFIX(inflateReset
)(&(state
->strm
));
966 state
->strm
.adler
= ZLIB_PREFIX(crc32
)(0L, Z_NULL
, 0);
967 state
->compression
= ZLIB
;
968 state
->is_compressed
= true;
970 if (state
->fast_seek
) {
971 struct zlib_cur_seek_point
*cur
= g_new(struct zlib_cur_seek_point
,1);
973 cur
->pos
= cur
->have
= 0;
974 g_free(state
->fast_seek_cur
);
975 state
->fast_seek_cur
= cur
;
976 fast_seek_header(state
, state
->raw_pos
- state
->in
.avail
, state
->pos
, GZIP_AFTER_HEADER
);
980 #else /* USE_ZLIB_OR_ZLIBNG */
981 state
->err
= WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED
;
982 state
->err_info
= "reading gzip-compressed files isn't supported";
984 #endif /* USE_ZLIB_OR_ZLIBNG */
988 * Not a gzip file. "Unget" the first character; either:
990 * 1) we read both of the first two bytes into the
991 * buffer with the first ws_read, so we can just back
994 * 2) we only read the first byte into the buffer with
995 * the first ws_read (e.g., because we're reading from
996 * a pipe and only the first byte had been written to
997 * the pipe at that point), and read the second byte
998 * into the buffer after the first byte in the
999 * fill_in_buffer call, so we now have two bytes in
1000 * the buffer, and can just back up by one byte.
1011 * Zstandard compression.
1013 * https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
1017 zstd_fill_out_buffer(FILE_T state
)
1019 ws_assert(state
->out
.avail
== 0);
1021 if (state
->in
.avail
== 0 && fill_in_buffer(state
) == -1)
1024 ZSTD_outBuffer output
= {state
->out
.buf
, state
->size
<< 1, 0};
1025 ZSTD_inBuffer input
= {state
->in
.next
, state
->in
.avail
, 0};
1026 const size_t ret
= ZSTD_decompressStream(state
->zstd_dctx
, &output
, &input
);
1027 if (ZSTD_isError(ret
)) {
1028 state
->err
= WTAP_ERR_DECOMPRESS
;
1029 state
->err_info
= ZSTD_getErrorName(ret
);
1033 state
->in
.next
= state
->in
.next
+ input
.pos
;
1034 state
->in
.avail
-= (unsigned)input
.pos
;
1036 state
->out
.next
= output
.dst
;
1037 state
->out
.avail
= (unsigned)output
.pos
;
1040 state
->last_compression
= state
->compression
;
1041 state
->compression
= UNKNOWN
;
1045 #endif /* HAVE_ZSTD */
1048 * Check for a Zstandard header.
1051 check_for_zstd_compression(FILE_T state
)
1054 * Look for the Zstandard header, and, if we find it, return
1055 * success if we support Zstandard and an error if we don't.
1057 if (state
->in
.avail
>= 4
1058 && state
->in
.next
[0] == 0x28 && state
->in
.next
[1] == 0xb5
1059 && state
->in
.next
[2] == 0x2f && state
->in
.next
[3] == 0xfd) {
1061 const size_t ret
= ZSTD_initDStream(state
->zstd_dctx
);
1062 if (ZSTD_isError(ret
)) {
1063 state
->err
= WTAP_ERR_DECOMPRESS
;
1064 state
->err_info
= ZSTD_getErrorName(ret
);
1068 fast_seek_header(state
, state
->raw_pos
- state
->in
.avail
, state
->pos
, ZSTD
);
1069 state
->compression
= ZSTD
;
1070 state
->is_compressed
= true;
1072 #else /* HAVE_ZSTD */
1073 state
->err
= WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED
;
1074 state
->err_info
= "reading zstd-compressed files isn't supported";
1076 #endif /* HAVE_ZSTD */
1084 * https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
1088 lz4_fast_seek_add(FILE_T file
, struct zlib_cur_seek_point
*point _U_
, int64_t in_pos
, int64_t out_pos
)
1090 if (!file
->fast_seek
) {
1094 struct fast_seek_point
*item
= NULL
;
1096 if (file
->fast_seek
->len
!= 0)
1097 item
= (struct fast_seek_point
*)file
->fast_seek
->pdata
[file
->fast_seek
->len
- 1];
1099 /* As of Glib 2.68 GTree has g_tree_upper_bound, or we could use a
1100 * wmem_tree. However, since our initial read is usually sequential
1101 * only adding seek points at the end of the ptr array is fast and fine.
1104 /* don't bother adding jump points between very small blocks (min SPAN) */
1105 if (!item
|| item
->out
+ SPAN
< out_pos
) {
1106 struct fast_seek_point
*val
= g_new(struct fast_seek_point
,1);
1109 val
->compression
= LZ4
;
1111 if (point
->pos
!= 0) {
1112 unsigned int left
= LZ4_WINSIZE
- point
->pos
;
1114 memcpy(val
->data
.zlib
.window
, point
->window
+ point
->pos
, left
);
1115 memcpy(val
->data
.zlib
.window
+ left
, point
->window
, point
->pos
);
1117 memcpy(val
->data
.zlib
.window
, point
->window
, ZZ4_WINSIZE
);
1120 val
->data
.lz4
.lz4_info
= file
->lz4_info
;
1121 memcpy(val
->data
.lz4
.lz4_hdr
, file
->lz4_hdr
, LZ4F_HEADER_SIZE_MAX
);
1122 g_ptr_array_add(file
->fast_seek
, val
);
1127 lz4_fill_out_buffer(FILE_T state
)
1129 ws_assert(state
->out
.avail
== 0);
1131 if (state
->in
.avail
== 0 && fill_in_buffer(state
) == -1)
1135 * We should be at the start of a block. First, determine the size of
1136 * the block. We tell LZ4F_decompress that there's no room to put
1137 * the decompressed block; this will make it read the block size
1138 * header and stop, returning the size of the block (plus next
1139 * header) as hint of how much input to hand next.
1142 size_t outBufSize
= 0; // Zero so we don't actually consume the block
1145 size_t compressedSize
= 0;
1148 /* get more input for decompress() */
1149 if (state
->in
.avail
== 0 && fill_in_buffer(state
) == -1)
1152 state
->err
= WTAP_ERR_SHORT_READ
;
1153 state
->err_info
= NULL
;
1157 inBufSize
= state
->in
.avail
;
1158 compressedSize
= LZ4F_decompress(state
->lz4_dctx
, state
->out
.buf
, &outBufSize
, state
->in
.next
, &inBufSize
, NULL
);
1160 if (LZ4F_isError(compressedSize
)) {
1161 state
->err
= WTAP_ERR_DECOMPRESS
;
1162 state
->err_info
= LZ4F_getErrorName(compressedSize
);
1166 state
->in
.next
+= (unsigned)inBufSize
;
1167 state
->in
.avail
-= (unsigned)inBufSize
;
1169 if (compressedSize
== 0) {
1171 state
->last_compression
= state
->compression
;
1172 state
->compression
= UNKNOWN
;
1176 ws_assert(outBufSize
== 0);
1178 } while (compressedSize
< LZ4F_BLOCK_HEADER_SIZE
);
1181 * We don't want to add a fast seek point for the end of frame,
1182 * especially if there's another frame or other stream after it,
1183 * which would have the same out position. So add it after
1184 * reading the block size - but point to where the block size
1185 * is so that we'll fast seek to the block size again.
1187 if (state
->lz4_info
.blockMode
== LZ4F_blockIndependent
) {
1189 * XXX - If state->lz4_info.blockMode == LZ4F_blockLinked, it doesn't
1190 * seem like the LZ4 Frame API can handle this, we would need to use
1191 * the low level Block API and pass the last 64KiB window of data to
1192 * LZ4_setStreamDecode and use LZ4_decompress_safe_continue (similar
1193 * to gzip). So for now we can't do fast seek with it (we do add one
1194 * header at the frame beginning so that concatenated frames and other
1195 * decompression streams work.)
1197 lz4_fast_seek_add(state
, NULL
, state
->raw_pos
- state
->in
.avail
- LZ4F_BLOCK_HEADER_SIZE
, state
->pos
);
1200 // Now actually read the entire next block - but not the next header
1201 compressedSize
-= LZ4F_BLOCK_HEADER_SIZE
;
1202 state
->out
.next
= state
->out
.buf
;
1204 if (compressedSize
> state
->size
) {
1206 * What is this? Either bogus, or some new variant of LZ4 Frames with
1207 * a larger block size we don't support. We could have a buffer
1208 * overrun if we try to process it.
1210 * TODO - We could realloc here.
1212 state
->err
= WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED
;
1213 state
->err_info
= "lz4 compressed block size too large";
1219 /* get more input for decompress() */
1220 if (state
->in
.avail
== 0 && fill_in_buffer(state
) == -1)
1223 state
->err
= WTAP_ERR_SHORT_READ
;
1224 state
->err_info
= NULL
;
1228 outBufSize
= (state
->size
<< 1) - offset_in_buffer(&state
->out
);
1229 inBufSize
= MIN(state
->in
.avail
, compressedSize
);
1230 ret
= LZ4F_decompress(state
->lz4_dctx
, state
->out
.next
, &outBufSize
, state
->in
.next
, &inBufSize
, NULL
);
1232 if (LZ4F_isError(ret
)) {
1233 state
->err
= WTAP_ERR_DECOMPRESS
;
1234 state
->err_info
= LZ4F_getErrorName(ret
);
1237 state
->in
.next
+= (unsigned)inBufSize
;
1238 state
->in
.avail
-= (unsigned)inBufSize
;
1239 compressedSize
-= inBufSize
;
1241 state
->out
.next
+= (unsigned)outBufSize
;
1242 state
->out
.avail
+= (unsigned)outBufSize
;
1243 } while (compressedSize
!= 0);
1245 state
->out
.next
= state
->out
.buf
;
1248 /* This is an alternative implementation using the lower-level
1249 * LZ4 Block API. Doing something like this might be necessary
1250 * to handle linked blocks, because the Frame API doesn't have
1251 * a method to reset the dictionary / window.
1253 int outBufSize
= state
->size
<< 1;
1254 uint32_t compressedSize
;
1255 if (gz_next4(state
, &compressedSize
) == -1) {
1258 if (compressedSize
== 0) {
1260 if (state
->lz4_info
.contentChecksumFlag
) {
1262 if (gz_next4(state
, &xxHash
) == -1) {
1265 /* XXX - check hash? */
1267 state
->last_compression
= state
->compression
;
1268 state
->compression
= UNKNOWN
;
1271 bool uncompressed
= compressedSize
>> 31;
1272 compressedSize
&= 0x7FFFFFFF;
1273 if (compressedSize
> state
->size
) {
1274 // TODO - we could realloc here
1275 state
->err
= WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED
;
1276 state
->err_info
= "lz4 compressed block size too large";
1281 * We have to read an entire block as we're using the low-level
1282 * Block API instead of the LZ4 Frame API.
1284 if (compressedSize
> (unsigned)state
->in
.avail
) {
1285 memmove(state
->in
.buf
, state
->in
.next
, state
->in
.avail
);
1286 state
->in
.next
= state
->in
.buf
;
1287 while ((unsigned)state
->in
.avail
< compressedSize
) {
1289 state
->err
= WTAP_ERR_SHORT_READ
;
1290 state
->err_info
= NULL
;
1293 if (fill_in_buffer(state
) == -1) {
1299 int decompressedSize
;
1301 memcpy(state
->out
.buf
, state
->in
.buf
, compressedSize
);
1302 decompressedSize
= compressedSize
;
1304 decompressedSize
= LZ4_decompress_safe(state
->in
.next
, state
->out
.buf
, compressedSize
, outBufSize
);
1305 //const size_t ret = LZ4F_decompress(state->lz4_dctx, state->out.buf, &outBufSize, state->in.next, &inBufSize, NULL);
1306 if (LZ4F_isError(decompressedSize
)) {
1307 state
->err
= WTAP_ERR_DECOMPRESS
;
1308 state
->err_info
= LZ4F_getErrorName(decompressedSize
);
1314 * We assume LZ4F_decompress() will not set inBufSize to a
1315 * value > state->in.avail.
1317 state
->in
.next
+= compressedSize
;
1318 state
->in
.avail
-= compressedSize
;
1320 state
->out
.next
= state
->out
.buf
;
1321 state
->out
.avail
= (unsigned)decompressedSize
;
1323 if (state
->lz4_info
.blockChecksumFlag
== LZ4F_blockChecksumEnabled
) {
1325 if (gz_next4(state
, &xxHash
) == -1) {
1328 /* XXX - check hash? */
1333 #endif /* USE_LZ4 */
1336 * Check for an lz4 header.
1339 check_for_lz4_compression(FILE_T state
)
1342 * Look for the lz4 header, and, if we find it, return success
1343 * if we support lz4 and an error if we don't.
1345 if (state
->in
.avail
>= 4
1346 && state
->in
.next
[0] == 0x04 && state
->in
.next
[1] == 0x22
1347 && state
->in
.next
[2] == 0x4d && state
->in
.next
[3] == 0x18) {
1349 #if LZ4_VERSION_NUMBER >= 10800
1350 LZ4F_resetDecompressionContext(state
->lz4_dctx
);
1351 #else /* LZ4_VERSION_NUMBER >= 10800 */
1352 LZ4F_freeDecompressionContext(state
->lz4_dctx
);
1353 const LZ4F_errorCode_t ret
= LZ4F_createDecompressionContext(&state
->lz4_dctx
, LZ4F_VERSION
);
1354 if (LZ4F_isError(ret
)) {
1355 state
->err
= WTAP_ERR_INTERNAL
;
1356 state
->err_info
= LZ4F_getErrorName(ret
);
1359 #endif /* LZ4_VERSION_NUMBER >= 10800 */
1360 size_t headerSize
= LZ4F_HEADER_SIZE_MAX
;
1361 #if LZ4_VERSION_NUMBER >= 10903
1363 * In 1.9.3+ we can handle a silly edge case of a tiny valid
1364 * frame at the end of a file that is smaller than the maximum
1365 * header size. (lz4frame.h added the function in 1.9.0, but
1366 * only for the static library; it wasn't exported until 1.9.3)
1368 while (state
->in
.avail
< LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH
) {
1369 if (fill_in_buffer(state
) == -1) {
1373 state
->err
= WTAP_ERR_SHORT_READ
;
1374 state
->err_info
= NULL
;
1378 headerSize
= LZ4F_headerSize(state
->in
.next
, state
->in
.avail
);
1379 if (LZ4F_isError(headerSize
)) {
1380 state
->err
= WTAP_ERR_DECOMPRESS
;
1381 state
->err_info
= LZ4F_getErrorName(headerSize
);
1384 #endif /* LZ4_VERSION_NUMBER >= 10903 */
1385 while (state
->in
.avail
< headerSize
) {
1386 if (fill_in_buffer(state
) == -1) {
1390 state
->err
= WTAP_ERR_SHORT_READ
;
1391 state
->err_info
= NULL
;
1395 size_t inBufSize
= state
->in
.avail
;
1396 memcpy(state
->lz4_hdr
, state
->in
.next
, headerSize
);
1397 const LZ4F_errorCode_t err
= LZ4F_getFrameInfo(state
->lz4_dctx
, &state
->lz4_info
, state
->in
.next
, &inBufSize
);
1398 if (LZ4F_isError(err
)) {
1399 state
->err
= WTAP_ERR_DECOMPRESS
;
1400 state
->err_info
= LZ4F_getErrorName(err
);
1405 * XXX - We could check state->lz4_info.blockSizeID here, and
1406 * only realloc the buffers to a larger value if the max
1407 * block size is bigger than state->size. Also we could fail
1408 * on unknown values?
1410 state
->in
.avail
-= (unsigned)inBufSize
;
1411 state
->in
.next
+= (unsigned)inBufSize
;
1413 fast_seek_header(state
, state
->raw_pos
- state
->in
.avail
, state
->pos
, LZ4
);
1414 state
->compression
= LZ4
;
1415 state
->is_compressed
= true;
1418 state
->err
= WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED
;
1419 state
->err_info
= "reading lz4-compressed files isn't supported";
1421 #endif /* USE_LZ4 */
1426 typedef int (*compression_type_test
)(FILE_T
);
1428 static compression_type_test
const compression_type_tests
[] = {
1429 check_for_zlib_compression
,
1430 check_for_zstd_compression
,
1431 check_for_lz4_compression
,
1435 * Used when we haven't yet determined whether we have a compressed file
1436 * and, if we do, what sort of compressed file it is.
1438 * Based on the non-gzip-specific stuff that gz_head() from zlib does.
1441 check_for_compression(FILE_T state
)
1444 * If this isn't the first frame / compressed stream, ensure that
1445 * we're starting at the beginning of the buffer. This shouldn't
1448 * This is to avoid edge cases where a previous frame finished but
1449 * state->in.next is close to the end of the buffer so there isn't
1450 * much room to put the start of the next frame.
1451 * This also lets us put back bytes if things go wrong.
1453 if (state
->in
.next
!= state
->in
.buf
) {
1454 memmove(state
->in
.buf
, state
->in
.next
, state
->in
.avail
);
1455 state
->in
.next
= state
->in
.buf
;
1458 /* get some data in the input buffer */
1459 if (state
->in
.avail
== 0) {
1460 if (fill_in_buffer(state
) == -1)
1462 if (state
->in
.avail
== 0)
1467 * Check for the compression types we support.
1469 for (size_t i
= 0; i
< G_N_ELEMENTS(compression_type_tests
); i
++) {
1472 ret
= compression_type_tests
[i
](state
);
1474 return -1; /* error */
1476 return 0; /* found it */
1480 * Some other compressed file formats we might want to support:
1483 * https://tukaani.org/xz/
1484 * https://github.com/tukaani-project/xz
1485 * https://github.com/tukaani-project/xz/blob/master/doc/xz-file-format.txt
1488 * https://www.sourceware.org/bzip2/
1489 * https://gitlab.com/bzip2/bzip2/
1490 * https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
1491 * (GitHub won't render it; download and open it)
1494 * https://www.nongnu.org/lzip/
1498 * We didn't see anything that looks like a header for any type of
1499 * compressed file that we support, so just do uncompressed I/O.
1501 * XXX - This fast seek data is for the case where a compressed stream
1502 * ends and is followed by an uncompressed portion. It only works if
1503 * the uncompressed portion is at the end, as we don't constantly scan
1504 * for magic bytes in the middle of uncompressed data. (Concatenated
1505 * compressed streams _do_ work, even streams of different compression types.)
1507 if (state
->fast_seek
)
1508 fast_seek_header(state
, state
->raw_pos
- state
->in
.avail
, state
->pos
, UNCOMPRESSED
);
1511 /* doing raw i/o, save start of raw data for seeking, copy any leftover
1512 input to output -- this assumes that the output buffer is larger than
1513 the input buffer, which also assures space for gzungetc() */
1514 state
->raw
= state
->pos
;
1515 state
->out
.next
= state
->out
.buf
;
1516 /* not a compressed file -- copy everything we've read into the
1517 input buffer to the output buffer and fall to raw i/o */
1518 if (state
->in
.avail
) {
1519 memcpy(state
->out
.buf
, state
->in
.next
, state
->in
.avail
);
1520 state
->out
.avail
= state
->in
.avail
;
1522 /* Now discard everything in the input buffer */
1523 buf_reset(&state
->in
);
1525 state
->compression
= UNCOMPRESSED
;
1530 * Based on what gz_make() in zlib does.
1533 fill_out_buffer(FILE_T state
)
1535 if (state
->compression
== UNKNOWN
) {
1537 * We don't yet know whether the file is compressed,
1538 * so check for a compressed-file header.
1540 if (check_for_compression(state
) == -1)
1542 if (state
->out
.avail
!= 0) /* got some data from check_for_compression() */
1547 * We got no data from check_for_compression(), or we didn't call
1548 * it as we already know the compression type, so read some more
1551 switch (state
->compression
) {
1555 if (!uncompressed_fill_out_buffer(state
))
1559 #ifdef USE_ZLIB_OR_ZLIBNG
1561 /* zlib (gzip) decompress */
1562 zlib_fill_out_buffer(state
);
1564 #endif /* USE_ZLIB_OR_ZLIBNG */
1568 /* zstd decompress */
1569 if (!zstd_fill_out_buffer(state
))
1572 #endif /* HAVE_ZSTD */
1576 /* lz4 decompress */
1577 if (!lz4_fill_out_buffer(state
))
1580 #endif /* USE_LZ4 */
1583 /* Unknown compression type; keep reading */
1590 gz_skip(FILE_T state
, int64_t len
)
1594 /* skip over len bytes or reach end-of-file, whichever comes first */
1596 if (state
->out
.avail
!= 0) {
1597 /* We have stuff in the output buffer; skip over
1599 n
= (int64_t)state
->out
.avail
> len
? (unsigned)len
: state
->out
.avail
;
1600 state
->out
.avail
-= n
;
1601 state
->out
.next
+= n
;
1604 } else if (state
->err
!= 0) {
1605 /* We have nothing in the output buffer, and
1606 we have an error that may not have been
1607 reported yet; that means we can't generate
1608 any more data into the output buffer, so
1609 return an error indication. */
1611 } else if (state
->eof
&& state
->in
.avail
== 0) {
1612 /* We have nothing in the output buffer, and
1613 we're at the end of the input; just return. */
1616 /* We have nothing in the output buffer, and
1617 we can generate more data; get more output,
1618 looking for header if required. */
1619 if (fill_out_buffer(state
) == -1)
1626 gz_reset(FILE_T state
)
1628 buf_reset(&state
->out
); /* no output data available */
1629 state
->eof
= false; /* not at end of file */
1630 state
->compression
= UNKNOWN
; /* look for compression header */
1632 state
->seek_pending
= false; /* no seek request pending */
1633 state
->err
= 0; /* clear error */
1634 state
->err_info
= NULL
;
1635 state
->pos
= 0; /* no uncompressed data yet */
1636 buf_reset(&state
->in
); /* no input data yet */
1643 * XXX - we now check whether we have st_blksize in struct stat;
1644 * it's not available on all platforms.
1646 * I'm not sure why we're testing _STATBUF_ST_BLKSIZE; it's not
1647 * set on all platforms that have st_blksize in struct stat.
1648 * (Not all platforms have st_blksize in struct stat.)
1650 * Is there some reason *not* to make the buffer size the maximum
1651 * of GBUFSIZE and st_blksize? On most UN*Xes, the standard I/O
1652 * library does I/O with st_blksize as the buffer size; on others,
1653 * and on Windows, it's a 4K buffer size. If st_blksize is bigger
1654 * than GBUFSIZE (which is currently 4KB), that's probably a
1655 * hint that reading in st_blksize chunks is considered a good
1656 * idea (e.g., an 8K/1K Berkeley fast file system with st_blksize
1657 * being 8K, or APFS, where st_blksize is big on at least some
1658 * versions of macOS).
1660 #ifdef _STATBUF_ST_BLKSIZE
1662 #endif /* _STATBUF_ST_BLKSIZE */
1664 size_t zstd_buf_size
;
1665 #endif /* HAVE_ZSTD */
1666 unsigned want
= GZBUFSIZE
;
1670 #endif /* USE_LZ4 */
1675 /* allocate FILE_T structure to return */
1676 state
= (FILE_T
)g_try_malloc0(sizeof *state
);
1680 state
->fast_seek_cur
= NULL
;
1681 state
->fast_seek
= NULL
;
1683 /* open the file with the appropriate mode (or just use fd) */
1686 /* we don't yet know whether it's compressed */
1687 state
->is_compressed
= false;
1688 state
->last_compression
= UNKNOWN
;
1690 /* save the current position for rewinding (only if reading) */
1691 state
->start
= ws_lseek64(state
->fd
, 0, SEEK_CUR
);
1692 if (state
->start
== -1) state
->start
= 0;
1693 state
->raw_pos
= state
->start
;
1695 /* initialize stream */
1698 #ifdef _STATBUF_ST_BLKSIZE
1700 * See what I/O size the file system recommends using, and if
1701 * it's bigger than what we're using and isn't too big, use
1704 if (ws_fstat64(fd
, &st
) >= 0) {
1706 * Yes, st_blksize can be bigger than an int; apparently,
1707 * it's a long on LP64 Linux, for example.
1709 * If the value is too big to fit into an unsigned,
1710 * just use the maximum read buffer size.
1712 * On top of that, the Single UNIX Specification says that
1713 * st_blksize is of type blksize_t, which is a *signed*
1714 * integer type, and, at minimum, macOS 11.6 and Linux 5.14.11's
1715 * include/uapi/asm-generic/stat.h define it as such.
1717 * However, other OSes might make it unsigned, and older versions
1718 * of OSes that currently make it signed might make it unsigned,
1719 * so we try to avoid warnings from that.
1721 * We cast MAX_READ_BUF_SIZE to long in order to avoid the
1722 * warning, although it might introduce warnings on platforms
1723 * where st_blksize is unsigned; we'll deal with that if
1724 * it ever shows up as an issue.
1726 * MAX_READ_BUF_SIZE is < the largest *signed* 32-bit integer,
1727 * so casting it to long won't turn it into a negative number.
1728 * (We only support 32-bit and 64-bit 2's-complement platforms.)
1730 if (st
.st_blksize
<= (long)MAX_READ_BUF_SIZE
)
1731 want
= (unsigned)st
.st_blksize
;
1733 want
= MAX_READ_BUF_SIZE
;
1734 /* XXX, verify result? */
1736 #endif /* _STATBUF_ST_BLKSIZE */
1738 /* we should have separate input and output buf sizes */
1739 zstd_buf_size
= ZSTD_DStreamInSize();
1740 if (zstd_buf_size
> want
) {
1741 if (zstd_buf_size
<= MAX_READ_BUF_SIZE
)
1742 want
= (unsigned)zstd_buf_size
;
1744 want
= MAX_READ_BUF_SIZE
;
1746 zstd_buf_size
= ZSTD_DStreamOutSize();
1747 if (zstd_buf_size
> want
) {
1748 if (zstd_buf_size
<= MAX_READ_BUF_SIZE
)
1749 want
= (unsigned)zstd_buf_size
;
1751 want
= MAX_READ_BUF_SIZE
;
1753 #endif /* HAVE_ZSTD */
1755 if (LZ4BUFSIZE
> want
) {
1756 if (LZ4BUFSIZE
<= MAX_READ_BUF_SIZE
) {
1762 #endif /* USE_LZ4 */
1764 /* allocate buffers */
1765 state
->in
.buf
= (unsigned char *)g_try_malloc(want
);
1766 state
->in
.next
= state
->in
.buf
;
1767 state
->in
.avail
= 0;
1768 state
->out
.buf
= (unsigned char *)g_try_malloc(want
<< 1);
1769 state
->out
.next
= state
->out
.buf
;
1770 state
->out
.avail
= 0;
1772 if (state
->in
.buf
== NULL
|| state
->out
.buf
== NULL
) {
1776 #ifdef USE_ZLIB_OR_ZLIBNG
1777 /* allocate inflate memory */
1778 state
->strm
.zalloc
= Z_NULL
;
1779 state
->strm
.zfree
= Z_NULL
;
1780 state
->strm
.opaque
= Z_NULL
;
1781 state
->strm
.avail_in
= 0;
1782 state
->strm
.next_in
= Z_NULL
;
1783 if (ZLIB_PREFIX(inflateInit2
)(&(state
->strm
), -15) != Z_OK
) { /* raw inflate */
1787 /* for now, assume we should check the crc */
1788 state
->dont_check_crc
= false;
1789 #endif /* USE_ZLIB_OR_ZLIBNG */
1792 state
->zstd_dctx
= ZSTD_createDCtx();
1793 if (state
->zstd_dctx
== NULL
) {
1796 #endif /* HAVE_ZSTD */
1799 ret
= LZ4F_createDecompressionContext(&state
->lz4_dctx
, LZ4F_VERSION
);
1800 if (LZ4F_isError(ret
)) {
1803 #endif /* USE_LZ4 */
1809 #ifdef USE_ZLIB_OR_ZLIBNG
1810 ZLIB_PREFIX(inflateEnd
)(&state
->strm
);
1811 #endif /* USE_ZLIB_OR_ZLIBNG */
1813 ZSTD_freeDCtx(state
->zstd_dctx
);
1814 #endif /* HAVE_ZSTD */
1816 LZ4F_freeDecompressionContext(state
->lz4_dctx
);
1817 #endif /* USE_LZ4 */
1818 g_free(state
->out
.buf
);
1819 g_free(state
->in
.buf
);
1826 file_open(const char *path
)
1830 #ifdef USE_ZLIB_OR_ZLIBNG
1831 const char *suffixp
;
1832 #endif /* USE_ZLIB_OR_ZLIBNG */
1834 /* open file and do correct filename conversions.
1836 XXX - do we need O_LARGEFILE? On UN*X, if we need to do
1837 something special to get large file support, the configure
1838 script should have set us up with the appropriate #defines,
1839 so we should be getting a large-file-enabled file descriptor
1840 here. Pre-Large File Summit UN*Xes, and possibly even some
1841 post-LFS UN*Xes, might require O_LARGEFILE here, though.
1842 If so, we should probably handle that in ws_open(). */
1843 if ((fd
= ws_open(path
, O_RDONLY
|O_BINARY
, 0000)) == -1)
1846 /* open file handle */
1847 ft
= file_fdopen(fd
);
1853 #ifdef USE_ZLIB_OR_ZLIBNG
1855 * If this file's name ends in ".caz", it's probably a compressed
1856 * Windows Sniffer file. The compression is gzip, but if we
1857 * process the CRC as specified by RFC 1952, the computed CRC
1858 * doesn't match the stored CRC.
1860 * Compressed Windows Sniffer files don't all have the same CRC
1861 * value; is it just random crap, or are they running the CRC on
1862 * a different set of data than you're supposed to (e.g., not
1863 * CRCing some of the data), or something such as that?
1865 * For now, we just set a flag to ignore CRC errors.
1867 suffixp
= strrchr(path
, '.');
1868 if (suffixp
!= NULL
) {
1869 if (g_ascii_strcasecmp(suffixp
, ".caz") == 0)
1870 ft
->dont_check_crc
= true;
1872 #endif /* USE_ZLIB_OR_ZLIBNG */
1878 file_set_random_access(FILE_T stream
, bool random_flag _U_
, GPtrArray
*seek
)
1880 stream
->fast_seek
= seek
;
1884 file_seek(FILE_T file
, int64_t offset
, int whence
, int *err
)
1886 struct fast_seek_point
*here
;
1889 if (whence
!= SEEK_SET
&& whence
!= SEEK_CUR
&& whence
!= SEEK_END
) {
1890 ws_assert_not_reached();
1897 /* Normalize offset to a SEEK_CUR specification */
1898 if (whence
== SEEK_END
) {
1899 /* Seek relative to the end of the file; given that we might be
1900 reading from a compressed file, we do that by seeking to the
1901 end of the file, making an offset relative to the end of
1902 the file an offset relative to the current position.
1904 XXX - we don't actually use this yet, but, for uncompressed
1905 files, we could optimize it, if desired, by directly using
1907 if (gz_skip(file
, INT64_MAX
) == -1) {
1915 } else if (whence
== SEEK_SET
)
1916 offset
-= file
->pos
;
1917 else if (file
->seek_pending
) {
1918 /* There's a forward-skip pending, so file->pos doesn't reflect
1919 the actual file position, it represents the position from
1920 which we're skipping; update the offset to include that. */
1921 offset
+= file
->skip
;
1923 file
->seek_pending
= false;
1926 * Are we moving at all?
1929 /* No. Just return the current position. */
1934 * Are we seeking backwards?
1940 * Do we have enough data before the current position in the
1941 * buffer that we can seek backwards within the buffer?
1943 if (-offset
<= offset_in_buffer(&file
->out
)) {
1945 * Yes. Adjust appropriately.
1947 * offset is negative, so -offset is non-negative, and
1948 * -offset is <= an unsigned and thus fits in an unsigned.
1949 * Get that value and adjust appropriately.
1951 * (Casting offset to unsigned makes it positive, which
1952 * is not what we would want, so we cast -offset instead.)
1954 * XXX - this won't work with -offset = 2^63, as its
1955 * negative isn't a valid 64-bit integer, but we are
1956 * not at all likely to see files big enough to ever
1957 * see a negative offset that large.
1959 unsigned adjustment
= (unsigned)(-offset
);
1961 file
->out
.avail
+= adjustment
;
1962 file
->out
.next
-= adjustment
;
1963 file
->pos
-= adjustment
;
1968 * No. Offset is positive; we're seeking forwards.
1970 * Do we have enough data after the current position in the
1971 * buffer that we can seek forwards within the buffer?
1973 if (offset
< file
->out
.avail
) {
1975 * Yes. Adjust appropriately.
1977 * offset is < an unsigned and thus fits in an unsigned,
1978 * so we can cast it to unsigned safely.
1980 file
->out
.avail
-= (unsigned)offset
;
1981 file
->out
.next
+= offset
;
1982 file
->pos
+= offset
;
1988 * We're not seeking within the buffer. Do we have "fast seek" data
1989 * for the location to which we will be seeking, and are we either
1990 * seeking backwards or is the fast seek point past what is in the
1991 * buffer? (We don't want to "fast seek" backwards to a point that
1992 * we've already read and buffered if we're actually seeking forwards.)
1994 * It might in certain cases be faster to continue reading linearly
1995 * forward rather than jump to the fast seek point if the distance
1996 * to the fast seek point is small, but we might only be able to do that
1997 * if the compression context doesn't change (which for LZ4 includes if
1998 * we jump to a LZ4 with different options.)
1999 * XXX - profile different buffer and SPAN sizes
2001 if ((here
= fast_seek_find(file
, file
->pos
+ offset
)) &&
2002 (offset
< 0 || here
->out
>= file
->pos
+ file
->out
.avail
)) {
2006 * Yes. Use that data to do the seek.
2007 * Note that this will be true only if file_set_random_access()
2008 * has been called on this file, which should never be the case
2011 switch (here
->compression
) {
2013 #ifdef USE_ZLIB_OR_ZLIBNG
2015 #ifdef HAVE_INFLATEPRIME
2016 off
= here
->in
- (here
->data
.zlib
.bits
? 1 : 0);
2017 #else /* HAVE_INFLATEPRIME */
2019 #endif /* HAVE_INFLATEPRIME */
2023 case GZIP_AFTER_HEADER
:
2027 #endif /* USE_ZLIB_OR_ZLIBNG */
2031 ws_debug("fast seek lz4");
2035 #endif /* USE_LZ4 */
2038 /* In an uncompressed portion, seek directly to the offset */
2039 off2
= (file
->pos
+ offset
);
2040 off
= here
->in
+ (off2
- here
->out
);
2044 /* Otherwise, seek to the fast seek point to do any needed setup. */
2050 if (ws_lseek64(file
->fd
, off
, SEEK_SET
) == -1) {
2054 fast_seek_reset(file
);
2056 file
->raw_pos
= off
;
2057 buf_reset(&file
->out
);
2059 file
->seek_pending
= false;
2061 file
->err_info
= NULL
;
2062 buf_reset(&file
->in
);
2064 switch (here
->compression
) {
2066 #ifdef USE_ZLIB_OR_ZLIBNG
2068 zlib_stream
*strm
= &file
->strm
;
2069 ZLIB_PREFIX(inflateReset
)(strm
);
2070 strm
->adler
= here
->data
.zlib
.adler
;
2071 strm
->total_out
= here
->data
.zlib
.total_out
;
2072 #ifdef HAVE_INFLATEPRIME
2073 if (here
->data
.zlib
.bits
) {
2074 FILE_T state
= file
;
2075 int ret
= GZ_GETC();
2078 if (state
->err
== 0) {
2080 *err
= WTAP_ERR_SHORT_READ
;
2085 (void)ZLIB_PREFIX(inflatePrime
)(strm
, here
->data
.zlib
.bits
, ret
>> (8 - here
->data
.zlib
.bits
));
2087 #endif /* HAVE_INFLATEPRIME */
2088 (void)ZLIB_PREFIX(inflateSetDictionary
)(strm
, here
->data
.zlib
.window
, ZLIB_WINSIZE
);
2089 file
->compression
= ZLIB
;
2093 case GZIP_AFTER_HEADER
: {
2094 zlib_stream
* strm
= &file
->strm
;
2095 ZLIB_PREFIX(inflateReset
)(strm
);
2096 strm
->adler
= ZLIB_PREFIX(crc32
)(0L, Z_NULL
, 0);
2097 file
->compression
= ZLIB
;
2100 #endif /* USE_ZLIB_OR_ZLIBNG */
2104 /* If the frame information seems to have changed (i.e., we fast
2105 * seeked into a different frame that also has different flags
2106 * and options), then reset the context and re-read it.
2107 * Unfortunately the API doesn't provide a method to set the
2108 * context options explicitly based on an already read
2111 if (memcmp(&file
->lz4_info
, &here
->data
.lz4
.lz4_info
, sizeof(LZ4F_frameInfo_t
)) != 0) {
2112 #if LZ4_VERSION_NUMBER >= 10800
2113 LZ4F_resetDecompressionContext(file
->lz4_dctx
);
2114 #else /* LZ4_VERSION_NUMBER >= 10800 */
2115 LZ4F_freeDecompressionContext(file
->lz4_dctx
);
2116 const LZ4F_errorCode_t ret
= LZ4F_createDecompressionContext(&file
->lz4_dctx
, LZ4F_VERSION
);
2117 if (LZ4F_isError(ret
)) {
2118 file
->err
= WTAP_ERR_INTERNAL
;
2119 file
->err_info
= LZ4F_getErrorName(ret
);
2122 #endif /* LZ4_VERSION_NUMBER >= 10800 */
2123 size_t hdr_size
= LZ4F_HEADER_SIZE_MAX
;
2124 const LZ4F_errorCode_t frame_err
= LZ4F_getFrameInfo(file
->lz4_dctx
, &file
->lz4_info
, here
->data
.lz4
.lz4_hdr
, &hdr_size
);
2125 if (LZ4F_isError(frame_err
)) {
2126 file
->err
= WTAP_ERR_DECOMPRESS
;
2127 file
->err_info
= LZ4F_getErrorName(frame_err
);
2131 file
->lz4_info
= here
->data
.lz4
.lz4_info
;
2132 file
->compression
= LZ4
;
2134 #endif /* USE_LZ4 */
2139 const size_t ret
= ZSTD_initDStream(file
->zstd_dctx
);
2140 if (ZSTD_isError(ret
)) {
2141 file
->err
= WTAP_ERR_DECOMPRESS
;
2142 file
->err_info
= ZSTD_getErrorName(ret
);
2145 file
->compression
= ZSTD
;
2148 #endif /* HAVE_ZSTD */
2151 file
->compression
= here
->compression
;
2155 offset
= (file
->pos
+ offset
) - off2
;
2157 ws_debug("Fast seek OK! %"PRId64
, offset
);
2160 /* Don't skip forward yet, wait until we want to read from
2161 the file; that way, if we do multiple seeks in a row,
2162 all involving forward skips, they will be combined. */
2163 file
->seek_pending
= true;
2164 file
->skip
= offset
;
2166 return file
->pos
+ offset
;
2170 * Is this an uncompressed file, are we within the raw area,
2171 * are we either seeking backwards or seeking past the end
2172 * of the buffer, and are we set up for random access with
2173 * file_set_random_access()?
2175 * Again, note that this will never be true on a pipe, as
2176 * file_set_random_access() should never be called if we're
2177 * reading from a pipe.
2179 if (file
->compression
== UNCOMPRESSED
&& file
->pos
+ offset
>= file
->raw
2180 && (offset
< 0 || offset
>= file
->out
.avail
)
2181 && (file
->fast_seek
!= NULL
))
2184 * Yes. Just seek there within the file.
2186 if (ws_lseek64(file
->fd
, offset
- file
->out
.avail
, SEEK_CUR
) == -1) {
2190 file
->raw_pos
+= (offset
- file
->out
.avail
);
2191 buf_reset(&file
->out
);
2193 file
->seek_pending
= false;
2195 file
->err_info
= NULL
;
2196 buf_reset(&file
->in
);
2197 file
->pos
+= offset
;
2202 * Are we seeking backwards?
2206 * Yes. We have no fast seek data, so we have to rewind and
2208 * XXX - true only for compressed files.
2210 * Calculate the amount to skip forward after rewinding.
2212 offset
+= file
->pos
;
2213 if (offset
< 0) { /* before start of file! */
2217 /* rewind, then skip to offset */
2219 /* back up and start over */
2220 if (ws_lseek64(file
->fd
, file
->start
, SEEK_SET
) == -1) {
2224 fast_seek_reset(file
);
2225 file
->raw_pos
= file
->start
;
2230 * Either we're seeking backwards, but have rewound and now need to
2231 * skip forwards, or we're seeking forwards.
2233 * Skip what's in output buffer (one less gzgetc() check).
2235 n
= (int64_t)file
->out
.avail
> offset
? (unsigned)offset
: file
->out
.avail
;
2236 file
->out
.avail
-= n
;
2237 file
->out
.next
+= n
;
2241 /* request skip (if not zero) */
2243 /* Don't skip forward yet, wait until we want to read from
2244 the file; that way, if we do multiple seeks in a row,
2245 all involving forward skips, they will be combined. */
2246 file
->seek_pending
= true;
2247 file
->skip
= offset
;
2249 return file
->pos
+ offset
;
2253 file_tell(FILE_T stream
)
2255 /* return position */
2256 return stream
->pos
+ (stream
->seek_pending
? stream
->skip
: 0);
2260 file_tell_raw(FILE_T stream
)
2262 return stream
->raw_pos
;
2266 file_fstat(FILE_T stream
, ws_statb64
*statb
, int *err
)
2268 if (ws_fstat64(stream
->fd
, statb
) == -1) {
2277 file_iscompressed(FILE_T stream
)
2279 return stream
->is_compressed
;
2282 /* Returns a wtap compression type. If we don't know the compression type,
2283 * return WTAP_UNCOMPRESSED, but if our compression state is temporarily
2284 * UNKNOWN because we need to reread compression headers, return the last
2285 * known compression type.
2287 static wtap_compression_type
2288 file_get_compression_type(FILE_T stream
)
2290 if (stream
->is_compressed
) {
2291 switch ((stream
->compression
== UNKNOWN
) ? stream
->last_compression
: stream
->compression
) {
2294 case GZIP_AFTER_HEADER
:
2295 return WTAP_GZIP_COMPRESSED
;
2298 return WTAP_ZSTD_COMPRESSED
;
2301 return WTAP_LZ4_COMPRESSED
;
2304 return WTAP_UNCOMPRESSED
;
2306 default: /* UNKNOWN, should never happen if is_compressed is set */
2307 ws_assert_not_reached();
2308 return WTAP_UNCOMPRESSED
;
2311 return WTAP_UNCOMPRESSED
;
2315 file_read(void *buf
, unsigned int len
, FILE_T file
)
2319 /* if len is zero, avoid unnecessary operations */
2323 /* process a skip request */
2324 if (file
->seek_pending
) {
2325 file
->seek_pending
= false;
2326 if (gz_skip(file
, file
->skip
) == -1)
2331 * Get len bytes to buf, or less than len if at the end;
2332 * if buf is null, just throw the bytes away.
2336 if (file
->out
.avail
!= 0) {
2337 /* We have stuff in the output buffer; copy
2339 n
= file
->out
.avail
> len
? len
: file
->out
.avail
;
2341 memcpy(buf
, file
->out
.next
, n
);
2342 buf
= (char *)buf
+ n
;
2344 file
->out
.next
+= n
;
2345 file
->out
.avail
-= n
;
2349 } else if (file
->err
!= 0) {
2350 /* We have nothing in the output buffer, and
2351 we have an error that may not have been
2352 reported yet; that means we can't generate
2353 any more data into the output buffer, so
2354 return an error indication. */
2356 } else if (file
->eof
&& file
->in
.avail
== 0) {
2357 /* We have nothing in the output buffer, and
2358 we're at the end of the input; just return
2359 with what we've gotten so far. */
2362 /* We have nothing in the output buffer, and
2363 we can generate more data; get more output,
2364 looking for header if required, and
2365 keep looping to process the new stuff
2366 in the output buffer. */
2367 if (fill_out_buffer(file
) == -1)
2376 * XXX - this *peeks* at next byte, not a character.
2379 file_peekc(FILE_T file
)
2383 /* check that we're reading and that there's no error */
2387 /* try output buffer (no need to check for skip request) */
2388 if (file
->out
.avail
!= 0) {
2389 return *(file
->out
.next
);
2392 /* process a skip request */
2393 if (file
->seek_pending
) {
2394 file
->seek_pending
= false;
2395 if (gz_skip(file
, file
->skip
) == -1)
2398 /* if we processed a skip request, there may be data in the buffer,
2399 * or an error could have occurred; likewise if we didn't do seek but
2400 * now call fill_out_buffer, the errors can occur. So we do this while
2401 * loop to check before and after - this is basically the logic from
2402 * file_read() but only for peeking not consuming a byte
2405 if (file
->out
.avail
!= 0) {
2406 return *(file
->out
.next
);
2408 else if (file
->err
!= 0) {
2411 else if (file
->eof
&& file
->in
.avail
== 0) {
2414 else if (fill_out_buffer(file
) == -1) {
2418 /* it's actually impossible to get here */
2423 * XXX - this gets a byte, not a character.
2426 file_getc(FILE_T file
)
2428 unsigned char buf
[1];
2431 /* check that we're reading and that there's no error */
2435 /* try output buffer (no need to check for skip request) */
2436 if (file
->out
.avail
!= 0) {
2439 return *(file
->out
.next
)++;
2442 ret
= file_read(buf
, 1, file
);
2443 return ret
< 1 ? -1 : buf
[0];
2447 * Like file_gets, but returns a pointer to the terminating NUL
2448 * on success and NULL on failure.
2451 file_getsp(char *buf
, int len
, FILE_T file
)
2457 /* check parameters */
2458 if (buf
== NULL
|| len
< 1)
2461 /* check that there's no error */
2465 /* process a skip request */
2466 if (file
->seek_pending
) {
2467 file
->seek_pending
= false;
2468 if (gz_skip(file
, file
->skip
) == -1)
2472 /* copy output bytes up to new line or len - 1, whichever comes first --
2473 append a terminating zero to the string (we don't check for a zero in
2474 the contents, let the user worry about that) */
2476 left
= (unsigned)len
- 1;
2478 /* assure that something is in the output buffer */
2479 if (file
->out
.avail
== 0) {
2480 /* We have nothing in the output buffer. */
2481 if (file
->err
!= 0) {
2482 /* We have an error that may not have
2483 been reported yet; that means we
2484 can't generate any more data into
2485 the output buffer, so return an
2486 error indication. */
2489 if (fill_out_buffer(file
) == -1)
2490 return NULL
; /* error */
2491 if (file
->out
.avail
== 0) { /* end of file */
2492 if (curp
== buf
) /* got bupkus */
2494 break; /* got something -- return it */
2498 /* look for end-of-line in current output buffer */
2499 n
= file
->out
.avail
> left
? left
: file
->out
.avail
;
2500 eol
= (unsigned char *)memchr(file
->out
.next
, '\n', n
);
2502 n
= (unsigned)(eol
- file
->out
.next
) + 1;
2504 /* copy through end-of-line, or remainder if not found */
2505 memcpy(curp
, file
->out
.next
, n
);
2506 file
->out
.avail
-= n
;
2507 file
->out
.next
+= n
;
2511 } while (left
&& eol
== NULL
);
2513 /* found end-of-line or out of space -- add a terminator and return
2520 * Returns a pointer to the beginning of the buffer on success
2521 * and NULL on failure.
2524 file_gets(char *buf
, int len
, FILE_T file
)
2526 if (!file_getsp(buf
, len
, file
)) return NULL
;
2531 file_eof(FILE_T file
)
2533 /* return end-of-file state */
2534 return (file
->eof
&& file
->in
.avail
== 0 && file
->out
.avail
== 0);
2538 * Routine to return a Wiretap error code (0 for no error, an errno
2539 * for a file error, or a WTAP_ERR_ code for other errors) for an
2540 * I/O stream. Also returns an error string for some errors.
2543 file_error(FILE_T fh
, char **err_info
)
2545 if (fh
->err
!=0 && err_info
) {
2546 /* g_strdup() returns NULL for NULL argument */
2547 *err_info
= g_strdup(fh
->err_info
);
2553 file_clearerr(FILE_T stream
)
2555 /* clear error and end-of-file */
2557 stream
->err_info
= NULL
;
2558 stream
->eof
= false;
2562 file_fdclose(FILE_T file
)
2570 file_fdreopen(FILE_T file
, const char *path
)
2574 if ((fd
= ws_open(path
, O_RDONLY
|O_BINARY
, 0000)) == -1)
2581 file_close(FILE_T file
)
2585 /* free memory and close file */
2587 #ifdef USE_ZLIB_OR_ZLIBNG
2588 ZLIB_PREFIX(inflateEnd
)(&(file
->strm
));
2589 #endif /* USE_ZLIB_OR_ZLIBNG */
2591 ZSTD_freeDCtx(file
->zstd_dctx
);
2592 #endif /* HAVE_ZSTD */
2594 LZ4F_freeDecompressionContext(file
->lz4_dctx
);
2595 #endif /* USE_LZ4 */
2596 g_free(file
->out
.buf
);
2597 g_free(file
->in
.buf
);
2599 g_free(file
->fast_seek_cur
);
2601 file
->err_info
= NULL
;
2604 * If fd is -1, somebody's done a file_closefd() on us, so
2605 * we don't need to close the FD itself, and shouldn't do
2612 #ifdef USE_ZLIB_OR_ZLIBNG
2613 /* internal gzip file state data structure for writing */
2614 struct wtap_writer
{
2615 int fd
; /* file descriptor */
2616 int64_t pos
; /* current position in uncompressed data */
2617 unsigned size
; /* buffer size, zero if not allocated yet */
2618 unsigned want
; /* requested buffer size, default is GZBUFSIZE */
2619 unsigned char *in
; /* input buffer */
2620 unsigned char *out
; /* output buffer (double-sized when reading) */
2621 unsigned char *next
; /* next output data to deliver or write */
2622 int level
; /* compression level */
2623 int strategy
; /* compression strategy */
2624 int err
; /* error code */
2625 const char *err_info
; /* additional error information string for some errors */
2626 /* zlib deflate stream */
2627 zlib_stream strm
; /* stream structure in-place (not a pointer) */
2631 gzwfile_open(const char *path
)
2637 fd
= ws_open(path
, O_BINARY
|O_WRONLY
|O_CREAT
|O_TRUNC
, 0666);
2640 state
= gzwfile_fdopen(fd
);
2641 if (state
== NULL
) {
2650 gzwfile_fdopen(int fd
)
2654 /* allocate wtap_writer structure to return */
2655 state
= (GZWFILE_T
)g_try_malloc(sizeof *state
);
2659 state
->size
= 0; /* no buffers allocated yet */
2660 state
->want
= GZBUFSIZE
; /* requested buffer size */
2662 state
->level
= Z_DEFAULT_COMPRESSION
;
2663 state
->strategy
= Z_DEFAULT_STRATEGY
;
2665 /* initialize stream */
2666 state
->err
= Z_OK
; /* clear error */
2667 state
->err_info
= NULL
; /* clear additional error information */
2668 state
->pos
= 0; /* no uncompressed data yet */
2669 state
->strm
.avail_in
= 0; /* no input data yet */
2675 /* Initialize state for writing a gzip file. Mark initialization by setting
2676 state->size to non-zero. Return -1, and set state->err and possibly
2677 state->err_info, on failure; return 0 on success. */
2679 gz_init(GZWFILE_T state
)
2683 zng_streamp strm
= &(state
->strm
);
2684 #else /* HAVE_ZLIBNG */
2685 z_streamp strm
= &(state
->strm
);
2686 #endif /* HAVE_ZLIBNG */
2688 /* allocate input and output buffers */
2689 state
->in
= (unsigned char *)g_try_malloc(state
->want
);
2690 state
->out
= (unsigned char *)g_try_malloc(state
->want
);
2691 if (state
->in
== NULL
|| state
->out
== NULL
) {
2694 state
->err
= ENOMEM
;
2698 /* allocate deflate memory, set up for gzip compression */
2699 strm
->zalloc
= Z_NULL
;
2700 strm
->zfree
= Z_NULL
;
2701 strm
->opaque
= Z_NULL
;
2702 ret
= ZLIB_PREFIX(deflateInit2
)(strm
, state
->level
, Z_DEFLATED
,
2703 15 + 16, 8, state
->strategy
);
2707 if (ret
== Z_MEM_ERROR
) {
2708 /* This means "not enough memory". */
2709 state
->err
= ENOMEM
;
2711 /* This "shouldn't happen". */
2712 state
->err
= WTAP_ERR_INTERNAL
;
2713 state
->err_info
= "Unknown error from deflateInit2()";
2718 /* mark state as initialized */
2719 state
->size
= state
->want
;
2721 /* initialize write buffer */
2722 strm
->avail_out
= state
->size
;
2723 strm
->next_out
= state
->out
;
2724 state
->next
= strm
->next_out
;
2728 /* Compress whatever is at avail_in and next_in and write to the output file.
2729 Return -1, and set state->err and possibly state->err_info, if there is
2730 an error writing to the output file; return 0 on success.
2731 flush is assumed to be a valid deflate() flush value. If flush is Z_FINISH,
2732 then the deflate() state is reset to start a new gzip stream. */
2734 gz_comp(GZWFILE_T state
, int flush
)
2740 zng_streamp strm
= &(state
->strm
);
2741 #else /* HAVE_ZLIBNG */
2742 z_streamp strm
= &(state
->strm
);
2743 #endif /* HAVE_ZLIBNG */
2744 /* allocate memory if this is the first time through */
2745 if (state
->size
== 0 && gz_init(state
) == -1)
2748 /* run deflate() on provided input until it produces no more output */
2751 /* write out current buffer contents if full, or if flushing, but if
2752 doing Z_FINISH then don't write until we get to Z_STREAM_END */
2753 if (strm
->avail_out
== 0 || (flush
!= Z_NO_FLUSH
&&
2754 (flush
!= Z_FINISH
|| ret
== Z_STREAM_END
))) {
2755 have
= strm
->next_out
- state
->next
;
2757 got
= ws_write(state
->fd
, state
->next
, (unsigned int)have
);
2762 if ((ptrdiff_t)got
!= have
) {
2763 state
->err
= WTAP_ERR_SHORT_WRITE
;
2767 if (strm
->avail_out
== 0) {
2768 strm
->avail_out
= state
->size
;
2769 strm
->next_out
= state
->out
;
2771 state
->next
= strm
->next_out
;
2775 have
= strm
->avail_out
;
2776 ret
= ZLIB_PREFIX(deflate
)(strm
, flush
);
2777 if (ret
== Z_STREAM_ERROR
) {
2778 /* This "shouldn't happen". */
2779 state
->err
= WTAP_ERR_INTERNAL
;
2780 state
->err_info
= "Z_STREAM_ERROR from deflate()";
2783 have
-= strm
->avail_out
;
2786 /* if that completed a deflate stream, allow another to start */
2787 if (flush
== Z_FINISH
)
2788 ZLIB_PREFIX(deflateReset
)(strm
);
2790 /* all done, no errors */
2794 /* Write out len bytes from buf. Return 0, and set state->err, on
2795 failure or on an attempt to write 0 bytes (in which case state->err
2796 is Z_OK); return the number of bytes written on success. */
2798 gzwfile_write(GZWFILE_T state
, const void *buf
, unsigned len
)
2804 #else /* HAVE_ZLIBNG */
2806 #endif /* HAVE_ZLIBNG */
2808 strm
= &(state
->strm
);
2810 /* check that there's no error */
2811 if (state
->err
!= Z_OK
)
2814 /* if len is zero, avoid unnecessary operations */
2818 /* allocate memory if this is the first time through */
2819 if (state
->size
== 0 && gz_init(state
) == -1)
2822 /* for small len, copy to input buffer, otherwise compress directly */
2823 if (len
< state
->size
) {
2824 /* copy to input buffer, compress when full */
2826 if (strm
->avail_in
== 0)
2827 strm
->next_in
= state
->in
;
2828 n
= state
->size
- strm
->avail_in
;
2833 memcpy((Bytef
*)strm
->next_in
+ strm
->avail_in
, buf
, n
);
2836 memcpy(strm
->next_in
+ strm
->avail_in
, buf
, n
);
2837 #endif /* z_const */
2838 strm
->avail_in
+= n
;
2840 buf
= (const char *)buf
+ n
;
2842 if (len
&& gz_comp(state
, Z_NO_FLUSH
) == -1)
2847 /* consume whatever's left in the input buffer */
2848 if (strm
->avail_in
!= 0 && gz_comp(state
, Z_NO_FLUSH
) == -1)
2851 /* directly compress user buffer to file */
2852 strm
->avail_in
= len
;
2854 strm
->next_in
= (z_const Bytef
*)buf
;
2857 strm
->next_in
= (Bytef
*)buf
;
2859 #endif /* z_const */
2861 if (gz_comp(state
, Z_NO_FLUSH
) == -1)
2865 /* input was all buffered or compressed (put will fit in int) */
2869 /* Flush out what we've written so far. Returns -1, and sets state->err,
2870 on failure; returns 0 on success. */
2872 gzwfile_flush(GZWFILE_T state
)
2874 /* check that there's no error */
2875 if (state
->err
!= Z_OK
)
2878 /* compress remaining data with Z_SYNC_FLUSH */
2879 gz_comp(state
, Z_SYNC_FLUSH
);
2880 if (state
->err
!= Z_OK
)
2885 /* Flush out all data written, and close the file. Returns a Wiretap
2886 error on failure; returns 0 on success. */
2888 gzwfile_close(GZWFILE_T state
)
2892 /* flush, free memory, and close file */
2893 if (gz_comp(state
, Z_FINISH
) == -1)
2895 (void)ZLIB_PREFIX(deflateEnd
)(&(state
->strm
));
2899 if (ws_close(state
->fd
) == -1 && ret
== 0)
2906 gzwfile_geterr(GZWFILE_T state
)
2910 #endif /* USE_ZLIB_OR_ZLIBNG */
2913 /* internal lz4 file state data structure for writing */
2915 int fd
; /* file descriptor */
2916 int64_t pos
; /* current position in uncompressed data */
2918 size_t size_out
; /* buffer size, zero if not allocated yet */
2919 size_t want
; /* requested buffer size, default is LZ4BUFSIZE */
2920 size_t want_out
; /* requested output buffer size, determined from want */
2921 unsigned char *out
; /* output buffer, containing uncompressed data */
2922 int err
; /* error code */
2923 const char *err_info
; /* additional error information string for some errors */
2924 LZ4F_preferences_t lz4_prefs
;
2925 LZ4F_cctx
*lz4_cctx
;
2929 lz4wfile_open(const char *path
)
2935 fd
= ws_open(path
, O_BINARY
|O_WRONLY
|O_CREAT
|O_TRUNC
, 0666);
2938 state
= lz4wfile_fdopen(fd
);
2939 if (state
== NULL
) {
2948 lz4wfile_fdopen(int fd
)
2952 /* allocate wtap_writer structure to return */
2953 state
= (LZ4WFILE_T
)g_try_malloc(sizeof *state
);
2957 state
->size_out
= 0; /* no buffer allocated yet */
2958 state
->want
= LZ4BUFSIZE
; /* max input size (a block) */
2959 state
->want_out
= LZ4F_compressBound(state
->want
, &state
->lz4_prefs
);
2961 * This size guarantees that we will always have enough room to
2962 * write the result of LZ4F_compressUpdate (or Flush or End),
2963 * so long as the output buffer is empty (i.e., we immediately
2964 * write to the output file anything the compressor hands back
2965 * instead of buffering.)
2968 memset(&state
->lz4_prefs
, 0, sizeof(LZ4F_preferences_t
));
2969 /* Use the same prefs as the lz4 command line utility defaults. */
2970 state
->lz4_prefs
.frameInfo
.blockMode
= LZ4F_blockIndependent
; /* Allows fast seek */
2971 state
->lz4_prefs
.frameInfo
.contentChecksumFlag
= 1;
2972 state
->lz4_prefs
.frameInfo
.blockSizeID
= LZ4F_max4MB
;
2973 /* XXX - What should we set state->lz4_prefs.compressionLevel to?
2974 * The command line utility uses 1, recommends 9 as another option, and
2975 * also there's 12 (max).
2977 * We could provide an API call or perhaps two or three preset options.
2979 state
->lz4_prefs
.compressionLevel
= 1;
2981 /* initialize stream */
2982 state
->err
= 0; /* clear error */
2983 state
->err_info
= NULL
; /* clear additional error information */
2984 state
->pos
= 0; /* no uncompressed data yet */
2991 /* Writes len bytes from the output buffer to the file.
2992 * Return true on success; returns false and sets state->err on failure.
2995 lz4_write_out(LZ4WFILE_T state
, size_t len
)
2998 ssize_t got
= ws_write(state
->fd
, state
->out
, (unsigned)len
);
3003 if ((unsigned)got
!= len
) {
3004 state
->err
= WTAP_ERR_SHORT_WRITE
;
3007 state
->pos_out
+= got
;
3012 /* Initialize state for writing an lz4 file. Mark initialization by setting
3013 state->size to non-zero. Return -1, and set state->err and possibly
3014 state->err_info, on failure; return 0 on success. */
3016 lz4_init(LZ4WFILE_T state
)
3018 LZ4F_errorCode_t ret
;
3020 /* create Compression context */
3021 ret
= LZ4F_createCompressionContext(&state
->lz4_cctx
, LZ4F_VERSION
);
3022 if (LZ4F_isError(ret
)) {
3023 state
->err
= WTAP_ERR_CANT_WRITE
; // XXX - WTAP_ERR_COMPRESS?
3024 state
->err_info
= LZ4F_getErrorName(ret
);
3028 /* allocate buffer */
3029 state
->out
= (unsigned char *)g_try_malloc(state
->want_out
);
3030 if (state
->out
== NULL
) {
3032 LZ4F_freeCompressionContext(state
->lz4_cctx
);
3033 state
->err
= ENOMEM
;
3037 ret
= LZ4F_compressBegin(state
->lz4_cctx
, state
->out
, state
->want_out
, &state
->lz4_prefs
);
3038 if (LZ4F_isError(ret
)) {
3039 state
->err
= WTAP_ERR_CANT_WRITE
; // XXX - WTAP_ERR_COMPRESS?
3040 state
->err_info
= LZ4F_getErrorName(ret
);
3043 if (!lz4_write_out(state
, ret
)) {
3047 /* mark state as initialized */
3048 state
->size_out
= state
->want_out
;
3053 /* Write out len bytes from buf. Return 0, and set state->err, on
3054 failure or on an attempt to write 0 bytes (in which case state->err
3055 is 0); return the number of bytes written on success. */
3057 lz4wfile_write(LZ4WFILE_T state
, const void *buf
, size_t len
)
3062 /* check that there's no error */
3063 if (state
->err
!= 0)
3066 /* if len is zero, avoid unnecessary operations */
3070 /* allocate memory if this is the first time through */
3071 if (state
->size_out
== 0 && lz4_init(state
) == -1)
3075 to_write
= MIN(len
, state
->want
);
3076 size_t bytesWritten
= LZ4F_compressUpdate(state
->lz4_cctx
, state
->out
, state
->size_out
,
3077 buf
, to_write
, NULL
);
3078 if (LZ4F_isError(bytesWritten
)) {
3079 state
->err
= WTAP_ERR_CANT_WRITE
; // XXX - WTAP_ERR_COMPRESS?
3080 state
->err_info
= LZ4F_getErrorName(bytesWritten
);
3083 if (!lz4_write_out(state
, bytesWritten
)) {
3086 state
->pos
+= to_write
;
3090 /* input was all buffered or compressed */
3094 /* Flush out what we've written so far. Returns -1, and sets state->err,
3095 on failure; returns 0 on success. */
3097 lz4wfile_flush(LZ4WFILE_T state
)
3099 size_t bytesWritten
;
3100 /* check that there's no error */
3101 if (state
->err
!= 0)
3104 bytesWritten
= LZ4F_flush(state
->lz4_cctx
, state
->out
, state
->size_out
, NULL
);
3105 if (LZ4F_isError(bytesWritten
)) {
3106 // Should never happen if size_out >= LZ4F_compressBound(0, prefsPtr)
3107 state
->err
= WTAP_ERR_INTERNAL
;
3110 if (!lz4_write_out(state
, bytesWritten
)) {
3116 /* Flush out all data written, and close the file. Returns a Wiretap
3117 error on failure; returns 0 on success. */
3119 lz4wfile_close(LZ4WFILE_T state
)
3123 /* flush, free memory, and close file */
3124 size_t bytesWritten
= LZ4F_compressEnd(state
->lz4_cctx
, state
->out
, state
->size_out
, NULL
);
3125 if (LZ4F_isError(bytesWritten
)) {
3126 // Should never happen if size_out >= LZ4F_compressBound(0, prefsPtr)
3127 ret
= WTAP_ERR_INTERNAL
;
3129 if (!lz4_write_out(state
, bytesWritten
)) {
3133 LZ4F_freeCompressionContext(state
->lz4_cctx
);
3134 if (ws_close(state
->fd
) == -1 && ret
== 0)
3141 lz4wfile_geterr(LZ4WFILE_T state
)
3145 #endif /* USE_LZ4 */
3147 * Editor modelines - https://www.wireshark.org/tools/modelines.html
3152 * indent-tabs-mode: nil
3155 * vi: set shiftwidth=4 tabstop=8 expandtab:
3156 * :indentSize=4:tabSize=8:noTabs=true: