regen pidl all: rm epan/dissectors/pidl/*-stamp; pushd epan/dissectors/pidl/ && make...
[wireshark-sm.git] / wiretap / file_wrappers.c
blob9460bbdcd6affceafee2ff42830ea4b69ef88949
/* file_wrappers.c
 *
 * Wiretap Library
 * Copyright (c) 1998 by Gilbert Ramirez <gram@alumni.rice.edu>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

/* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib
 * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
 * under licence:
 *
 * SPDX-License-Identifier: Zlib
 */
17 #include "config.h"
19 #define WS_LOG_DOMAIN LOG_DOMAIN_WIRETAP
21 #include "file_wrappers.h"
23 #include <assert.h>
24 #include <errno.h>
25 #include <string.h>
26 #include "wtap-int.h"
28 #include <wsutil/file_util.h>
30 #if defined(HAVE_ZLIB) && !defined(HAVE_ZLIBNG)
31 #define USE_ZLIB_OR_ZLIBNG
32 #define ZLIB_CONST
33 #define ZLIB_PREFIX(x) x
34 #include <zlib.h>
35 typedef z_stream zlib_stream;
36 #endif /* defined(HAVE_ZLIB) && !defined(HAVE_ZLIBNG) */
38 #ifdef HAVE_ZLIBNG
39 #define USE_ZLIB_OR_ZLIBNG
40 #define HAVE_INFLATEPRIME 1
41 #define ZLIB_PREFIX(x) zng_ ## x
42 #include <zlib-ng.h>
43 typedef zng_stream zlib_stream;
44 #endif /* HAVE_ZLIBNG */
46 #ifdef HAVE_ZSTD
47 #include <zstd.h>
48 #endif /* HAVE_ZSTD */
50 #ifdef HAVE_LZ4
51 #include <lz4.h>
53 #if LZ4_VERSION_NUMBER >= 10703
54 #define USE_LZ4
55 #include <lz4frame.h>
56 #ifndef LZ4F_BLOCK_HEADER_SIZE /* Added in LZ4_VERSION_NUMBER 10902 */
57 #define LZ4F_BLOCK_HEADER_SIZE 4
58 #endif /* LZ4F_BLOCK_HEADER_SIZE */
59 #endif /* LZ4_VERSION_NUMBER >= 10703 */
60 #endif /* HAVE_LZ4 */
63 * List of compression types supported.
65 static struct compression_type {
66 wtap_compression_type type;
67 const char *extension;
68 const char *description;
69 const char *name;
70 const bool can_write_compressed;
71 } compression_types[] = {
72 #ifdef USE_ZLIB_OR_ZLIBNG
73 { WTAP_GZIP_COMPRESSED, "gz", "gzip compressed", "gzip", true },
74 #endif /* USE_ZLIB_OR_ZLIBNG */
75 #ifdef HAVE_ZSTD
76 { WTAP_ZSTD_COMPRESSED, "zst", "zstd compressed", "zstd", false },
77 #endif /* HAVE_ZSTD */
78 #ifdef USE_LZ4
79 { WTAP_LZ4_COMPRESSED, "lz4", "lz4 compressed", "lz4", true },
80 #endif /* USE_LZ4 */
81 { WTAP_UNCOMPRESSED, NULL, NULL, "none", true },
82 { WTAP_UNKNOWN_COMPRESSION, NULL, NULL, NULL, false },
85 static wtap_compression_type file_get_compression_type(FILE_T stream);
87 wtap_compression_type
88 wtap_name_to_compression_type(const char *name)
90 for (struct compression_type *p = compression_types;
91 p->type != WTAP_UNKNOWN_COMPRESSION; p++) {
92 if (!g_strcmp0(name, p->name))
93 return p->type;
95 return WTAP_UNKNOWN_COMPRESSION;
98 wtap_compression_type
99 wtap_extension_to_compression_type(const char *ext)
101 for (struct compression_type *p = compression_types;
102 p->type != WTAP_UNKNOWN_COMPRESSION; p++) {
103 if (!g_strcmp0(ext, p->extension))
104 return p->type;
106 return WTAP_UNKNOWN_COMPRESSION;
109 bool
110 wtap_can_write_compression_type(wtap_compression_type compression_type)
112 for (struct compression_type *p = compression_types; p->type != WTAP_UNKNOWN_COMPRESSION; p++) {
113 if (compression_type == p->type)
114 return p->can_write_compressed;
117 return false;
120 wtap_compression_type
121 wtap_get_compression_type(wtap *wth)
123 return file_get_compression_type((wth->fh == NULL) ? wth->random_fh : wth->fh);
126 const char *
127 wtap_compression_type_description(wtap_compression_type compression_type)
129 for (struct compression_type *p = compression_types;
130 p->type != WTAP_UNCOMPRESSED; p++) {
131 if (p->type == compression_type)
132 return p->description;
134 return NULL;
137 const char *
138 wtap_compression_type_extension(wtap_compression_type compression_type)
140 for (struct compression_type *p = compression_types;
141 p->type != WTAP_UNCOMPRESSED; p++) {
142 if (p->type == compression_type)
143 return p->extension;
145 return NULL;
148 GSList *
149 wtap_get_all_compression_type_extensions_list(void)
151 GSList *extensions;
153 extensions = NULL; /* empty list, to start with */
155 for (struct compression_type *p = compression_types;
156 p->type != WTAP_UNCOMPRESSED; p++)
157 extensions = g_slist_prepend(extensions, (void *)p->extension);
159 return extensions;
162 GSList *
163 wtap_get_all_output_compression_type_names_list(void)
165 GSList *names;
167 names = NULL; /* empty list, to start with */
169 for (struct compression_type *p = compression_types;
170 p->type != WTAP_UNCOMPRESSED; p++) {
171 if (p->can_write_compressed)
172 names = g_slist_prepend(names, (void *)p->name);
175 return names;
/* Buffer size constants (bytes). */
/* #define GZBUFSIZE 8192 */
#define GZBUFSIZE 4096
#define LZ4BUFSIZE 4194304 // 4MiB, maximum block size
/*
 * Values for the "compression" and "last_compression" members of
 * struct wtap_reader, also stored in fast-seek points.
 */
typedef enum {
    UNKNOWN,            /* not yet known - look for a compression header */
    UNCOMPRESSED,       /* no compression - copy input directly */
    ZLIB,               /* decompress a zlib (deflate) stream */
    GZIP_AFTER_HEADER,  /* tag used for the seek point recorded after a gzip header */
    ZSTD,               /* decompress a Zstandard stream */
    LZ4,                /* decompress an LZ4 frame */
} compression_t;
/*
 * We limit the size of our input and output buffers to 2^30 bytes,
 * because:
 *
 *    1) on Windows with MSVC, the return value of _read() is int,
 *       so the biggest read you can do is INT_MAX, and the biggest
 *       power of 2 below that is 2^30;
 *
 *    2) the "avail_in" and "avail_out" values in a z_stream structure
 *       in zlib are uInts, and those are unsigned ints, and that
 *       imposes a limit on the buffer size when we're reading a
 *       gzipped file.
 *
 * Thus, we use unsigned for the buffer sizes, offsets, amount available
 * from the buffer, etc.
 *
 * If we want an even bigger buffer for uncompressed data, or for
 * some other form of compression, then the unsigned-sized values should
 * be in structure values used only for reading gzipped files, and
 * other values should be used for uncompressed data or data
 * compressed using other algorithms (e.g., in a union).
 */
#define MAX_READ_BUF_SIZE (1U << 30)
/*
 * A data buffer together with a read cursor.
 *
 * The bytes not yet delivered are next[0] .. next[avail - 1]; the bytes
 * between buf and next have already been consumed.
 */
struct wtap_reader_buf {
    uint8_t *buf;    /* start of the buffer */
    uint8_t *next;   /* next byte to deliver from the buffer */
    unsigned avail;  /* number of bytes available to deliver at next */
};
222 struct wtap_reader {
223 int fd; /* file descriptor */
224 int64_t raw_pos; /* current position in file (just to not call lseek()) */
225 int64_t pos; /* current position in uncompressed data */
226 unsigned size; /* buffer size */
228 struct wtap_reader_buf in; /* input buffer, containing compressed data */
229 struct wtap_reader_buf out; /* output buffer, containing uncompressed data */
231 bool eof; /* true if end of input file reached */
232 int64_t start; /* where the gzip data started, for rewinding */
233 int64_t raw; /* where the raw data started, for seeking */
234 compression_t compression; /* type of compression, if any */
235 compression_t last_compression; /* last known compression type */
236 bool is_compressed; /* false if completely uncompressed, true otherwise */
238 /* seek request */
239 int64_t skip; /* amount to skip (already rewound if backwards) */
240 bool seek_pending; /* true if seek request pending */
242 /* error information */
243 int err; /* error code */
244 const char *err_info; /* additional error information string for some errors */
247 * Decompression stream information.
249 * XXX - should this be a union?
251 #ifdef USE_ZLIB_OR_ZLIBNG
252 /* zlib inflate stream */
253 zlib_stream strm; /* stream structure in-place (not a pointer) */
254 bool dont_check_crc; /* true if we aren't supposed to check the CRC */
255 #endif /* USE_ZLIB_OR_ZLIBNG */
256 #ifdef HAVE_ZSTD
257 ZSTD_DCtx *zstd_dctx;
258 #endif /* HAVE_ZSTD */
259 #ifdef USE_LZ4
260 LZ4F_dctx *lz4_dctx;
261 LZ4F_frameInfo_t lz4_info;
262 unsigned char lz4_hdr[LZ4F_HEADER_SIZE_MAX];
263 #endif /* USE_LZ4 */
265 /* fast seeking */
266 GPtrArray *fast_seek;
267 void *fast_seek_cur;
270 /* Current read offset within a buffer. */
271 static unsigned
272 offset_in_buffer(struct wtap_reader_buf *buf)
274 /* buf->next points to the next byte to read, and buf->buf points
275 to the first byte in the buffer, so the difference between them
276 is the offset.
278 This will fit in an unsigned int, because it can't be bigger
279 than the size of the buffer, which is an unsigned int. */
280 return (unsigned)(buf->next - buf->buf);
283 /* Number of bytes of data that are in a buffer. */
284 static unsigned
285 bytes_in_buffer(struct wtap_reader_buf *buf)
287 /* buf->next + buf->avail points just past the last byte of data in
288 the buffer.
289 Thus, (buf->next + buf->avail) - buf->buf is the number of bytes
290 of data in the buffer.
292 This will fit in an unsigned, because it can't be bigger
293 than the size of the buffer, which is a unsigned. */
294 return (unsigned)((buf->next + buf->avail) - buf->buf);
297 /* Reset a buffer, discarding all data in the buffer, so we read into
298 it starting at the beginning. */
299 static void
300 buf_reset(struct wtap_reader_buf *buf)
302 buf->next = buf->buf;
303 buf->avail = 0;
306 static int
307 buf_read(FILE_T state, struct wtap_reader_buf *buf)
309 unsigned space_left, to_read;
310 unsigned char *read_ptr;
311 ssize_t ret;
313 /* How much space is left at the end of the buffer?
314 XXX - the output buffer actually has state->size * 2 bytes. */
315 space_left = state->size - bytes_in_buffer(buf);
316 if (space_left == 0) {
317 /* There's no space left, so we start fresh at the beginning
318 of the buffer. */
319 buf_reset(buf);
321 read_ptr = buf->buf;
322 to_read = state->size;
323 } else {
324 /* There's some space left; try to read as much data as we
325 can into that space. We may get less than that if we're
326 reading from a pipe or if we're near the end of the file. */
327 read_ptr = buf->next + buf->avail;
328 to_read = space_left;
331 ret = ws_read(state->fd, read_ptr, to_read);
332 if (ret < 0) {
333 state->err = errno;
334 state->err_info = NULL;
335 return -1;
337 if (ret == 0)
338 state->eof = true;
339 state->raw_pos += ret;
340 buf->avail += (unsigned)ret;
341 return 0;
344 static int /* gz_avail */
345 fill_in_buffer(FILE_T state)
347 if (state->err != 0)
348 return -1;
349 if (!state->eof) {
350 if (buf_read(state, &state->in) < 0)
351 return -1;
353 return 0;
/* Sizes, in bytes, of the history windows kept for fast seeking. */
#define ZLIB_WINSIZE 32768
#define LZ4_WINSIZE 65536
359 struct fast_seek_point {
360 int64_t out; /* corresponding offset in uncompressed data */
361 int64_t in; /* offset in input file of first full byte */
363 compression_t compression;
364 union {
365 struct {
366 #ifdef HAVE_INFLATEPRIME
367 int bits; /* number of bits (1-7) from byte at in - 1, or 0 */
368 #endif /* HAVE_INFLATEPRIME */
369 unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
371 /* be gentle with Z_STREAM_END, 8 bytes more... Another solution would be to comment checks out */
372 uint32_t adler;
373 uint32_t total_out;
374 } zlib;
375 #ifdef USE_LZ4
376 struct {
377 LZ4F_frameInfo_t lz4_info;
378 unsigned char lz4_hdr[LZ4F_HEADER_SIZE_MAX];
379 #if 0
380 unsigned char window[LZ4_WINSIZE]; /* preceding 64K of uncompressed data */
381 #endif
382 } lz4;
383 #endif
384 } data;
387 struct zlib_cur_seek_point {
388 unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
389 unsigned int pos;
390 unsigned int have;
393 #define SPAN INT64_C(1048576)
394 static struct fast_seek_point *
395 fast_seek_find(FILE_T file, int64_t pos)
397 struct fast_seek_point *smallest = NULL;
398 struct fast_seek_point *item;
399 unsigned low, i, max;
401 if (!file->fast_seek)
402 return NULL;
404 for (low = 0, max = file->fast_seek->len; low < max; ) {
405 i = (low + max) / 2;
406 item = (struct fast_seek_point *)file->fast_seek->pdata[i];
408 if (pos < item->out)
409 max = i;
410 else if (pos > item->out) {
411 smallest = item;
412 low = i + 1;
413 } else {
414 return item;
417 return smallest;
420 static void
421 fast_seek_header(FILE_T file, int64_t in_pos, int64_t out_pos,
422 compression_t compression)
424 struct fast_seek_point *item = NULL;
426 if (!file->fast_seek) {
427 return;
430 if (file->fast_seek->len != 0)
431 item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
433 /* fast_seek_header always adds a fast seek point, even if less than
434 * SPAN from the last one. That is because it used for new streams
435 * (including concatenated streams) where the compression type
436 * or, for LZ4, compression options, may change.
438 if (!item || item->out < out_pos) {
439 struct fast_seek_point *val = g_new(struct fast_seek_point,1);
440 val->in = in_pos;
441 val->out = out_pos;
442 val->compression = compression;
444 #ifdef USE_LZ4
445 if (compression == LZ4) {
446 val->data.lz4.lz4_info = file->lz4_info;
447 memcpy(val->data.lz4.lz4_hdr, file->lz4_hdr, LZ4F_HEADER_SIZE_MAX);
449 #endif /* USE_LZ4 */
450 g_ptr_array_add(file->fast_seek, val);
454 static void
455 fast_seek_reset(FILE_T state)
457 switch (state->compression) {
459 case UNKNOWN:
460 break;
462 case UNCOMPRESSED:
463 /* Nothing to do */
464 break;
466 case ZLIB:
467 #ifdef USE_ZLIB_OR_ZLIBNG
468 if (state->fast_seek_cur != NULL) {
469 struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
471 cur->have = 0;
473 #else
474 /* This "cannot happen" */
475 ws_assert_not_reached();
476 #endif /* USE_ZLIB_OR_ZLIBNG */
477 break;
479 case GZIP_AFTER_HEADER:
480 break;
482 case ZSTD:
483 #ifdef HAVE_ZSTD
484 /* Anything to do? */
485 #else
486 /* This "cannot happen" */
487 ws_assert_not_reached();
488 #endif /* HAVE_ZSTD */
489 break;
491 case LZ4:
492 #ifdef HAVE_LZ4
493 /* Anything to do? */
494 #else
495 /* This "cannot happen" */
496 ws_assert_not_reached();
497 #endif /* HAVE_LZ4 */
498 break;
500 /* Add other compression types here */
502 default:
503 /* This "cannot happen" */
504 ws_assert_not_reached();
505 break;
509 static bool
510 uncompressed_fill_out_buffer(FILE_T state)
512 if (buf_read(state, &state->out) < 0)
513 return false;
514 return true;
/* Get next byte from input, or -1 if end or error.
 *
 * Expects a variable named "state" (a FILE_T) to be in scope.
 *
 * Note:
 *
 *      1) errors from buf_read(), and thus from fill_in_buffer(), are
 *      "sticky", and fill_in_buffer() won't do any reading if there's
 *      an error;
 *
 *      2) GZ_GETC() returns -1 on an EOF;
 *
 * so it's safe to make multiple GZ_GETC() calls and only check the
 * last one for an error. */
#define GZ_GETC() ((state->in.avail == 0 && fill_in_buffer(state) == -1) ? -1 : \
                   (state->in.avail == 0 ? -1 :                         \
                    (state->in.avail--, *(state->in.next)++)))
/*
 * Gzipped files, using compression from zlib or zlib-ng.
 *
 * https://tools.ietf.org/html/rfc1952 (RFC 1952)
 */
539 #ifdef USE_ZLIB_OR_ZLIBNG
541 /* Get a one-byte integer and return 0 on success and the value in *ret.
542 Otherwise -1 is returned, state->err is set, and *ret is not modified. */
543 static int
544 gz_next1(FILE_T state, uint8_t *ret)
546 int ch;
548 ch = GZ_GETC();
549 if (ch == -1) {
550 if (state->err == 0) {
551 /* EOF */
552 state->err = WTAP_ERR_SHORT_READ;
553 state->err_info = NULL;
555 return -1;
557 *ret = ch;
558 return 0;
561 /* Get a two-byte little-endian integer and return 0 on success and the value
562 in *ret. Otherwise -1 is returned, state->err is set, and *ret is not
563 modified. */
564 static int
565 gz_next2(FILE_T state, uint16_t *ret)
567 uint16_t val;
568 int ch;
570 val = GZ_GETC();
571 ch = GZ_GETC();
572 if (ch == -1) {
573 if (state->err == 0) {
574 /* EOF */
575 state->err = WTAP_ERR_SHORT_READ;
576 state->err_info = NULL;
578 return -1;
580 val += (uint16_t)ch << 8;
581 *ret = val;
582 return 0;
585 /* Get a four-byte little-endian integer and return 0 on success and the value
586 in *ret. Otherwise -1 is returned, state->err is set, and *ret is not
587 modified. */
588 static int
589 gz_next4(FILE_T state, uint32_t *ret)
591 uint32_t val;
592 int ch;
594 val = GZ_GETC();
595 val += (unsigned)GZ_GETC() << 8;
596 val += (uint32_t)GZ_GETC() << 16;
597 ch = GZ_GETC();
598 if (ch == -1) {
599 if (state->err == 0) {
600 /* EOF */
601 state->err = WTAP_ERR_SHORT_READ;
602 state->err_info = NULL;
604 return -1;
606 val += (uint32_t)ch << 24;
607 *ret = val;
608 return 0;
611 /* Skip the specified number of bytes and return 0 on success. Otherwise -1
612 is returned. */
613 static int
614 gz_skipn(FILE_T state, size_t n)
616 while (n != 0) {
617 if (GZ_GETC() == -1) {
618 if (state->err == 0) {
619 /* EOF */
620 state->err = WTAP_ERR_SHORT_READ;
621 state->err_info = NULL;
623 return -1;
625 n--;
627 return 0;
630 /* Skip a null-terminated string and return 0 on success. Otherwise -1
631 is returned. */
632 static int
633 gz_skipzstr(FILE_T state)
635 int ch;
637 /* It's null-terminated, so scan until we read a byte with
638 the value 0 or get an error. */
639 while ((ch = GZ_GETC()) > 0)
641 if (ch == -1) {
642 if (state->err == 0) {
643 /* EOF */
644 state->err = WTAP_ERR_SHORT_READ;
645 state->err_info = NULL;
647 return -1;
649 return 0;
652 static void
653 zlib_fast_seek_add(FILE_T file, struct zlib_cur_seek_point *point, int bits, int64_t in_pos, int64_t out_pos)
655 /* it's for sure after gzip header, so file->fast_seek->len != 0 */
656 struct fast_seek_point *item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
658 #ifndef HAVE_INFLATEPRIME
659 if (bits)
660 return;
661 #endif /* HAVE_INFLATEPRIME */
663 /* Glib has got Balanced Binary Trees (GTree) but I couldn't find a way to do quick search for nearest (and smaller) value to seek (It's what fast_seek_find() do)
664 * Inserting value in middle of sorted array is expensive, so we want to add only in the end.
665 * It's not big deal, cause first-read don't usually invoke seeking
667 if (item->out + SPAN < out_pos) {
668 struct fast_seek_point *val = g_new(struct fast_seek_point,1);
669 val->in = in_pos;
670 val->out = out_pos;
671 val->compression = ZLIB;
672 #ifdef HAVE_INFLATEPRIME
673 val->data.zlib.bits = bits;
674 #endif /* HAVE_INFLATEPRIME */
675 if (point->pos != 0) {
676 unsigned int left = ZLIB_WINSIZE - point->pos;
678 memcpy(val->data.zlib.window, point->window + point->pos, left);
679 memcpy(val->data.zlib.window + left, point->window, point->pos);
680 } else
681 memcpy(val->data.zlib.window, point->window, ZLIB_WINSIZE);
684 * XXX - strm.adler is a uLong in at least some versions
685 * of zlib, and uLong is an unsigned long in at least
686 * some of those versions, which means it's 64-bit
687 * on LP64 platforms, even though the checksum is
688 * 32-bit. We assume the actual Adler checksum
689 * is in the lower 32 bits of strm.adler; as the
690 * checksum in the file is only 32 bits, we save only
691 * those lower 32 bits, and cast away any additional
692 * bits to squelch warnings.
694 * The same applies to strm.total_out.
696 val->data.zlib.adler = (uint32_t) file->strm.adler;
697 val->data.zlib.total_out = (uint32_t) file->strm.total_out;
698 g_ptr_array_add(file->fast_seek, val);
703 * Based on what gz_decomp() in zlib does.
705 static void
706 zlib_fill_out_buffer(FILE_T state)
708 int ret = 0; /* XXX */
709 uint32_t crc, len;
710 #ifdef HAVE_ZLIBNG
711 zng_streamp strm = &(state->strm);
712 #else /* HAVE_ZLIBNG */
713 z_streamp strm = &(state->strm);
714 #endif /* HAVE_ZLIBNG */
715 unsigned char *buf = state->out.buf;
716 unsigned int count = state->size << 1;
718 unsigned char *buf2 = buf;
719 unsigned int count2 = count;
721 strm->avail_out = count;
722 strm->next_out = buf;
724 /* fill output buffer up to end of deflate stream or error */
725 do {
726 /* get more input for inflate() */
727 if (state->in.avail == 0 && fill_in_buffer(state) == -1)
728 break;
729 if (state->in.avail == 0) {
730 /* EOF */
731 state->err = WTAP_ERR_SHORT_READ;
732 state->err_info = NULL;
733 break;
736 strm->avail_in = state->in.avail;
737 strm->next_in = state->in.next;
738 /* decompress and handle errors */
739 #ifdef Z_BLOCK
740 ret = ZLIB_PREFIX(inflate)(strm, Z_BLOCK);
741 #else /* Z_BLOCK */
742 ret = ZLIB_PREFIX(inflate)(strm, Z_NO_FLUSH);
743 #endif /* Z_BLOCK */
744 state->in.avail = strm->avail_in;
745 #ifdef z_const
746 DIAG_OFF(cast-qual)
747 state->in.next = (unsigned char *)strm->next_in;
748 DIAG_ON(cast-qual)
749 #else /* z_const */
750 state->in.next = strm->next_in;
751 #endif /* z_const */
752 if (ret == Z_STREAM_ERROR) {
753 state->err = WTAP_ERR_DECOMPRESS;
754 state->err_info = strm->msg;
755 break;
757 if (ret == Z_NEED_DICT) {
758 state->err = WTAP_ERR_DECOMPRESS;
759 state->err_info = "preset dictionary needed";
760 break;
762 if (ret == Z_MEM_ERROR) {
763 /* This means "not enough memory". */
764 state->err = ENOMEM;
765 state->err_info = NULL;
766 break;
768 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
769 state->err = WTAP_ERR_DECOMPRESS;
770 state->err_info = strm->msg;
771 break;
774 * XXX - Z_BUF_ERROR?
777 strm->adler = ZLIB_PREFIX(crc32)(strm->adler, buf2, count2 - strm->avail_out);
778 #ifdef Z_BLOCK
779 if (state->fast_seek_cur != NULL) {
780 struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
781 unsigned int ready = count2 - strm->avail_out;
783 if (ready < ZLIB_WINSIZE) {
784 unsigned left = ZLIB_WINSIZE - cur->pos;
786 if (ready >= left) {
787 memcpy(cur->window + cur->pos, buf2, left);
788 if (ready != left)
789 memcpy(cur->window, buf2 + left, ready - left);
791 cur->pos = ready - left;
792 cur->have += ready;
793 } else {
794 memcpy(cur->window + cur->pos, buf2, ready);
795 cur->pos += ready;
796 cur->have += ready;
799 if (cur->have >= ZLIB_WINSIZE)
800 cur->have = ZLIB_WINSIZE;
802 } else {
803 memcpy(cur->window, buf2 + (ready - ZLIB_WINSIZE), ZLIB_WINSIZE);
804 cur->pos = 0;
805 cur->have = ZLIB_WINSIZE;
808 if (cur->have >= ZLIB_WINSIZE && ret != Z_STREAM_END && (strm->data_type & 128) && !(strm->data_type & 64))
809 zlib_fast_seek_add(state, cur, (strm->data_type & 7), state->raw_pos - strm->avail_in, state->pos + (count - strm->avail_out));
811 #endif /* Z_BLOCK */
812 buf2 = (buf2 + count2 - strm->avail_out);
813 count2 = strm->avail_out;
815 } while (strm->avail_out && ret != Z_STREAM_END);
817 /* update available output and crc check value */
818 state->out.next = buf;
819 state->out.avail = count - strm->avail_out;
821 /* Check gzip trailer if at end of deflate stream.
822 We don't fail immediately here, we just set an error
823 indication, so that we try to process what data we
824 got before the error. The next attempt to read
825 something past that data will get the error. */
826 if (ret == Z_STREAM_END) {
827 if (gz_next4(state, &crc) != -1 &&
828 gz_next4(state, &len) != -1) {
829 if (crc != strm->adler && !state->dont_check_crc) {
830 state->err = WTAP_ERR_DECOMPRESS;
831 state->err_info = "bad CRC";
832 } else if (len != (strm->total_out & 0xffffffffUL)) {
833 state->err = WTAP_ERR_DECOMPRESS;
834 state->err_info = "length field wrong";
837 state->last_compression = state->compression;
838 state->compression = UNKNOWN; /* ready for next stream, once have is 0 */
839 g_free(state->fast_seek_cur);
840 state->fast_seek_cur = NULL;
843 #endif /* USE_ZLIB_OR_ZLIBNG */
846 * Check for a gzip header.
848 * Based on the gzip-specific stuff gz_head() from zlib does.
850 static int
851 check_for_zlib_compression(FILE_T state)
854 * Look for the gzip header. The first two bytes are 31 and 139,
855 * and if we find it, return success if we support gzip and an
856 * error if we don't.
858 if (state->in.next[0] == 31) {
859 state->in.avail--;
860 state->in.next++;
862 /* Make sure the byte after the first byte is present */
863 if (state->in.avail == 0 && fill_in_buffer(state) == -1) {
864 /* Read error. */
865 return -1;
867 if (state->in.avail != 0) {
868 if (state->in.next[0] == 139) {
870 * We have what looks like the ID1 and ID2 bytes of a gzip
871 * header.
872 * Continue processing the file.
874 * XXX - some capture file formats (I'M LOOKING AT YOU,
875 * ENDACE!) can have 31 in the first byte of the file
876 * and 139 in the second byte of the file. For now, in
877 * those cases, you lose.
879 #ifdef USE_ZLIB_OR_ZLIBNG
880 uint8_t cm;
881 uint8_t flags;
882 uint16_t len;
883 uint16_t hcrc;
885 state->in.avail--;
886 state->in.next++;
888 /* read rest of header */
890 /* compression method (CM) */
891 if (gz_next1(state, &cm) == -1)
892 return -1;
893 if (cm != 8) {
894 state->err = WTAP_ERR_DECOMPRESS;
895 state->err_info = "unknown compression method";
896 return -1;
899 /* flags (FLG) */
900 if (gz_next1(state, &flags) == -1) {
901 /* Read error. */
902 return -1;
904 if (flags & 0xe0) { /* reserved flag bits */
905 state->err = WTAP_ERR_DECOMPRESS;
906 state->err_info = "reserved flag bits set";
907 return -1;
910 /* modification time (MTIME) */
911 if (gz_skipn(state, 4) == -1) {
912 /* Read error. */
913 return -1;
916 /* extra flags (XFL) */
917 if (gz_skipn(state, 1) == -1) {
918 /* Read error. */
919 return -1;
922 /* operating system (OS) */
923 if (gz_skipn(state, 1) == -1) {
924 /* Read error. */
925 return -1;
928 if (flags & 4) {
929 /* extra field - get XLEN */
930 if (gz_next2(state, &len) == -1) {
931 /* Read error. */
932 return -1;
935 /* skip the extra field */
936 if (gz_skipn(state, len) == -1) {
937 /* Read error. */
938 return -1;
941 if (flags & 8) {
942 /* file name */
943 if (gz_skipzstr(state) == -1) {
944 /* Read error. */
945 return -1;
948 if (flags & 16) {
949 /* comment */
950 if (gz_skipzstr(state) == -1) {
951 /* Read error. */
952 return -1;
955 if (flags & 2) {
956 /* header crc */
957 if (gz_next2(state, &hcrc) == -1) {
958 /* Read error. */
959 return -1;
961 /* XXX - check the CRC? */
964 /* set up for decompression */
965 ZLIB_PREFIX(inflateReset)(&(state->strm));
966 state->strm.adler = ZLIB_PREFIX(crc32)(0L, Z_NULL, 0);
967 state->compression = ZLIB;
968 state->is_compressed = true;
969 #ifdef Z_BLOCK
970 if (state->fast_seek) {
971 struct zlib_cur_seek_point *cur = g_new(struct zlib_cur_seek_point,1);
973 cur->pos = cur->have = 0;
974 g_free(state->fast_seek_cur);
975 state->fast_seek_cur = cur;
976 fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, GZIP_AFTER_HEADER);
978 #endif /* Z_BLOCK */
979 return 1;
980 #else /* USE_ZLIB_OR_ZLIBNG */
981 state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
982 state->err_info = "reading gzip-compressed files isn't supported";
983 return -1;
984 #endif /* USE_ZLIB_OR_ZLIBNG */
988 * Not a gzip file. "Unget" the first character; either:
990 * 1) we read both of the first two bytes into the
991 * buffer with the first ws_read, so we can just back
992 * up by one byte;
994 * 2) we only read the first byte into the buffer with
995 * the first ws_read (e.g., because we're reading from
996 * a pipe and only the first byte had been written to
997 * the pipe at that point), and read the second byte
998 * into the buffer after the first byte in the
999 * fill_in_buffer call, so we now have two bytes in
1000 * the buffer, and can just back up by one byte.
1002 state->in.avail++;
1003 state->in.next--;
1006 return 0;
/*
 * Zstandard compression.
 *
 * https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
 */
#ifdef HAVE_ZSTD
/*
 * Decompress more Zstandard data into the (empty) output buffer.
 *
 * The output buffer is treated as holding state->size * 2 bytes.  When
 * the decompressor reports that a frame is complete, the reader goes
 * back to the UNKNOWN state to look for a following stream.
 *
 * Returns true on success, false on a read or decompression error
 * (state->err and state->err_info are set).
 */
static bool
zstd_fill_out_buffer(FILE_T state)
{
    ws_assert(state->out.avail == 0);

    if (state->in.avail == 0 && fill_in_buffer(state) == -1)
        return false;

    ZSTD_outBuffer output = {
        .dst = state->out.buf,
        .size = state->size << 1,
        .pos = 0
    };
    ZSTD_inBuffer input = {
        .src = state->in.next,
        .size = state->in.avail,
        .pos = 0
    };
    const size_t ret = ZSTD_decompressStream(state->zstd_dctx, &output, &input);
    if (ZSTD_isError(ret)) {
        state->err = WTAP_ERR_DECOMPRESS;
        state->err_info = ZSTD_getErrorName(ret);
        return false;
    }

    /* Account for the input consumed ... */
    state->in.next += input.pos;
    state->in.avail -= (unsigned)input.pos;

    /* ... and the output produced. */
    state->out.next = output.dst;
    state->out.avail = (unsigned)output.pos;

    if (ret == 0) {
        /* Frame completely decoded. */
        state->last_compression = state->compression;
        state->compression = UNKNOWN;
    }
    return true;
}
#endif /* HAVE_ZSTD */
1048 * Check for a Zstandard header.
1050 static int
1051 check_for_zstd_compression(FILE_T state)
1054 * Look for the Zstandard header, and, if we find it, return
1055 * success if we support Zstandard and an error if we don't.
1057 if (state->in.avail >= 4
1058 && state->in.next[0] == 0x28 && state->in.next[1] == 0xb5
1059 && state->in.next[2] == 0x2f && state->in.next[3] == 0xfd) {
1060 #ifdef HAVE_ZSTD
1061 const size_t ret = ZSTD_initDStream(state->zstd_dctx);
1062 if (ZSTD_isError(ret)) {
1063 state->err = WTAP_ERR_DECOMPRESS;
1064 state->err_info = ZSTD_getErrorName(ret);
1065 return -1;
1068 fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, ZSTD);
1069 state->compression = ZSTD;
1070 state->is_compressed = true;
1071 return 1;
1072 #else /* HAVE_ZSTD */
1073 state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
1074 state->err_info = "reading zstd-compressed files isn't supported";
1075 return -1;
1076 #endif /* HAVE_ZSTD */
1078 return 0;
/*
 * lz4 compression.
 *
 * https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
 */
1086 #ifdef USE_LZ4
1087 static void
1088 lz4_fast_seek_add(FILE_T file, struct zlib_cur_seek_point *point _U_, int64_t in_pos, int64_t out_pos)
1090 if (!file->fast_seek) {
1091 return;
1094 struct fast_seek_point *item = NULL;
1096 if (file->fast_seek->len != 0)
1097 item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
1099 /* As of Glib 2.68 GTree has g_tree_upper_bound, or we could use a
1100 * wmem_tree. However, since our initial read is usually sequential
1101 * only adding seek points at the end of the ptr array is fast and fine.
1104 /* don't bother adding jump points between very small blocks (min SPAN) */
1105 if (!item || item->out + SPAN < out_pos) {
1106 struct fast_seek_point *val = g_new(struct fast_seek_point,1);
1107 val->in = in_pos;
1108 val->out = out_pos;
1109 val->compression = LZ4;
1110 #if 0
1111 if (point->pos != 0) {
1112 unsigned int left = LZ4_WINSIZE - point->pos;
1114 memcpy(val->data.zlib.window, point->window + point->pos, left);
1115 memcpy(val->data.zlib.window + left, point->window, point->pos);
1116 } else
1117 memcpy(val->data.zlib.window, point->window, ZZ4_WINSIZE);
1118 #endif
1120 val->data.lz4.lz4_info = file->lz4_info;
1121 memcpy(val->data.lz4.lz4_hdr, file->lz4_hdr, LZ4F_HEADER_SIZE_MAX);
1122 g_ptr_array_add(file->fast_seek, val);
1126 static bool
1127 lz4_fill_out_buffer(FILE_T state)
1129 ws_assert(state->out.avail == 0);
1131 if (state->in.avail == 0 && fill_in_buffer(state) == -1)
1132 return false;
1135 * We should be at the start of a block. First, determine the size of
1136 * the block. We tell LZ4F_decompress that there's no room to put
1137 * the decompressed block; this will make it read the block size
1138 * header and stop, returning the size of the block (plus next
1139 * header) as hint of how much input to hand next.
1142 size_t outBufSize = 0; // Zero so we don't actually consume the block
1143 size_t inBufSize;
1145 size_t compressedSize = 0;
1147 do {
1148 /* get more input for decompress() */
1149 if (state->in.avail == 0 && fill_in_buffer(state) == -1)
1150 break;
1151 if (state->eof) {
1152 state->err = WTAP_ERR_SHORT_READ;
1153 state->err_info = NULL;
1154 break;
1157 inBufSize = state->in.avail;
1158 compressedSize = LZ4F_decompress(state->lz4_dctx, state->out.buf, &outBufSize, state->in.next, &inBufSize, NULL);
1160 if (LZ4F_isError(compressedSize)) {
1161 state->err = WTAP_ERR_DECOMPRESS;
1162 state->err_info = LZ4F_getErrorName(compressedSize);
1163 return false;
1166 state->in.next += (unsigned)inBufSize;
1167 state->in.avail -= (unsigned)inBufSize;
1169 if (compressedSize == 0) {
1170 /* End of Frame */
1171 state->last_compression = state->compression;
1172 state->compression = UNKNOWN;
1173 return true;
1176 ws_assert(outBufSize == 0);
1178 } while (compressedSize < LZ4F_BLOCK_HEADER_SIZE);
1181 * We don't want to add a fast seek point for the end of frame,
1182 * especially if there's another frame or other stream after it,
1183 * which would have the same out position. So add it after the
1184 * reading the block size - but point to where the block size
1185 * is so that we'll fast seek to the block size again.
1187 if (state->lz4_info.blockMode == LZ4F_blockIndependent) {
1189 * XXX - If state->lz4_info.blockMode == LZ4F_blockLinked, it doesn't
1190 * seem like the LZ4 Frame API can handle this, we would need to use
1191 * the low level Block API and pass the last 64KiB window of data to
1192 * LZ4_setStreamDecode and use LZ4_decompress_safe_continue (similar
1193 * to gzip). So for now we can't do fast seek with it (we do add one
1194 * header at the frame beginning so that concatenated frames and other
1195 * decompression streams work.)
1197 lz4_fast_seek_add(state, NULL, state->raw_pos - state->in.avail - LZ4F_BLOCK_HEADER_SIZE, state->pos);
1200 // Now actually read the entire next block - but not the next header
1201 compressedSize -= LZ4F_BLOCK_HEADER_SIZE;
1202 state->out.next = state->out.buf;
1204 if (compressedSize > state->size) {
1206 * What is this? Either bogus, or some new variant of LZ4 Frames with
1207 * a larger block size we don't support. We could have a buffer
1208 * overrun if we try to process it.
1210 * TODO - We could realloc here.
1212 state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
1213 state->err_info = "lz4 compressed block size too large";
1214 return false;
1217 size_t ret;
1218 do {
1219 /* get more input for decompress() */
1220 if (state->in.avail == 0 && fill_in_buffer(state) == -1)
1221 break;
1222 if (state->eof) {
1223 state->err = WTAP_ERR_SHORT_READ;
1224 state->err_info = NULL;
1225 break;
1228 outBufSize = (state->size << 1) - offset_in_buffer(&state->out);
1229 inBufSize = MIN(state->in.avail, compressedSize);
1230 ret = LZ4F_decompress(state->lz4_dctx, state->out.next, &outBufSize, state->in.next, &inBufSize, NULL);
1232 if (LZ4F_isError(ret)) {
1233 state->err = WTAP_ERR_DECOMPRESS;
1234 state->err_info = LZ4F_getErrorName(ret);
1235 return false;
1237 state->in.next += (unsigned)inBufSize;
1238 state->in.avail -= (unsigned)inBufSize;
1239 compressedSize -= inBufSize;
1241 state->out.next += (unsigned)outBufSize;
1242 state->out.avail += (unsigned)outBufSize;
1243 } while (compressedSize != 0);
1245 state->out.next = state->out.buf;
1247 #if 0
1248 /* This is an alternative implementation using the lower-level
1249 * LZ4 Block API. Doing something like this might be necessary
1250 * to handle linked blocks, because the Frame API doesn't have
1251 * a method to reset the dictionary / window.
1253 int outBufSize = state->size << 1;
1254 uint32_t compressedSize;
1255 if (gz_next4(state, &compressedSize) == -1) {
1256 return false;
1258 if (compressedSize == 0) {
1259 /* EndMark */
1260 if (state->lz4_info.contentChecksumFlag) {
1261 uint32_t xxHash;
1262 if (gz_next4(state, &xxHash) == -1) {
1263 return false;
1265 /* XXX - check hash? */
1267 state->last_compression = state->compression;
1268 state->compression = UNKNOWN;
1269 return true;
1271 bool uncompressed = compressedSize >> 31;
1272 compressedSize &= 0x7FFFFFFF;
1273 if (compressedSize > state->size) {
1274 // TODO - we could realloc here
1275 state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
1276 state->err_info = "lz4 compressed block size too large";
1277 return false;
1281 * We have to read an entire block as we're using the low-level
1282 * Block API instead of the LZ4 Frame API.
1284 if (compressedSize > (unsigned)state->in.avail) {
1285 memmove(state->in.buf, state->in.next, state->in.avail);
1286 state->in.next = state->in.buf;
1287 while ((unsigned)state->in.avail < compressedSize) {
1288 if (state->eof) {
1289 state->err = WTAP_ERR_SHORT_READ;
1290 state->err_info = NULL;
1291 return false;
1293 if (fill_in_buffer(state) == -1) {
1294 return false;
1299 int decompressedSize;
1300 if (uncompressed) {
1301 memcpy(state->out.buf, state->in.buf, compressedSize);
1302 decompressedSize = compressedSize;
1303 } else {
1304 decompressedSize = LZ4_decompress_safe(state->in.next, state->out.buf, compressedSize, outBufSize);
1305 //const size_t ret = LZ4F_decompress(state->lz4_dctx, state->out.buf, &outBufSize, state->in.next, &inBufSize, NULL);
1306 if (LZ4F_isError(decompressedSize)) {
1307 state->err = WTAP_ERR_DECOMPRESS;
1308 state->err_info = LZ4F_getErrorName(decompressedSize);
1309 return false;
1314 * We assume LZ4F_decompress() will not set inBufSize to a
1315 * value > state->in.avail.
1317 state->in.next += compressedSize;
1318 state->in.avail -= compressedSize;
1320 state->out.next = state->out.buf;
1321 state->out.avail = (unsigned)decompressedSize;
1323 if (state->lz4_info.blockChecksumFlag == LZ4F_blockChecksumEnabled) {
1324 uint32_t xxHash;
1325 if (gz_next4(state, &xxHash) == -1) {
1326 return false;
1328 /* XXX - check hash? */
1330 #endif
1331 return true;
1333 #endif /* USE_LZ4 */
1336 * Check for an lz4 header.
1338 static int
1339 check_for_lz4_compression(FILE_T state)
1342 * Look for the lz4 header, and, if we find it, return success
1343 * if we support lz4 and an error if we don't.
1345 if (state->in.avail >= 4
1346 && state->in.next[0] == 0x04 && state->in.next[1] == 0x22
1347 && state->in.next[2] == 0x4d && state->in.next[3] == 0x18) {
1348 #ifdef USE_LZ4
1349 #if LZ4_VERSION_NUMBER >= 10800
1350 LZ4F_resetDecompressionContext(state->lz4_dctx);
1351 #else /* LZ4_VERSION_NUMBER >= 10800 */
1352 LZ4F_freeDecompressionContext(state->lz4_dctx);
1353 const LZ4F_errorCode_t ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION);
1354 if (LZ4F_isError(ret)) {
1355 state->err = WTAP_ERR_INTERNAL;
1356 state->err_info = LZ4F_getErrorName(ret);
1357 return -1;
1359 #endif /* LZ4_VERSION_NUMBER >= 10800 */
1360 size_t headerSize = LZ4F_HEADER_SIZE_MAX;
1361 #if LZ4_VERSION_NUMBER >= 10903
1363 * In 1.9.3+ we can handle a silly edge case of a tiny valid
1364 * frame at the end of a file that is smaller than the maximum
1365 * header size. (lz4frame.h added the function in 1.9.0, but
1366 * only for the static library; it wasn't exported until 1.9.3)
1368 while (state->in.avail < LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH) {
1369 if (fill_in_buffer(state) == -1) {
1370 return -1;
1372 if (state->eof) {
1373 state->err = WTAP_ERR_SHORT_READ;
1374 state->err_info = NULL;
1375 return 0;
1378 headerSize = LZ4F_headerSize(state->in.next, state->in.avail);
1379 if (LZ4F_isError(headerSize)) {
1380 state->err = WTAP_ERR_DECOMPRESS;
1381 state->err_info = LZ4F_getErrorName(headerSize);
1382 return -1;
1384 #endif /* LZ4_VERSION_NUMBER >= 10903 */
1385 while (state->in.avail < headerSize) {
1386 if (fill_in_buffer(state) == -1) {
1387 return -1;
1389 if (state->eof) {
1390 state->err = WTAP_ERR_SHORT_READ;
1391 state->err_info = NULL;
1392 return 0;
1395 size_t inBufSize = state->in.avail;
1396 memcpy(state->lz4_hdr, state->in.next, headerSize);
1397 const LZ4F_errorCode_t err = LZ4F_getFrameInfo(state->lz4_dctx, &state->lz4_info, state->in.next, &inBufSize);
1398 if (LZ4F_isError(err)) {
1399 state->err = WTAP_ERR_DECOMPRESS;
1400 state->err_info = LZ4F_getErrorName(err);
1401 return -1;
1405 * XXX - We could check state->lz4_info.blockSizeID here, and
1406 * only realloc the buffers to a larger value if the max
1407 * block size is bigger than state->size. Also we could fail
1408 * on unknown values?
1410 state->in.avail -= (unsigned)inBufSize;
1411 state->in.next += (unsigned)inBufSize;
1413 fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, LZ4);
1414 state->compression = LZ4;
1415 state->is_compressed = true;
1416 return 1;
1417 #else /* USE_LZ4 */
1418 state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
1419 state->err_info = "reading lz4-compressed files isn't supported";
1420 return -1;
1421 #endif /* USE_LZ4 */
1423 return 0;
1426 typedef int (*compression_type_test)(FILE_T);
1428 static compression_type_test const compression_type_tests[] = {
1429 check_for_zlib_compression,
1430 check_for_zstd_compression,
1431 check_for_lz4_compression,
1435 * Used when we haven't yet determined whether we have a compressed file
1436 * and, if we do, what sort of compressed file it is.
1438 * Based on the non-gzip-specific stuff that gz_head() from zlib does.
1440 static int
1441 check_for_compression(FILE_T state)
1444 * If this isn't the first frame / compressed stream, ensure that
1445 * we're starting at the beginning of the buffer. This shouldn't
1446 * get called much.
1448 * This is to avoid edge cases where a previous frame finished but
1449 * state->in.next is close to the end of the buffer so there isn't
1450 * much room to put the start of the next frame.
1451 * This also lets us put back bytes if things go wrong.
1453 if (state->in.next != state->in.buf) {
1454 memmove(state->in.buf, state->in.next, state->in.avail);
1455 state->in.next = state->in.buf;
1458 /* get some data in the input buffer */
1459 if (state->in.avail == 0) {
1460 if (fill_in_buffer(state) == -1)
1461 return -1;
1462 if (state->in.avail == 0)
1463 return 0;
1467 * Check for the compression types we support.
1469 for (size_t i = 0; i < G_N_ELEMENTS(compression_type_tests); i++) {
1470 int ret;
1472 ret = compression_type_tests[i](state);
1473 if (ret == -1)
1474 return -1; /* error */
1475 if (ret == 1)
1476 return 0; /* found it */
1480 * Some other compressed file formats we might want to support:
1482 * XZ format:
1483 * https://tukaani.org/xz/
1484 * https://github.com/tukaani-project/xz
1485 * https://github.com/tukaani-project/xz/blob/master/doc/xz-file-format.txt
1487 * Bzip2 format:
1488 * https://www.sourceware.org/bzip2/
1489 * https://gitlab.com/bzip2/bzip2/
1490 * https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
1491 * (GitHub won't render it; download and open it)
1493 * Lzip format:
1494 * https://www.nongnu.org/lzip/
1498 * We didn't see anything that looks like a header for any type of
1499 * compressed file that we support, so just do uncompressed I/O.
1501 * XXX - This fast seek data is for the case where a compressed stream
1502 * ends and is followed by an uncompressed portion. It only works if
1503 * the uncompressed portion is at the end, as we don't constantly scan
1504 * for magic bytes in the middle of uncompressed data. (Concatenated
1505 * compressed streams _do_ work, even streams of different compression types.)
1507 if (state->fast_seek)
1508 fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, UNCOMPRESSED);
1511 /* doing raw i/o, save start of raw data for seeking, copy any leftover
1512 input to output -- this assumes that the output buffer is larger than
1513 the input buffer, which also assures space for gzungetc() */
1514 state->raw = state->pos;
1515 state->out.next = state->out.buf;
1516 /* not a compressed file -- copy everything we've read into the
1517 input buffer to the output buffer and fall to raw i/o */
1518 if (state->in.avail) {
1519 memcpy(state->out.buf, state->in.next, state->in.avail);
1520 state->out.avail = state->in.avail;
1522 /* Now discard everything in the input buffer */
1523 buf_reset(&state->in);
1525 state->compression = UNCOMPRESSED;
1526 return 0;
1530 * Based on what gz_make() in zlib does.
1532 static int
1533 fill_out_buffer(FILE_T state)
1535 if (state->compression == UNKNOWN) {
1537 * We don't yet know whether the file is compressed,
1538 * so check for a compressed-file header.
1540 if (check_for_compression(state) == -1)
1541 return -1;
1542 if (state->out.avail != 0) /* got some data from check_for_compression() */
1543 return 0;
1547 * We got no data from check_for_compression(), or we didn't call
1548 * it as we already know the compression type, so read some more
1549 * data.
1551 switch (state->compression) {
1553 case UNCOMPRESSED:
1554 /* straight copy */
1555 if (!uncompressed_fill_out_buffer(state))
1556 return -1;
1557 break;
1559 #ifdef USE_ZLIB_OR_ZLIBNG
1560 case ZLIB:
1561 /* zlib (gzip) decompress */
1562 zlib_fill_out_buffer(state);
1563 break;
1564 #endif /* USE_ZLIB_OR_ZLIBNG */
1566 #ifdef HAVE_ZSTD
1567 case ZSTD:
1568 /* zstd decompress */
1569 if (!zstd_fill_out_buffer(state))
1570 return -1;
1571 break;
1572 #endif /* HAVE_ZSTD */
1574 #ifdef USE_LZ4
1575 case LZ4:
1576 /* lz4 decompress */
1577 if (!lz4_fill_out_buffer(state))
1578 return -1;
1579 break;
1580 #endif /* USE_LZ4 */
1582 default:
1583 /* Unknown compression type; keep reading */
1584 break;
1586 return 0;
1589 static int
1590 gz_skip(FILE_T state, int64_t len)
1592 unsigned n;
1594 /* skip over len bytes or reach end-of-file, whichever comes first */
1595 while (len)
1596 if (state->out.avail != 0) {
1597 /* We have stuff in the output buffer; skip over
1598 it. */
1599 n = (int64_t)state->out.avail > len ? (unsigned)len : state->out.avail;
1600 state->out.avail -= n;
1601 state->out.next += n;
1602 state->pos += n;
1603 len -= n;
1604 } else if (state->err != 0) {
1605 /* We have nothing in the output buffer, and
1606 we have an error that may not have been
1607 reported yet; that means we can't generate
1608 any more data into the output buffer, so
1609 return an error indication. */
1610 return -1;
1611 } else if (state->eof && state->in.avail == 0) {
1612 /* We have nothing in the output buffer, and
1613 we're at the end of the input; just return. */
1614 break;
1615 } else {
1616 /* We have nothing in the output buffer, and
1617 we can generate more data; get more output,
1618 looking for header if required. */
1619 if (fill_out_buffer(state) == -1)
1620 return -1;
1622 return 0;
1625 static void
1626 gz_reset(FILE_T state)
1628 buf_reset(&state->out); /* no output data available */
1629 state->eof = false; /* not at end of file */
1630 state->compression = UNKNOWN; /* look for compression header */
1632 state->seek_pending = false; /* no seek request pending */
1633 state->err = 0; /* clear error */
1634 state->err_info = NULL;
1635 state->pos = 0; /* no uncompressed data yet */
1636 buf_reset(&state->in); /* no input data yet */
1639 FILE_T
1640 file_fdopen(int fd)
1643 * XXX - we now check whether we have st_blksize in struct stat;
1644 * it's not available on all platforms.
1646 * I'm not sure why we're testing _STATBUF_ST_BLKSIZE; it's not
1647 * set on all platforms that have st_blksize in struct stat.
1648 * (Not all platforms have st_blksize in struct stat.)
1650 * Is there some reason *not* to make the buffer size the maximum
1651 * of GBUFSIZE and st_blksize? On most UN*Xes, the standard I/O
1652 * library does I/O with st_blksize as the buffer size; on others,
1653 * and on Windows, it's a 4K buffer size. If st_blksize is bigger
1654 * than GBUFSIZE (which is currently 4KB), that's probably a
1655 * hint that reading in st_blksize chunks is considered a good
1656 * idea (e.g., an 8K/1K Berkeley fast file system with st_blksize
1657 * being 8K, or APFS, where st_blksize is big on at least some
1658 * versions of macOS).
1660 #ifdef _STATBUF_ST_BLKSIZE
1661 ws_statb64 st;
1662 #endif /* _STATBUF_ST_BLKSIZE */
1663 #ifdef HAVE_ZSTD
1664 size_t zstd_buf_size;
1665 #endif /* HAVE_ZSTD */
1666 unsigned want = GZBUFSIZE;
1667 FILE_T state;
1668 #ifdef USE_LZ4
1669 size_t ret;
1670 #endif /* USE_LZ4 */
1672 if (fd == -1)
1673 return NULL;
1675 /* allocate FILE_T structure to return */
1676 state = (FILE_T)g_try_malloc0(sizeof *state);
1677 if (state == NULL)
1678 return NULL;
1680 state->fast_seek_cur = NULL;
1681 state->fast_seek = NULL;
1683 /* open the file with the appropriate mode (or just use fd) */
1684 state->fd = fd;
1686 /* we don't yet know whether it's compressed */
1687 state->is_compressed = false;
1688 state->last_compression = UNKNOWN;
1690 /* save the current position for rewinding (only if reading) */
1691 state->start = ws_lseek64(state->fd, 0, SEEK_CUR);
1692 if (state->start == -1) state->start = 0;
1693 state->raw_pos = state->start;
1695 /* initialize stream */
1696 gz_reset(state);
1698 #ifdef _STATBUF_ST_BLKSIZE
1700 * See what I/O size the file system recommends using, and if
1701 * it's bigger than what we're using and isn't too big, use
1702 * it.
1704 if (ws_fstat64(fd, &st) >= 0) {
1706 * Yes, st_blksize can be bigger than an int; apparently,
1707 * it's a long on LP64 Linux, for example.
1709 * If the value is too big to fit into a unsigned,
1710 * just use the maximum read buffer size.
1712 * On top of that, the Single UNIX Speification says that
1713 * st_blksize is of type blksize_t, which is a *signed*
1714 * integer type, and, at minimum, macOS 11.6 and Linux 5.14.11's
1715 * include/uapi/asm-generic/stat.h define it as such.
1717 * However, other OSes might make it unsigned, and older versions
1718 * of OSes that currently make it signed might make it unsigned,
1719 * so we try to avoid warnings from that.
1721 * We cast MAX_READ_BUF_SIZE to long in order to avoid the
1722 * warning, although it might introduce warnings on platforms
1723 * where st_blocksize is unsigned; we'll deal with that if
1724 * it ever shows up as an issue.
1726 * MAX_READ_BUF_SIZE is < the largest *signed* 32-bt integer,
1727 * so casting it to long won't turn it into a negative number.
1728 * (We only support 32-bit and 64-bit 2's-complement platforms.)
1730 if (st.st_blksize <= (long)MAX_READ_BUF_SIZE)
1731 want = (unsigned)st.st_blksize;
1732 else
1733 want = MAX_READ_BUF_SIZE;
1734 /* XXX, verify result? */
1736 #endif /* _STATBUF_ST_BLKSIZE */
1737 #ifdef HAVE_ZSTD
1738 /* we should have separate input and output buf sizes */
1739 zstd_buf_size = ZSTD_DStreamInSize();
1740 if (zstd_buf_size > want) {
1741 if (zstd_buf_size <= MAX_READ_BUF_SIZE)
1742 want = (unsigned)zstd_buf_size;
1743 else
1744 want = MAX_READ_BUF_SIZE;
1746 zstd_buf_size = ZSTD_DStreamOutSize();
1747 if (zstd_buf_size > want) {
1748 if (zstd_buf_size <= MAX_READ_BUF_SIZE)
1749 want = (unsigned)zstd_buf_size;
1750 else
1751 want = MAX_READ_BUF_SIZE;
1753 #endif /* HAVE_ZSTD */
1754 #ifdef USE_LZ4
1755 if (LZ4BUFSIZE > want) {
1756 if (LZ4BUFSIZE <= MAX_READ_BUF_SIZE) {
1757 want = LZ4BUFSIZE;
1758 } else {
1759 goto err;
1762 #endif /* USE_LZ4 */
1764 /* allocate buffers */
1765 state->in.buf = (unsigned char *)g_try_malloc(want);
1766 state->in.next = state->in.buf;
1767 state->in.avail = 0;
1768 state->out.buf = (unsigned char *)g_try_malloc(want << 1);
1769 state->out.next = state->out.buf;
1770 state->out.avail = 0;
1771 state->size = want;
1772 if (state->in.buf == NULL || state->out.buf == NULL) {
1773 goto err;
1776 #ifdef USE_ZLIB_OR_ZLIBNG
1777 /* allocate inflate memory */
1778 state->strm.zalloc = Z_NULL;
1779 state->strm.zfree = Z_NULL;
1780 state->strm.opaque = Z_NULL;
1781 state->strm.avail_in = 0;
1782 state->strm.next_in = Z_NULL;
1783 if (ZLIB_PREFIX(inflateInit2)(&(state->strm), -15) != Z_OK) { /* raw inflate */
1784 goto err;
1787 /* for now, assume we should check the crc */
1788 state->dont_check_crc = false;
1789 #endif /* USE_ZLIB_OR_ZLIBNG */
1791 #ifdef HAVE_ZSTD
1792 state->zstd_dctx = ZSTD_createDCtx();
1793 if (state->zstd_dctx == NULL) {
1794 goto err;
1796 #endif /* HAVE_ZSTD */
1798 #ifdef USE_LZ4
1799 ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION);
1800 if (LZ4F_isError(ret)) {
1801 goto err;
1803 #endif /* USE_LZ4 */
1805 /* return stream */
1806 return state;
1808 err:
1809 #ifdef USE_ZLIB_OR_ZLIBNG
1810 ZLIB_PREFIX(inflateEnd)(&state->strm);
1811 #endif /* USE_ZLIB_OR_ZLIBNG */
1812 #ifdef HAVE_ZSTD
1813 ZSTD_freeDCtx(state->zstd_dctx);
1814 #endif /* HAVE_ZSTD */
1815 #ifdef USE_LZ4
1816 LZ4F_freeDecompressionContext(state->lz4_dctx);
1817 #endif /* USE_LZ4 */
1818 g_free(state->out.buf);
1819 g_free(state->in.buf);
1820 g_free(state);
1821 errno = ENOMEM;
1822 return NULL;
1825 FILE_T
1826 file_open(const char *path)
1828 int fd;
1829 FILE_T ft;
1830 #ifdef USE_ZLIB_OR_ZLIBNG
1831 const char *suffixp;
1832 #endif /* USE_ZLIB_OR_ZLIBNG */
1834 /* open file and do correct filename conversions.
1836 XXX - do we need O_LARGEFILE? On UN*X, if we need to do
1837 something special to get large file support, the configure
1838 script should have set us up with the appropriate #defines,
1839 so we should be getting a large-file-enabled file descriptor
1840 here. Pre-Large File Summit UN*Xes, and possibly even some
1841 post-LFS UN*Xes, might require O_LARGEFILE here, though.
1842 If so, we should probably handle that in ws_open(). */
1843 if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1)
1844 return NULL;
1846 /* open file handle */
1847 ft = file_fdopen(fd);
1848 if (ft == NULL) {
1849 ws_close(fd);
1850 return NULL;
1853 #ifdef USE_ZLIB_OR_ZLIBNG
1855 * If this file's name ends in ".caz", it's probably a compressed
1856 * Windows Sniffer file. The compression is gzip, but if we
1857 * process the CRC as specified by RFC 1952, the computed CRC
1858 * doesn't match the stored CRC.
1860 * Compressed Windows Sniffer files don't all have the same CRC
1861 * value; is it just random crap, or are they running the CRC on
1862 * a different set of data than you're supposed to (e.g., not
1863 * CRCing some of the data), or something such as that?
1865 * For now, we just set a flag to ignore CRC errors.
1867 suffixp = strrchr(path, '.');
1868 if (suffixp != NULL) {
1869 if (g_ascii_strcasecmp(suffixp, ".caz") == 0)
1870 ft->dont_check_crc = true;
1872 #endif /* USE_ZLIB_OR_ZLIBNG */
1874 return ft;
1877 void
1878 file_set_random_access(FILE_T stream, bool random_flag _U_, GPtrArray *seek)
1880 stream->fast_seek = seek;
1883 int64_t
1884 file_seek(FILE_T file, int64_t offset, int whence, int *err)
1886 struct fast_seek_point *here;
1887 unsigned n;
1889 if (whence != SEEK_SET && whence != SEEK_CUR && whence != SEEK_END) {
1890 ws_assert_not_reached();
1892 *err = EINVAL;
1893 return -1;
1897 /* Normalize offset to a SEEK_CUR specification */
1898 if (whence == SEEK_END) {
1899 /* Seek relative to the end of the file; given that we might be
1900 reading from a compressed file, we do that by seeking to the
1901 end of the file, making an offset relative to the end of
1902 the file an offset relative to the current position.
1904 XXX - we don't actually use this yet, but, for uncompressed
1905 files, we could optimize it, if desired, by directly using
1906 ws_lseek64(). */
1907 if (gz_skip(file, INT64_MAX) == -1) {
1908 *err = file->err;
1909 return -1;
1911 if (offset == 0) {
1912 /* We are done */
1913 return file->pos;
1915 } else if (whence == SEEK_SET)
1916 offset -= file->pos;
1917 else if (file->seek_pending) {
1918 /* There's a forward-skip pending, so file->pos doesn't reflect
1919 the actual file position, it represents the position from
1920 which we're skipping; update the offset to include that. */
1921 offset += file->skip;
1923 file->seek_pending = false;
1926 * Are we moving at all?
1928 if (offset == 0) {
1929 /* No. Just return the current position. */
1930 return file->pos;
1934 * Are we seeking backwards?
1936 if (offset < 0) {
1938 * Yes.
1940 * Do we have enough data before the current position in the
1941 * buffer that we can seek backwards within the buffer?
1943 if (-offset <= offset_in_buffer(&file->out)) {
1945 * Yes. Adjust appropriately.
1947 * offset is negative, so -offset is non-negative, and
1948 * -offset is <= an unsigned and thus fits in an unsigned.
1949 * Get that value and adjust appropriately.
1951 * (Casting offset to unsigned makes it positive, which
1952 * is not what we would want, so we cast -offset instead.)
1954 * XXX - this won't work with -offset = 2^63, as its
1955 * negative isn't a valid 64-bit integer, but we are
1956 * not at all likely to see files big enough to ever
1957 * see a negative offset that large.
1959 unsigned adjustment = (unsigned)(-offset);
1961 file->out.avail += adjustment;
1962 file->out.next -= adjustment;
1963 file->pos -= adjustment;
1964 return file->pos;
1966 } else {
1968 * No. Offset is positive; we're seeking forwards.
1970 * Do we have enough data after the current position in the
1971 * buffer that we can seek forwards within the buffer?
1973 if (offset < file->out.avail) {
1975 * Yes. Adjust appropriately.
1977 * offset is < an unsigned and thus fits in an unsigned,
1978 * so we can cast it to unsigned safely.
1980 file->out.avail -= (unsigned)offset;
1981 file->out.next += offset;
1982 file->pos += offset;
1983 return file->pos;
1988 * We're not seeking within the buffer. Do we have "fast seek" data
1989 * for the location to which we will be seeking, and are we either
1990 * seeking backwards or is the fast seek point past what is in the
1991 * buffer? (We don't want to "fast seek" backwards to a point that
1992 * we've already read and buffered if we're actually seeking forwards.)
1994 * It might in certain cases be faster to continue reading linearly
1995 * foward rather than jump to the fast seek point if the distance
1996 * to the fast seek point is small, but we might only be able to do that
1997 * if the compression context doesn't change (which for LZ4 includes if
1998 * we jump to a LZ4 with different options.)
1999 * XXX - profile different buffer and SPAN sizes
2001 if ((here = fast_seek_find(file, file->pos + offset)) &&
2002 (offset < 0 || here->out >= file->pos + file->out.avail)) {
2003 int64_t off, off2;
2006 * Yes. Use that data to do the seek.
2007 * Note that this will be true only if file_set_random_access()
2008 * has been called on this file, which should never be the case
2009 * for a pipe.
2011 switch (here->compression) {
2013 #ifdef USE_ZLIB_OR_ZLIBNG
2014 case ZLIB:
2015 #ifdef HAVE_INFLATEPRIME
2016 off = here->in - (here->data.zlib.bits ? 1 : 0);
2017 #else /* HAVE_INFLATEPRIME */
2018 off = here->in;
2019 #endif /* HAVE_INFLATEPRIME */
2020 off2 = here->out;
2021 break;
2023 case GZIP_AFTER_HEADER:
2024 off = here->in;
2025 off2 = here->out;
2026 break;
2027 #endif /* USE_ZLIB_OR_ZLIBNG */
2029 #ifdef USE_LZ4
2030 case LZ4:
2031 ws_debug("fast seek lz4");
2032 off = here->in;
2033 off2 = here->out;
2034 break;
2035 #endif /* USE_LZ4 */
2037 case UNCOMPRESSED:
2038 /* In an uncompressed portion, seek directly to the offset */
2039 off2 = (file->pos + offset);
2040 off = here->in + (off2 - here->out);
2041 break;
2043 default:
2044 /* Otherwise, seek to the fast seek point to do any needed setup. */
2045 off = here->in;
2046 off2 = here->out;
2047 break;
2050 if (ws_lseek64(file->fd, off, SEEK_SET) == -1) {
2051 *err = errno;
2052 return -1;
2054 fast_seek_reset(file);
2056 file->raw_pos = off;
2057 buf_reset(&file->out);
2058 file->eof = false;
2059 file->seek_pending = false;
2060 file->err = 0;
2061 file->err_info = NULL;
2062 buf_reset(&file->in);
2064 switch (here->compression) {
2066 #ifdef USE_ZLIB_OR_ZLIBNG
2067 case ZLIB: {
2068 zlib_stream*strm = &file->strm;
2069 ZLIB_PREFIX(inflateReset)(strm);
2070 strm->adler = here->data.zlib.adler;
2071 strm->total_out = here->data.zlib.total_out;
2072 #ifdef HAVE_INFLATEPRIME
2073 if (here->data.zlib.bits) {
2074 FILE_T state = file;
2075 int ret = GZ_GETC();
2077 if (ret == -1) {
2078 if (state->err == 0) {
2079 /* EOF */
2080 *err = WTAP_ERR_SHORT_READ;
2081 } else
2082 *err = state->err;
2083 return -1;
2085 (void)ZLIB_PREFIX(inflatePrime)(strm, here->data.zlib.bits, ret >> (8 - here->data.zlib.bits));
2087 #endif /* HAVE_INFLATEPRIME */
2088 (void)ZLIB_PREFIX(inflateSetDictionary)(strm, here->data.zlib.window, ZLIB_WINSIZE);
2089 file->compression = ZLIB;
2090 break;
2093 case GZIP_AFTER_HEADER: {
2094 zlib_stream* strm = &file->strm;
2095 ZLIB_PREFIX(inflateReset)(strm);
2096 strm->adler = ZLIB_PREFIX(crc32)(0L, Z_NULL, 0);
2097 file->compression = ZLIB;
2098 break;
2100 #endif /* USE_ZLIB_OR_ZLIBNG */
2102 #ifdef USE_LZ4
2103 case LZ4:
2104 /* If the frame information seems to have changed (i.e., we fast
2105 * seeked into a different frame that also has different flags
2106 * and options), then reset the context and re-read it.
2107 * Unfortunately the API doesn't provide a method to set the
2108 * context options explicitly based on an already read
2109 * LZ4F_frameInfo_t.
2111 if (memcmp(&file->lz4_info, &here->data.lz4.lz4_info, sizeof(LZ4F_frameInfo_t)) != 0) {
2112 #if LZ4_VERSION_NUMBER >= 10800
2113 LZ4F_resetDecompressionContext(file->lz4_dctx);
2114 #else /* LZ4_VERSION_NUMBER >= 10800 */
2115 LZ4F_freeDecompressionContext(file->lz4_dctx);
2116 const LZ4F_errorCode_t ret = LZ4F_createDecompressionContext(&file->lz4_dctx, LZ4F_VERSION);
2117 if (LZ4F_isError(ret)) {
2118 file->err = WTAP_ERR_INTERNAL;
2119 file->err_info = LZ4F_getErrorName(ret);
2120 return -1;
2122 #endif /* LZ4_VERSION_NUMBER >= 10800 */
2123 size_t hdr_size = LZ4F_HEADER_SIZE_MAX;
2124 const LZ4F_errorCode_t frame_err = LZ4F_getFrameInfo(file->lz4_dctx, &file->lz4_info, here->data.lz4.lz4_hdr, &hdr_size);
2125 if (LZ4F_isError(frame_err)) {
2126 file->err = WTAP_ERR_DECOMPRESS;
2127 file->err_info = LZ4F_getErrorName(frame_err);
2128 return -1;
2131 file->lz4_info = here->data.lz4.lz4_info;
2132 file->compression = LZ4;
2133 break;
2134 #endif /* USE_LZ4 */
2136 #ifdef HAVE_ZSTD
2137 case ZSTD:
2139 const size_t ret = ZSTD_initDStream(file->zstd_dctx);
2140 if (ZSTD_isError(ret)) {
2141 file->err = WTAP_ERR_DECOMPRESS;
2142 file->err_info = ZSTD_getErrorName(ret);
2143 return -1;
2145 file->compression = ZSTD;
2146 break;
2148 #endif /* HAVE_ZSTD */
2150 default:
2151 file->compression = here->compression;
2152 break;
2155 offset = (file->pos + offset) - off2;
2156 file->pos = off2;
2157 ws_debug("Fast seek OK! %"PRId64, offset);
2159 if (offset) {
2160 /* Don't skip forward yet, wait until we want to read from
2161 the file; that way, if we do multiple seeks in a row,
2162 all involving forward skips, they will be combined. */
2163 file->seek_pending = true;
2164 file->skip = offset;
2166 return file->pos + offset;
2170 * Is this an uncompressed file, are we within the raw area,
2171 * are we either seeking backwards or seeking past the end
2172 * of the buffer, and are we set up for random access with
2173 * file_set_random_access()?
2175 * Again, note that this will never be true on a pipe, as
2176 * file_set_random_access() should never be called if we're
2177 * reading from a pipe.
2179 if (file->compression == UNCOMPRESSED && file->pos + offset >= file->raw
2180 && (offset < 0 || offset >= file->out.avail)
2181 && (file->fast_seek != NULL))
2184 * Yes. Just seek there within the file.
2186 if (ws_lseek64(file->fd, offset - file->out.avail, SEEK_CUR) == -1) {
2187 *err = errno;
2188 return -1;
2190 file->raw_pos += (offset - file->out.avail);
2191 buf_reset(&file->out);
2192 file->eof = false;
2193 file->seek_pending = false;
2194 file->err = 0;
2195 file->err_info = NULL;
2196 buf_reset(&file->in);
2197 file->pos += offset;
2198 return file->pos;
2202 * Are we seeking backwards?
2204 if (offset < 0) {
2206 * Yes. We have no fast seek data, so we have to rewind and
2207 * seek forward.
2208 * XXX - true only for compressed files.
2210 * Calculate the amount to skip forward after rewinding.
2212 offset += file->pos;
2213 if (offset < 0) { /* before start of file! */
2214 *err = EINVAL;
2215 return -1;
2217 /* rewind, then skip to offset */
2219 /* back up and start over */
2220 if (ws_lseek64(file->fd, file->start, SEEK_SET) == -1) {
2221 *err = errno;
2222 return -1;
2224 fast_seek_reset(file);
2225 file->raw_pos = file->start;
2226 gz_reset(file);
2230 * Either we're seeking backwards, but have rewound and now need to
2231 * skip forwards, or we're seeking forwards.
2233 * Skip what's in output buffer (one less gzgetc() check).
2235 n = (int64_t)file->out.avail > offset ? (unsigned)offset : file->out.avail;
2236 file->out.avail -= n;
2237 file->out.next += n;
2238 file->pos += n;
2239 offset -= n;
2241 /* request skip (if not zero) */
2242 if (offset) {
2243 /* Don't skip forward yet, wait until we want to read from
2244 the file; that way, if we do multiple seeks in a row,
2245 all involving forward skips, they will be combined. */
2246 file->seek_pending = true;
2247 file->skip = offset;
2249 return file->pos + offset;
2252 int64_t
2253 file_tell(FILE_T stream)
2255 /* return position */
2256 return stream->pos + (stream->seek_pending ? stream->skip : 0);
2259 int64_t
2260 file_tell_raw(FILE_T stream)
2262 return stream->raw_pos;
2266 file_fstat(FILE_T stream, ws_statb64 *statb, int *err)
2268 if (ws_fstat64(stream->fd, statb) == -1) {
2269 if (err != NULL)
2270 *err = errno;
2271 return -1;
2273 return 0;
2276 bool
2277 file_iscompressed(FILE_T stream)
2279 return stream->is_compressed;
2282 /* Returns a wtap compression type. If we don't know the compression type,
2283 * return WTAP_UNCOMPRESSED, but if our compression state is temporarily
2284 * UNKNOWN because we need to reread compression headers, return the last
2285 * known compression type.
2287 static wtap_compression_type
2288 file_get_compression_type(FILE_T stream)
2290 if (stream->is_compressed) {
2291 switch ((stream->compression == UNKNOWN) ? stream->last_compression : stream->compression) {
2293 case ZLIB:
2294 case GZIP_AFTER_HEADER:
2295 return WTAP_GZIP_COMPRESSED;
2297 case ZSTD:
2298 return WTAP_ZSTD_COMPRESSED;
2300 case LZ4:
2301 return WTAP_LZ4_COMPRESSED;
2303 case UNCOMPRESSED:
2304 return WTAP_UNCOMPRESSED;
2306 default: /* UNKNOWN, should never happen if is_compressed is set */
2307 ws_assert_not_reached();
2308 return WTAP_UNCOMPRESSED;
2311 return WTAP_UNCOMPRESSED;
2315 file_read(void *buf, unsigned int len, FILE_T file)
2317 unsigned got, n;
2319 /* if len is zero, avoid unnecessary operations */
2320 if (len == 0)
2321 return 0;
2323 /* process a skip request */
2324 if (file->seek_pending) {
2325 file->seek_pending = false;
2326 if (gz_skip(file, file->skip) == -1)
2327 return -1;
2331 * Get len bytes to buf, or less than len if at the end;
2332 * if buf is null, just throw the bytes away.
2334 got = 0;
2335 do {
2336 if (file->out.avail != 0) {
2337 /* We have stuff in the output buffer; copy
2338 what we have. */
2339 n = file->out.avail > len ? len : file->out.avail;
2340 if (buf != NULL) {
2341 memcpy(buf, file->out.next, n);
2342 buf = (char *)buf + n;
2344 file->out.next += n;
2345 file->out.avail -= n;
2346 len -= n;
2347 got += n;
2348 file->pos += n;
2349 } else if (file->err != 0) {
2350 /* We have nothing in the output buffer, and
2351 we have an error that may not have been
2352 reported yet; that means we can't generate
2353 any more data into the output buffer, so
2354 return an error indication. */
2355 return -1;
2356 } else if (file->eof && file->in.avail == 0) {
2357 /* We have nothing in the output buffer, and
2358 we're at the end of the input; just return
2359 with what we've gotten so far. */
2360 break;
2361 } else {
2362 /* We have nothing in the output buffer, and
2363 we can generate more data; get more output,
2364 looking for header if required, and
2365 keep looping to process the new stuff
2366 in the output buffer. */
2367 if (fill_out_buffer(file) == -1)
2368 return -1;
2370 } while (len);
2372 return (int)got;
2376 * XXX - this *peeks* at next byte, not a character.
2379 file_peekc(FILE_T file)
2381 int ret = 0;
2383 /* check that we're reading and that there's no error */
2384 if (file->err != 0)
2385 return -1;
2387 /* try output buffer (no need to check for skip request) */
2388 if (file->out.avail != 0) {
2389 return *(file->out.next);
2392 /* process a skip request */
2393 if (file->seek_pending) {
2394 file->seek_pending = false;
2395 if (gz_skip(file, file->skip) == -1)
2396 return -1;
2398 /* if we processed a skip request, there may be data in the buffer,
2399 * or an error could have occurred; likewise if we didn't do seek but
2400 * now call fill_out_buffer, the errors can occur. So we do this while
2401 * loop to check before and after - this is basically the logic from
2402 * file_read() but only for peeking not consuming a byte
2404 while (1) {
2405 if (file->out.avail != 0) {
2406 return *(file->out.next);
2408 else if (file->err != 0) {
2409 return -1;
2411 else if (file->eof && file->in.avail == 0) {
2412 return -1;
2414 else if (fill_out_buffer(file) == -1) {
2415 return -1;
2418 /* it's actually impossible to get here */
2419 return ret;
2423 * XXX - this gets a byte, not a character.
2426 file_getc(FILE_T file)
2428 unsigned char buf[1];
2429 int ret;
2431 /* check that we're reading and that there's no error */
2432 if (file->err != 0)
2433 return -1;
2435 /* try output buffer (no need to check for skip request) */
2436 if (file->out.avail != 0) {
2437 file->out.avail--;
2438 file->pos++;
2439 return *(file->out.next)++;
2442 ret = file_read(buf, 1, file);
2443 return ret < 1 ? -1 : buf[0];
2447 * Like file_gets, but returns a pointer to the terminating NUL
2448 * on success and NULL on failure.
2450 char *
2451 file_getsp(char *buf, int len, FILE_T file)
2453 unsigned left, n;
2454 char *curp;
2455 unsigned char *eol;
2457 /* check parameters */
2458 if (buf == NULL || len < 1)
2459 return NULL;
2461 /* check that there's no error */
2462 if (file->err != 0)
2463 return NULL;
2465 /* process a skip request */
2466 if (file->seek_pending) {
2467 file->seek_pending = false;
2468 if (gz_skip(file, file->skip) == -1)
2469 return NULL;
2472 /* copy output bytes up to new line or len - 1, whichever comes first --
2473 append a terminating zero to the string (we don't check for a zero in
2474 the contents, let the user worry about that) */
2475 curp = buf;
2476 left = (unsigned)len - 1;
2477 if (left) do {
2478 /* assure that something is in the output buffer */
2479 if (file->out.avail == 0) {
2480 /* We have nothing in the output buffer. */
2481 if (file->err != 0) {
2482 /* We have an error that may not have
2483 been reported yet; that means we
2484 can't generate any more data into
2485 the output buffer, so return an
2486 error indication. */
2487 return NULL;
2489 if (fill_out_buffer(file) == -1)
2490 return NULL; /* error */
2491 if (file->out.avail == 0) { /* end of file */
2492 if (curp == buf) /* got bupkus */
2493 return NULL;
2494 break; /* got something -- return it */
2498 /* look for end-of-line in current output buffer */
2499 n = file->out.avail > left ? left : file->out.avail;
2500 eol = (unsigned char *)memchr(file->out.next, '\n', n);
2501 if (eol != NULL)
2502 n = (unsigned)(eol - file->out.next) + 1;
2504 /* copy through end-of-line, or remainder if not found */
2505 memcpy(curp, file->out.next, n);
2506 file->out.avail -= n;
2507 file->out.next += n;
2508 file->pos += n;
2509 left -= n;
2510 curp += n;
2511 } while (left && eol == NULL);
2513 /* found end-of-line or out of space -- add a terminator and return
2514 a pointer to it */
2515 *curp = '\0';
2516 return curp;
2520 * Returns a pointer to the beginning of the buffer on success
2521 * and NULL on failure.
2523 char *
2524 file_gets(char *buf, int len, FILE_T file)
2526 if (!file_getsp(buf, len, file)) return NULL;
2527 return buf;
2531 file_eof(FILE_T file)
2533 /* return end-of-file state */
2534 return (file->eof && file->in.avail == 0 && file->out.avail == 0);
2538 * Routine to return a Wiretap error code (0 for no error, an errno
2539 * for a file error, or a WTAP_ERR_ code for other errors) for an
2540 * I/O stream. Also returns an error string for some errors.
2543 file_error(FILE_T fh, char **err_info)
2545 if (fh->err!=0 && err_info) {
2546 /* g_strdup() returns NULL for NULL argument */
2547 *err_info = g_strdup(fh->err_info);
2549 return fh->err;
2552 void
2553 file_clearerr(FILE_T stream)
2555 /* clear error and end-of-file */
2556 stream->err = 0;
2557 stream->err_info = NULL;
2558 stream->eof = false;
2561 void
2562 file_fdclose(FILE_T file)
2564 if (file->fd != -1)
2565 ws_close(file->fd);
2566 file->fd = -1;
2569 bool
2570 file_fdreopen(FILE_T file, const char *path)
2572 int fd;
2574 if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1)
2575 return false;
2576 file->fd = fd;
2577 return true;
2580 void
2581 file_close(FILE_T file)
2583 int fd = file->fd;
2585 /* free memory and close file */
2586 if (file->size) {
2587 #ifdef USE_ZLIB_OR_ZLIBNG
2588 ZLIB_PREFIX(inflateEnd)(&(file->strm));
2589 #endif /* USE_ZLIB_OR_ZLIBNG */
2590 #ifdef HAVE_ZSTD
2591 ZSTD_freeDCtx(file->zstd_dctx);
2592 #endif /* HAVE_ZSTD */
2593 #ifdef USE_LZ4
2594 LZ4F_freeDecompressionContext(file->lz4_dctx);
2595 #endif /* USE_LZ4 */
2596 g_free(file->out.buf);
2597 g_free(file->in.buf);
2599 g_free(file->fast_seek_cur);
2600 file->err = 0;
2601 file->err_info = NULL;
2602 g_free(file);
2604 * If fd is -1, somebody's done a file_closefd() on us, so
2605 * we don't need to close the FD itself, and shouldn't do
2606 * so.
2608 if (fd != -1)
2609 ws_close(fd);
2612 #ifdef USE_ZLIB_OR_ZLIBNG
2613 /* internal gzip file state data structure for writing */
2614 struct wtap_writer {
2615 int fd; /* file descriptor */
2616 int64_t pos; /* current position in uncompressed data */
2617 unsigned size; /* buffer size, zero if not allocated yet */
2618 unsigned want; /* requested buffer size, default is GZBUFSIZE */
2619 unsigned char *in; /* input buffer */
2620 unsigned char *out; /* output buffer (double-sized when reading) */
2621 unsigned char *next; /* next output data to deliver or write */
2622 int level; /* compression level */
2623 int strategy; /* compression strategy */
2624 int err; /* error code */
2625 const char *err_info; /* additional error information string for some errors */
2626 /* zlib deflate stream */
2627 zlib_stream strm; /* stream structure in-place (not a pointer) */
2630 GZWFILE_T
2631 gzwfile_open(const char *path)
2633 int fd;
2634 GZWFILE_T state;
2635 int save_errno;
2637 fd = ws_open(path, O_BINARY|O_WRONLY|O_CREAT|O_TRUNC, 0666);
2638 if (fd == -1)
2639 return NULL;
2640 state = gzwfile_fdopen(fd);
2641 if (state == NULL) {
2642 save_errno = errno;
2643 ws_close(fd);
2644 errno = save_errno;
2646 return state;
2649 GZWFILE_T
2650 gzwfile_fdopen(int fd)
2652 GZWFILE_T state;
2654 /* allocate wtap_writer structure to return */
2655 state = (GZWFILE_T)g_try_malloc(sizeof *state);
2656 if (state == NULL)
2657 return NULL;
2658 state->fd = fd;
2659 state->size = 0; /* no buffers allocated yet */
2660 state->want = GZBUFSIZE; /* requested buffer size */
2662 state->level = Z_DEFAULT_COMPRESSION;
2663 state->strategy = Z_DEFAULT_STRATEGY;
2665 /* initialize stream */
2666 state->err = Z_OK; /* clear error */
2667 state->err_info = NULL; /* clear additional error information */
2668 state->pos = 0; /* no uncompressed data yet */
2669 state->strm.avail_in = 0; /* no input data yet */
2671 /* return stream */
2672 return state;
2675 /* Initialize state for writing a gzip file. Mark initialization by setting
2676 state->size to non-zero. Return -1, and set state->err and possibly
2677 state->err_info, on failure; return 0 on success. */
2678 static int
2679 gz_init(GZWFILE_T state)
2681 int ret;
2682 #ifdef HAVE_ZLIBNG
2683 zng_streamp strm = &(state->strm);
2684 #else /* HAVE_ZLIBNG */
2685 z_streamp strm = &(state->strm);
2686 #endif /* HAVE_ZLIBNG */
2688 /* allocate input and output buffers */
2689 state->in = (unsigned char *)g_try_malloc(state->want);
2690 state->out = (unsigned char *)g_try_malloc(state->want);
2691 if (state->in == NULL || state->out == NULL) {
2692 g_free(state->out);
2693 g_free(state->in);
2694 state->err = ENOMEM;
2695 return -1;
2698 /* allocate deflate memory, set up for gzip compression */
2699 strm->zalloc = Z_NULL;
2700 strm->zfree = Z_NULL;
2701 strm->opaque = Z_NULL;
2702 ret = ZLIB_PREFIX(deflateInit2)(strm, state->level, Z_DEFLATED,
2703 15 + 16, 8, state->strategy);
2704 if (ret != Z_OK) {
2705 g_free(state->out);
2706 g_free(state->in);
2707 if (ret == Z_MEM_ERROR) {
2708 /* This means "not enough memory". */
2709 state->err = ENOMEM;
2710 } else {
2711 /* This "shouldn't happen". */
2712 state->err = WTAP_ERR_INTERNAL;
2713 state->err_info = "Unknown error from deflateInit2()";
2715 return -1;
2718 /* mark state as initialized */
2719 state->size = state->want;
2721 /* initialize write buffer */
2722 strm->avail_out = state->size;
2723 strm->next_out = state->out;
2724 state->next = strm->next_out;
2725 return 0;
2728 /* Compress whatever is at avail_in and next_in and write to the output file.
2729 Return -1, and set state->err and possibly state->err_info, if there is
2730 an error writing to the output file; return 0 on success.
2731 flush is assumed to be a valid deflate() flush value. If flush is Z_FINISH,
2732 then the deflate() state is reset to start a new gzip stream. */
2733 static int
2734 gz_comp(GZWFILE_T state, int flush)
2736 int ret;
2737 ssize_t got;
2738 ptrdiff_t have;
2739 #ifdef HAVE_ZLIBNG
2740 zng_streamp strm = &(state->strm);
2741 #else /* HAVE_ZLIBNG */
2742 z_streamp strm = &(state->strm);
2743 #endif /* HAVE_ZLIBNG */
2744 /* allocate memory if this is the first time through */
2745 if (state->size == 0 && gz_init(state) == -1)
2746 return -1;
2748 /* run deflate() on provided input until it produces no more output */
2749 ret = Z_OK;
2750 do {
2751 /* write out current buffer contents if full, or if flushing, but if
2752 doing Z_FINISH then don't write until we get to Z_STREAM_END */
2753 if (strm->avail_out == 0 || (flush != Z_NO_FLUSH &&
2754 (flush != Z_FINISH || ret == Z_STREAM_END))) {
2755 have = strm->next_out - state->next;
2756 if (have) {
2757 got = ws_write(state->fd, state->next, (unsigned int)have);
2758 if (got < 0) {
2759 state->err = errno;
2760 return -1;
2762 if ((ptrdiff_t)got != have) {
2763 state->err = WTAP_ERR_SHORT_WRITE;
2764 return -1;
2767 if (strm->avail_out == 0) {
2768 strm->avail_out = state->size;
2769 strm->next_out = state->out;
2771 state->next = strm->next_out;
2774 /* compress */
2775 have = strm->avail_out;
2776 ret = ZLIB_PREFIX(deflate)(strm, flush);
2777 if (ret == Z_STREAM_ERROR) {
2778 /* This "shouldn't happen". */
2779 state->err = WTAP_ERR_INTERNAL;
2780 state->err_info = "Z_STREAM_ERROR from deflate()";
2781 return -1;
2783 have -= strm->avail_out;
2784 } while (have);
2786 /* if that completed a deflate stream, allow another to start */
2787 if (flush == Z_FINISH)
2788 ZLIB_PREFIX(deflateReset)(strm);
2790 /* all done, no errors */
2791 return 0;
2794 /* Write out len bytes from buf. Return 0, and set state->err, on
2795 failure or on an attempt to write 0 bytes (in which case state->err
2796 is Z_OK); return the number of bytes written on success. */
2797 unsigned
2798 gzwfile_write(GZWFILE_T state, const void *buf, unsigned len)
2800 unsigned put = len;
2801 unsigned n;
2802 #ifdef HAVE_ZLIBNG
2803 zng_streamp strm;
2804 #else /* HAVE_ZLIBNG */
2805 z_streamp strm;
2806 #endif /* HAVE_ZLIBNG */
2808 strm = &(state->strm);
2810 /* check that there's no error */
2811 if (state->err != Z_OK)
2812 return 0;
2814 /* if len is zero, avoid unnecessary operations */
2815 if (len == 0)
2816 return 0;
2818 /* allocate memory if this is the first time through */
2819 if (state->size == 0 && gz_init(state) == -1)
2820 return 0;
2822 /* for small len, copy to input buffer, otherwise compress directly */
2823 if (len < state->size) {
2824 /* copy to input buffer, compress when full */
2825 do {
2826 if (strm->avail_in == 0)
2827 strm->next_in = state->in;
2828 n = state->size - strm->avail_in;
2829 if (n > len)
2830 n = len;
2831 #ifdef z_const
2832 DIAG_OFF(cast-qual)
2833 memcpy((Bytef *)strm->next_in + strm->avail_in, buf, n);
2834 DIAG_ON(cast-qual)
2835 #else /* z_const */
2836 memcpy(strm->next_in + strm->avail_in, buf, n);
2837 #endif /* z_const */
2838 strm->avail_in += n;
2839 state->pos += n;
2840 buf = (const char *)buf + n;
2841 len -= n;
2842 if (len && gz_comp(state, Z_NO_FLUSH) == -1)
2843 return 0;
2844 } while (len);
2846 else {
2847 /* consume whatever's left in the input buffer */
2848 if (strm->avail_in != 0 && gz_comp(state, Z_NO_FLUSH) == -1)
2849 return 0;
2851 /* directly compress user buffer to file */
2852 strm->avail_in = len;
2853 #ifdef z_const
2854 strm->next_in = (z_const Bytef *)buf;
2855 #else /* z_const */
2856 DIAG_OFF(cast-qual)
2857 strm->next_in = (Bytef *)buf;
2858 DIAG_ON(cast-qual)
2859 #endif /* z_const */
2860 state->pos += len;
2861 if (gz_comp(state, Z_NO_FLUSH) == -1)
2862 return 0;
2865 /* input was all buffered or compressed (put will fit in int) */
2866 return (int)put;
2869 /* Flush out what we've written so far. Returns -1, and sets state->err,
2870 on failure; returns 0 on success. */
2872 gzwfile_flush(GZWFILE_T state)
2874 /* check that there's no error */
2875 if (state->err != Z_OK)
2876 return -1;
2878 /* compress remaining data with Z_SYNC_FLUSH */
2879 gz_comp(state, Z_SYNC_FLUSH);
2880 if (state->err != Z_OK)
2881 return -1;
2882 return 0;
2885 /* Flush out all data written, and close the file. Returns a Wiretap
2886 error on failure; returns 0 on success. */
2888 gzwfile_close(GZWFILE_T state)
2890 int ret = 0;
2892 /* flush, free memory, and close file */
2893 if (gz_comp(state, Z_FINISH) == -1)
2894 ret = state->err;
2895 (void)ZLIB_PREFIX(deflateEnd)(&(state->strm));
2896 g_free(state->out);
2897 g_free(state->in);
2898 state->err = Z_OK;
2899 if (ws_close(state->fd) == -1 && ret == 0)
2900 ret = errno;
2901 g_free(state);
2902 return ret;
2906 gzwfile_geterr(GZWFILE_T state)
2908 return state->err;
2910 #endif /* USE_ZLIB_OR_ZLIBNG */
2912 #ifdef USE_LZ4
2913 /* internal lz4 file state data structure for writing */
2914 struct lz4_writer {
2915 int fd; /* file descriptor */
2916 int64_t pos; /* current position in uncompressed data */
2917 int64_t pos_out;
2918 size_t size_out; /* buffer size, zero if not allocated yet */
2919 size_t want; /* requested buffer size, default is LZ4BUFSIZE */
2920 size_t want_out; /* requested output buffer size, determined from want */
2921 unsigned char *out; /* output buffer, containing uncompressed data */
2922 int err; /* error code */
2923 const char *err_info; /* additional error information string for some errors */
2924 LZ4F_preferences_t lz4_prefs;
2925 LZ4F_cctx *lz4_cctx;
2928 LZ4WFILE_T
2929 lz4wfile_open(const char *path)
2931 int fd;
2932 LZ4WFILE_T state;
2933 int save_errno;
2935 fd = ws_open(path, O_BINARY|O_WRONLY|O_CREAT|O_TRUNC, 0666);
2936 if (fd == -1)
2937 return NULL;
2938 state = lz4wfile_fdopen(fd);
2939 if (state == NULL) {
2940 save_errno = errno;
2941 ws_close(fd);
2942 errno = save_errno;
2944 return state;
2947 LZ4WFILE_T
2948 lz4wfile_fdopen(int fd)
2950 LZ4WFILE_T state;
2952 /* allocate wtap_writer structure to return */
2953 state = (LZ4WFILE_T)g_try_malloc(sizeof *state);
2954 if (state == NULL)
2955 return NULL;
2956 state->fd = fd;
2957 state->size_out = 0; /* no buffer allocated yet */
2958 state->want = LZ4BUFSIZE; /* max input size (a block) */
2959 state->want_out = LZ4F_compressBound(state->want, &state->lz4_prefs);
2961 * This size guarantees that we will always have enough room to
2962 * write the result of LZ4F_compressUpdate (or Flush or End),
2963 * so long as the output buffer is empty (i.e., we immediately
2964 * write to the output file anything the compressor hands back
2965 * instead of buffering.)
2968 memset(&state->lz4_prefs, 0, sizeof(LZ4F_preferences_t));
2969 /* Use the same prefs as the lz4 command line utility defaults. */
2970 state->lz4_prefs.frameInfo.blockMode = LZ4F_blockIndependent; /* Allows fast seek */
2971 state->lz4_prefs.frameInfo.contentChecksumFlag = 1;
2972 state->lz4_prefs.frameInfo.blockSizeID = LZ4F_max4MB;
2973 /* XXX - What should we set state->lz4_prefs.compressionLevel to?
2974 * The command line utility uses 1, recommends 9 as another option, and
2975 * also there's 12 (max).
2977 * We could provide an API call or perhaps two or three preset options.
2979 state->lz4_prefs.compressionLevel = 1;
2981 /* initialize stream */
2982 state->err = 0; /* clear error */
2983 state->err_info = NULL; /* clear additional error information */
2984 state->pos = 0; /* no uncompressed data yet */
2985 state->pos_out = 0;
2987 /* return stream */
2988 return state;
2991 /* Writes len bytes from the output buffer to the file.
2992 * Return true on success; returns false and sets state->err on failure.
2994 static bool
2995 lz4_write_out(LZ4WFILE_T state, size_t len)
2997 if (len > 0) {
2998 ssize_t got = ws_write(state->fd, state->out, (unsigned)len);
2999 if (got < 0) {
3000 state->err = errno;
3001 return false;
3003 if ((unsigned)got != len) {
3004 state->err = WTAP_ERR_SHORT_WRITE;
3005 return false;
3007 state->pos_out += got;
3009 return true;
3012 /* Initialize state for writing an lz4 file. Mark initialization by setting
3013 state->size to non-zero. Return -1, and set state->err and possibly
3014 state->err_info, on failure; return 0 on success. */
3015 static int
3016 lz4_init(LZ4WFILE_T state)
3018 LZ4F_errorCode_t ret;
3020 /* create Compression context */
3021 ret = LZ4F_createCompressionContext(&state->lz4_cctx, LZ4F_VERSION);
3022 if (LZ4F_isError(ret)) {
3023 state->err = WTAP_ERR_CANT_WRITE; // XXX - WTAP_ERR_COMPRESS?
3024 state->err_info = LZ4F_getErrorName(ret);
3025 return -1;
3028 /* allocate buffer */
3029 state->out = (unsigned char *)g_try_malloc(state->want_out);
3030 if (state->out == NULL) {
3031 g_free(state->out);
3032 LZ4F_freeCompressionContext(state->lz4_cctx);
3033 state->err = ENOMEM;
3034 return -1;
3037 ret = LZ4F_compressBegin(state->lz4_cctx, state->out, state->want_out, &state->lz4_prefs);
3038 if (LZ4F_isError(ret)) {
3039 state->err = WTAP_ERR_CANT_WRITE; // XXX - WTAP_ERR_COMPRESS?
3040 state->err_info = LZ4F_getErrorName(ret);
3041 return -1;
3043 if (!lz4_write_out(state, ret)) {
3044 return -1;
3047 /* mark state as initialized */
3048 state->size_out = state->want_out;
3050 return 0;
3053 /* Write out len bytes from buf. Return 0, and set state->err, on
3054 failure or on an attempt to write 0 bytes (in which case state->err
3055 is 0); return the number of bytes written on success. */
3056 size_t
3057 lz4wfile_write(LZ4WFILE_T state, const void *buf, size_t len)
3059 size_t to_write;
3060 size_t put = len;
3062 /* check that there's no error */
3063 if (state->err != 0)
3064 return 0;
3066 /* if len is zero, avoid unnecessary operations */
3067 if (len == 0)
3068 return 0;
3070 /* allocate memory if this is the first time through */
3071 if (state->size_out == 0 && lz4_init(state) == -1)
3072 return 0;
3074 do {
3075 to_write = MIN(len, state->want);
3076 size_t bytesWritten = LZ4F_compressUpdate(state->lz4_cctx, state->out, state->size_out,
3077 buf, to_write, NULL);
3078 if (LZ4F_isError(bytesWritten)) {
3079 state->err = WTAP_ERR_CANT_WRITE; // XXX - WTAP_ERR_COMPRESS?
3080 state->err_info = LZ4F_getErrorName(bytesWritten);
3081 return 0;
3083 if (!lz4_write_out(state, bytesWritten)) {
3084 return 0;
3086 state->pos += to_write;
3087 len -= to_write;
3088 } while (len);
3090 /* input was all buffered or compressed */
3091 return put;
3094 /* Flush out what we've written so far. Returns -1, and sets state->err,
3095 on failure; returns 0 on success. */
3097 lz4wfile_flush(LZ4WFILE_T state)
3099 size_t bytesWritten;
3100 /* check that there's no error */
3101 if (state->err != 0)
3102 return -1;
3104 bytesWritten = LZ4F_flush(state->lz4_cctx, state->out, state->size_out, NULL);
3105 if (LZ4F_isError(bytesWritten)) {
3106 // Should never happen if size_out >= LZ4F_compressBound(0, prefsPtr)
3107 state->err = WTAP_ERR_INTERNAL;
3108 return -1;
3110 if (!lz4_write_out(state, bytesWritten)) {
3111 return -1;
3113 return 0;
3116 /* Flush out all data written, and close the file. Returns a Wiretap
3117 error on failure; returns 0 on success. */
3119 lz4wfile_close(LZ4WFILE_T state)
3121 int ret = 0;
3123 /* flush, free memory, and close file */
3124 size_t bytesWritten = LZ4F_compressEnd(state->lz4_cctx, state->out, state->size_out, NULL);
3125 if (LZ4F_isError(bytesWritten)) {
3126 // Should never happen if size_out >= LZ4F_compressBound(0, prefsPtr)
3127 ret = WTAP_ERR_INTERNAL;
3129 if (!lz4_write_out(state, bytesWritten)) {
3130 ret = state->err;
3132 g_free(state->out);
3133 LZ4F_freeCompressionContext(state->lz4_cctx);
3134 if (ws_close(state->fd) == -1 && ret == 0)
3135 ret = errno;
3136 g_free(state);
3137 return ret;
3141 lz4wfile_geterr(LZ4WFILE_T state)
3143 return state->err;
3145 #endif /* USE_LZ4 */
3147 * Editor modelines - https://www.wireshark.org/tools/modelines.html
3149 * Local variables:
3150 * c-basic-offset: 4
3151 * tab-width: 8
3152 * indent-tabs-mode: nil
3153 * End:
3155 * vi: set shiftwidth=4 tabstop=8 expandtab:
3156 * :indentSize=4:tabSize=8:noTabs=true: