kerberos: remember account details from dissect_krb5_PAC_UPN_DNS_INFO on EncTicketPar...
[wireshark-sm.git] / wiretap / file_wrappers.c
blob4202a4bb180093deb22e4edc80d3bdc15a0fcc3a
1 /* file_wrappers.c
3 * Wiretap Library
4 * Copyright (c) 1998 by Gilbert Ramirez <gram@alumni.rice.edu>
6 * SPDX-License-Identifier: GPL-2.0-or-later
7 */
9 /* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib
10 * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
11 * under licence:
13 * SPDX-License-Identifier: Zlib
17 #include "config.h"
18 #include "file_wrappers.h"
20 #include <assert.h>
21 #include <errno.h>
22 #include <string.h>
23 #include "wtap-int.h"
25 #include <wsutil/file_util.h>
27 #ifdef HAVE_ZLIB
28 #define ZLIB_CONST
29 #include <zlib.h>
30 #endif /* HAVE_ZLIB */
32 #ifdef HAVE_ZSTD
33 #include <zstd.h>
34 #endif
36 #ifdef HAVE_LZ4
37 #include <lz4.h>
39 #if LZ4_VERSION_NUMBER >= 10703
40 #define USE_LZ4
41 #include <lz4frame.h>
42 #endif
43 #endif
46 * See RFC 1952:
48 * https://tools.ietf.org/html/rfc1952
50 * for a description of the gzip file format.
52 * Some other compressed file formats we might want to support:
54 * XZ format: https://tukaani.org/xz/
56 * Bzip2 format: https://www.sourceware.org/bzip2/
58 * Lzip format: https://www.nongnu.org/lzip/
62 * List of compression types supported.
64 static struct compression_type {
65 wtap_compression_type type;
66 const char *extension;
67 const char *description;
68 } compression_types[] = {
69 #ifdef HAVE_ZLIB
70 { WTAP_GZIP_COMPRESSED, "gz", "gzip compressed" },
71 #endif
72 #ifdef HAVE_ZSTD
73 { WTAP_ZSTD_COMPRESSED, "zst", "zstd compressed" },
74 #endif
75 #ifdef USE_LZ4
76 { WTAP_LZ4_COMPRESSED, "lz4", "lz4 compressed" },
77 #endif
78 { WTAP_UNCOMPRESSED, NULL, NULL }
81 static wtap_compression_type file_get_compression_type(FILE_T stream);
83 wtap_compression_type
84 wtap_get_compression_type(wtap *wth)
86 return file_get_compression_type((wth->fh == NULL) ? wth->random_fh : wth->fh);
89 const char *
90 wtap_compression_type_description(wtap_compression_type compression_type)
92 for (struct compression_type *p = compression_types;
93 p->type != WTAP_UNCOMPRESSED; p++) {
94 if (p->type == compression_type)
95 return p->description;
97 return NULL;
100 const char *
101 wtap_compression_type_extension(wtap_compression_type compression_type)
103 for (struct compression_type *p = compression_types;
104 p->type != WTAP_UNCOMPRESSED; p++) {
105 if (p->type == compression_type)
106 return p->extension;
108 return NULL;
111 GSList *
112 wtap_get_all_compression_type_extensions_list(void)
114 GSList *extensions;
116 extensions = NULL; /* empty list, to start with */
118 for (struct compression_type *p = compression_types;
119 p->type != WTAP_UNCOMPRESSED; p++)
120 extensions = g_slist_prepend(extensions, (void *)p->extension);
122 return extensions;
125 /* #define GZBUFSIZE 8192 */
126 #define GZBUFSIZE 4096
/*
 * How the reader is currently treating its input; stored in the
 * "compression" and "last_compression" members of struct wtap_reader
 * and in fast-seek points.
 */
typedef enum {
    UNKNOWN,            /* not determined yet - look for a compression header */
    UNCOMPRESSED,       /* no compression - input is copied straight to output */
    ZLIB,               /* input is a zlib stream to be inflated */
    GZIP_AFTER_HEADER,  /* fast-seek point recorded just past a gzip header */
    ZSTD,               /* input is decompressed with zstd */
    LZ4,                /* input is decompressed with lz4 */
} compression_t;
139 * We limit the size of our input and output buffers to 2^30 bytes,
140 * because:
142 * 1) on Windows with MSVC, the return value of _read() is int,
143 * so the biggest read you can do is INT_MAX, and the biggest
144 * power of 2 below that is 2^30;
146 * 2) the "avail_in" and "avail_out" values in a z_stream structure
147 * in zlib are uInts, and those are unsigned ints, and that
148 * imposes a limit on the buffer size when we're reading a
149 * gzipped file.
151 * Thus, we use unsigned for the buffer sizes, offsets, amount available
152 * from the buffer, etc.
154 * If we want an even bigger buffer for uncompressed data, or for
155 * some other form of compression, then the unsigned-sized values should
156 * be in structure values used only for reading gzipped files, and
157 * other values should be used for uncompressed data or data
158 * compressed using other algorithms (e.g., in a union).
160 #define MAX_READ_BUF_SIZE (1U << 30)
/*
 * A buffer together with a read cursor.  The undelivered data occupies
 * the "avail" bytes starting at "next".
 */
struct wtap_reader_buf {
    uint8_t  *buf;    /* start of the buffer */
    uint8_t  *next;   /* next byte to hand out from the buffer */
    unsigned  avail;  /* count of bytes that can be handed out from next */
};
168 struct wtap_reader {
169 int fd; /* file descriptor */
170 int64_t raw_pos; /* current position in file (just to not call lseek()) */
171 int64_t pos; /* current position in uncompressed data */
172 unsigned size; /* buffer size */
174 struct wtap_reader_buf in; /* input buffer, containing compressed data */
175 struct wtap_reader_buf out; /* output buffer, containing uncompressed data */
177 bool eof; /* true if end of input file reached */
178 int64_t start; /* where the gzip data started, for rewinding */
179 int64_t raw; /* where the raw data started, for seeking */
180 compression_t compression; /* type of compression, if any */
181 compression_t last_compression; /* last known compression type */
182 bool is_compressed; /* false if completely uncompressed, true otherwise */
184 /* seek request */
185 int64_t skip; /* amount to skip (already rewound if backwards) */
186 bool seek_pending; /* true if seek request pending */
188 /* error information */
189 int err; /* error code */
190 const char *err_info; /* additional error information string for some errors */
192 #ifdef HAVE_ZLIB
193 /* zlib inflate stream */
194 z_stream strm; /* stream structure in-place (not a pointer) */
195 bool dont_check_crc; /* true if we aren't supposed to check the CRC */
196 #endif
197 /* fast seeking */
198 GPtrArray *fast_seek;
199 void *fast_seek_cur;
200 #ifdef HAVE_ZSTD
201 ZSTD_DCtx *zstd_dctx;
202 #endif
203 #ifdef USE_LZ4
204 LZ4F_dctx *lz4_dctx;
205 #endif
208 /* Current read offset within a buffer. */
209 static unsigned
210 offset_in_buffer(struct wtap_reader_buf *buf)
212 /* buf->next points to the next byte to read, and buf->buf points
213 to the first byte in the buffer, so the difference between them
214 is the offset.
216 This will fit in an unsigned int, because it can't be bigger
217 than the size of the buffer, which is an unsigned int. */
218 return (unsigned)(buf->next - buf->buf);
221 /* Number of bytes of data that are in a buffer. */
222 static unsigned
223 bytes_in_buffer(struct wtap_reader_buf *buf)
225 /* buf->next + buf->avail points just past the last byte of data in
226 the buffer.
227 Thus, (buf->next + buf->avail) - buf->buf is the number of bytes
228 of data in the buffer.
230 This will fit in an unsigned, because it can't be bigger
231 than the size of the buffer, which is a unsigned. */
232 return (unsigned)((buf->next + buf->avail) - buf->buf);
235 /* Reset a buffer, discarding all data in the buffer, so we read into
236 it starting at the beginning. */
237 static void
238 buf_reset(struct wtap_reader_buf *buf)
240 buf->next = buf->buf;
241 buf->avail = 0;
244 static int
245 buf_read(FILE_T state, struct wtap_reader_buf *buf)
247 unsigned space_left, to_read;
248 unsigned char *read_ptr;
249 ssize_t ret;
251 /* How much space is left at the end of the buffer?
252 XXX - the output buffer actually has state->size * 2 bytes. */
253 space_left = state->size - bytes_in_buffer(buf);
254 if (space_left == 0) {
255 /* There's no space left, so we start fresh at the beginning
256 of the buffer. */
257 buf_reset(buf);
259 read_ptr = buf->buf;
260 to_read = state->size;
261 } else {
262 /* There's some space left; try to read as much data as we
263 can into that space. We may get less than that if we're
264 reading from a pipe or if we're near the end of the file. */
265 read_ptr = buf->next + buf->avail;
266 to_read = space_left;
269 ret = ws_read(state->fd, read_ptr, to_read);
270 if (ret < 0) {
271 state->err = errno;
272 state->err_info = NULL;
273 return -1;
275 if (ret == 0)
276 state->eof = true;
277 state->raw_pos += ret;
278 buf->avail += (unsigned)ret;
279 return 0;
282 static int /* gz_avail */
283 fill_in_buffer(FILE_T state)
285 if (state->err != 0)
286 return -1;
287 if (!state->eof) {
288 if (buf_read(state, &state->in) < 0)
289 return -1;
291 return 0;
294 #define ZLIB_WINSIZE 32768
296 struct fast_seek_point {
297 int64_t out; /* corresponding offset in uncompressed data */
298 int64_t in; /* offset in input file of first full byte */
300 compression_t compression;
301 union {
302 struct {
303 #ifdef HAVE_INFLATEPRIME
304 int bits; /* number of bits (1-7) from byte at in - 1, or 0 */
305 #endif
306 unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
308 /* be gentle with Z_STREAM_END, 8 bytes more... Another solution would be to comment checks out */
309 uint32_t adler;
310 uint32_t total_out;
311 } zlib;
312 } data;
315 struct zlib_cur_seek_point {
316 unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
317 unsigned int pos;
318 unsigned int have;
321 #define SPAN INT64_C(1048576)
322 static struct fast_seek_point *
323 fast_seek_find(FILE_T file, int64_t pos)
325 struct fast_seek_point *smallest = NULL;
326 struct fast_seek_point *item;
327 unsigned low, i, max;
329 if (!file->fast_seek)
330 return NULL;
332 for (low = 0, max = file->fast_seek->len; low < max; ) {
333 i = (low + max) / 2;
334 item = (struct fast_seek_point *)file->fast_seek->pdata[i];
336 if (pos < item->out)
337 max = i;
338 else if (pos > item->out) {
339 smallest = item;
340 low = i + 1;
341 } else {
342 return item;
345 return smallest;
348 static void
349 fast_seek_header(FILE_T file, int64_t in_pos, int64_t out_pos,
350 compression_t compression)
352 struct fast_seek_point *item = NULL;
354 if (file->fast_seek->len != 0)
355 item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
357 if (!item || item->out < out_pos) {
358 struct fast_seek_point *val = g_new(struct fast_seek_point,1);
359 val->in = in_pos;
360 val->out = out_pos;
361 val->compression = compression;
363 g_ptr_array_add(file->fast_seek, val);
367 static void
368 fast_seek_reset(
369 #ifdef HAVE_ZLIB
370 FILE_T state)
371 #else
372 FILE_T state _U_)
373 #endif
375 #ifdef HAVE_ZLIB
376 if (state->compression == ZLIB && state->fast_seek_cur != NULL) {
377 struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
379 cur->have = 0;
381 #endif
384 #ifdef HAVE_ZLIB
/* Fetch the next input byte, or -1 on end of input or error.
 *
 * The macro expects a FILE_T named "state" to be in scope.  It refills
 * the input buffer through fill_in_buffer() only when the buffer is
 * empty.
 *
 * Note:
 *
 *    1) errors from buf_read(), and hence from fill_in_buffer(), are
 *       "sticky": once one has happened fill_in_buffer() does no more
 *       reading;
 *
 *    2) GZ_GETC() yields -1 at EOF;
 *
 * so several GZ_GETC() calls may be made in a row with only the last
 * one being checked for failure. */
#define GZ_GETC() \
    (((state->in.avail != 0 || fill_in_buffer(state) != -1) && \
      state->in.avail != 0) \
        ? (state->in.avail--, *(state->in.next)++) \
        : -1)
402 /* Get a one-byte integer and return 0 on success and the value in *ret.
403 Otherwise -1 is returned, state->err is set, and *ret is not modified. */
404 static int
405 gz_next1(FILE_T state, uint8_t *ret)
407 int ch;
409 ch = GZ_GETC();
410 if (ch == -1) {
411 if (state->err == 0) {
412 /* EOF */
413 state->err = WTAP_ERR_SHORT_READ;
414 state->err_info = NULL;
416 return -1;
418 *ret = ch;
419 return 0;
422 /* Get a two-byte little-endian integer and return 0 on success and the value
423 in *ret. Otherwise -1 is returned, state->err is set, and *ret is not
424 modified. */
425 static int
426 gz_next2(FILE_T state, uint16_t *ret)
428 uint16_t val;
429 int ch;
431 val = GZ_GETC();
432 ch = GZ_GETC();
433 if (ch == -1) {
434 if (state->err == 0) {
435 /* EOF */
436 state->err = WTAP_ERR_SHORT_READ;
437 state->err_info = NULL;
439 return -1;
441 val += (uint16_t)ch << 8;
442 *ret = val;
443 return 0;
446 /* Get a four-byte little-endian integer and return 0 on success and the value
447 in *ret. Otherwise -1 is returned, state->err is set, and *ret is not
448 modified. */
449 static int
450 gz_next4(FILE_T state, uint32_t *ret)
452 uint32_t val;
453 int ch;
455 val = GZ_GETC();
456 val += (unsigned)GZ_GETC() << 8;
457 val += (uint32_t)GZ_GETC() << 16;
458 ch = GZ_GETC();
459 if (ch == -1) {
460 if (state->err == 0) {
461 /* EOF */
462 state->err = WTAP_ERR_SHORT_READ;
463 state->err_info = NULL;
465 return -1;
467 val += (uint32_t)ch << 24;
468 *ret = val;
469 return 0;
472 /* Skip the specified number of bytes and return 0 on success. Otherwise -1
473 is returned. */
474 static int
475 gz_skipn(FILE_T state, size_t n)
477 while (n != 0) {
478 if (GZ_GETC() == -1) {
479 if (state->err == 0) {
480 /* EOF */
481 state->err = WTAP_ERR_SHORT_READ;
482 state->err_info = NULL;
484 return -1;
486 n--;
488 return 0;
491 /* Skip a null-terminated string and return 0 on success. Otherwise -1
492 is returned. */
493 static int
494 gz_skipzstr(FILE_T state)
496 int ch;
498 /* It's null-terminated, so scan until we read a byte with
499 the value 0 or get an error. */
500 while ((ch = GZ_GETC()) > 0)
502 if (ch == -1) {
503 if (state->err == 0) {
504 /* EOF */
505 state->err = WTAP_ERR_SHORT_READ;
506 state->err_info = NULL;
508 return -1;
510 return 0;
513 static void
514 zlib_fast_seek_add(FILE_T file, struct zlib_cur_seek_point *point, int bits, int64_t in_pos, int64_t out_pos)
516 /* it's for sure after gzip header, so file->fast_seek->len != 0 */
517 struct fast_seek_point *item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
519 #ifndef HAVE_INFLATEPRIME
520 if (bits)
521 return;
522 #endif
524 /* Glib has got Balanced Binary Trees (GTree) but I couldn't find a way to do quick search for nearest (and smaller) value to seek (It's what fast_seek_find() do)
525 * Inserting value in middle of sorted array is expensive, so we want to add only in the end.
526 * It's not big deal, cause first-read don't usually invoke seeking
528 if (item->out + SPAN < out_pos) {
529 struct fast_seek_point *val = g_new(struct fast_seek_point,1);
530 val->in = in_pos;
531 val->out = out_pos;
532 val->compression = ZLIB;
533 #ifdef HAVE_INFLATEPRIME
534 val->data.zlib.bits = bits;
535 #endif
536 if (point->pos != 0) {
537 unsigned int left = ZLIB_WINSIZE - point->pos;
539 memcpy(val->data.zlib.window, point->window + point->pos, left);
540 memcpy(val->data.zlib.window + left, point->window, point->pos);
541 } else
542 memcpy(val->data.zlib.window, point->window, ZLIB_WINSIZE);
545 * XXX - strm.adler is a uLong in at least some versions
546 * of zlib, and uLong is an unsigned long in at least
547 * some of those versions, which means it's 64-bit
548 * on LP64 platforms, even though the checksum is
549 * 32-bit. We assume the actual Adler checksum
550 * is in the lower 32 bits of strm.adler; as the
551 * checksum in the file is only 32 bits, we save only
552 * those lower 32 bits, and cast away any additional
553 * bits to squelch warnings.
555 * The same applies to strm.total_out.
557 val->data.zlib.adler = (uint32_t) file->strm.adler;
558 val->data.zlib.total_out = (uint32_t) file->strm.total_out;
559 g_ptr_array_add(file->fast_seek, val);
563 static void /* gz_decomp */
564 zlib_read(FILE_T state, unsigned char *buf, unsigned int count)
566 int ret = 0; /* XXX */
567 uint32_t crc, len;
568 z_streamp strm = &(state->strm);
570 unsigned char *buf2 = buf;
571 unsigned int count2 = count;
573 strm->avail_out = count;
574 strm->next_out = buf;
576 /* fill output buffer up to end of deflate stream or error */
577 do {
578 /* get more input for inflate() */
579 if (state->in.avail == 0 && fill_in_buffer(state) == -1)
580 break;
581 if (state->in.avail == 0) {
582 /* EOF */
583 state->err = WTAP_ERR_SHORT_READ;
584 state->err_info = NULL;
585 break;
588 strm->avail_in = state->in.avail;
589 strm->next_in = state->in.next;
590 /* decompress and handle errors */
591 #ifdef Z_BLOCK
592 ret = inflate(strm, Z_BLOCK);
593 #else
594 ret = inflate(strm, Z_NO_FLUSH);
595 #endif
596 state->in.avail = strm->avail_in;
597 #ifdef z_const
598 DIAG_OFF(cast-qual)
599 state->in.next = (unsigned char *)strm->next_in;
600 DIAG_ON(cast-qual)
601 #else
602 state->in.next = strm->next_in;
603 #endif
604 if (ret == Z_STREAM_ERROR) {
605 state->err = WTAP_ERR_DECOMPRESS;
606 state->err_info = strm->msg;
607 break;
609 if (ret == Z_NEED_DICT) {
610 state->err = WTAP_ERR_DECOMPRESS;
611 state->err_info = "preset dictionary needed";
612 break;
614 if (ret == Z_MEM_ERROR) {
615 /* This means "not enough memory". */
616 state->err = ENOMEM;
617 state->err_info = NULL;
618 break;
620 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
621 state->err = WTAP_ERR_DECOMPRESS;
622 state->err_info = strm->msg;
623 break;
626 * XXX - Z_BUF_ERROR?
629 strm->adler = crc32(strm->adler, buf2, count2 - strm->avail_out);
630 #ifdef Z_BLOCK
631 if (state->fast_seek_cur != NULL) {
632 struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
633 unsigned int ready = count2 - strm->avail_out;
635 if (ready < ZLIB_WINSIZE) {
636 unsigned left = ZLIB_WINSIZE - cur->pos;
638 if (ready >= left) {
639 memcpy(cur->window + cur->pos, buf2, left);
640 if (ready != left)
641 memcpy(cur->window, buf2 + left, ready - left);
643 cur->pos = ready - left;
644 cur->have += ready;
645 } else {
646 memcpy(cur->window + cur->pos, buf2, ready);
647 cur->pos += ready;
648 cur->have += ready;
651 if (cur->have >= ZLIB_WINSIZE)
652 cur->have = ZLIB_WINSIZE;
654 } else {
655 memcpy(cur->window, buf2 + (ready - ZLIB_WINSIZE), ZLIB_WINSIZE);
656 cur->pos = 0;
657 cur->have = ZLIB_WINSIZE;
660 if (cur->have >= ZLIB_WINSIZE && ret != Z_STREAM_END && (strm->data_type & 128) && !(strm->data_type & 64))
661 zlib_fast_seek_add(state, cur, (strm->data_type & 7), state->raw_pos - strm->avail_in, state->pos + (count - strm->avail_out));
663 #endif
664 buf2 = (buf2 + count2 - strm->avail_out);
665 count2 = strm->avail_out;
667 } while (strm->avail_out && ret != Z_STREAM_END);
669 /* update available output and crc check value */
670 state->out.next = buf;
671 state->out.avail = count - strm->avail_out;
673 /* Check gzip trailer if at end of deflate stream.
674 We don't fail immediately here, we just set an error
675 indication, so that we try to process what data we
676 got before the error. The next attempt to read
677 something past that data will get the error. */
678 if (ret == Z_STREAM_END) {
679 if (gz_next4(state, &crc) != -1 &&
680 gz_next4(state, &len) != -1) {
681 if (crc != strm->adler && !state->dont_check_crc) {
682 state->err = WTAP_ERR_DECOMPRESS;
683 state->err_info = "bad CRC";
684 } else if (len != (strm->total_out & 0xffffffffUL)) {
685 state->err = WTAP_ERR_DECOMPRESS;
686 state->err_info = "length field wrong";
689 state->last_compression = state->compression;
690 state->compression = UNKNOWN; /* ready for next stream, once have is 0 */
691 g_free(state->fast_seek_cur);
692 state->fast_seek_cur = NULL;
695 #endif
697 static int
698 gz_head(FILE_T state)
700 unsigned already_read;
702 /* get some data in the input buffer */
703 if (state->in.avail == 0) {
704 if (fill_in_buffer(state) == -1)
705 return -1;
706 if (state->in.avail == 0)
707 return 0;
710 /* look for the gzip magic header bytes 31 and 139 */
711 if (state->in.next[0] == 31) {
712 state->in.avail--;
713 state->in.next++;
715 /* Make sure the byte after the first byte is present */
716 if (state->in.avail == 0 && fill_in_buffer(state) == -1) {
717 /* Read error. */
718 return -1;
720 if (state->in.avail != 0) {
721 if (state->in.next[0] == 139) {
723 * We have what looks like the ID1 and ID2 bytes of a gzip
724 * header.
725 * Continue processing the file.
727 * XXX - some capture file formats (I'M LOOKING AT YOU,
728 * ENDACE!) can have 31 in the first byte of the file
729 * and 139 in the second byte of the file. For now, in
730 * those cases, you lose.
732 #ifdef HAVE_ZLIB
733 uint8_t cm;
734 uint8_t flags;
735 uint16_t len;
736 uint16_t hcrc;
738 state->in.avail--;
739 state->in.next++;
741 /* read rest of header */
743 /* compression method (CM) */
744 if (gz_next1(state, &cm) == -1)
745 return -1;
746 if (cm != 8) {
747 state->err = WTAP_ERR_DECOMPRESS;
748 state->err_info = "unknown compression method";
749 return -1;
752 /* flags (FLG) */
753 if (gz_next1(state, &flags) == -1) {
754 /* Read error. */
755 return -1;
757 if (flags & 0xe0) { /* reserved flag bits */
758 state->err = WTAP_ERR_DECOMPRESS;
759 state->err_info = "reserved flag bits set";
760 return -1;
763 /* modification time (MTIME) */
764 if (gz_skipn(state, 4) == -1) {
765 /* Read error. */
766 return -1;
769 /* extra flags (XFL) */
770 if (gz_skipn(state, 1) == -1) {
771 /* Read error. */
772 return -1;
775 /* operating system (OS) */
776 if (gz_skipn(state, 1) == -1) {
777 /* Read error. */
778 return -1;
781 if (flags & 4) {
782 /* extra field - get XLEN */
783 if (gz_next2(state, &len) == -1) {
784 /* Read error. */
785 return -1;
788 /* skip the extra field */
789 if (gz_skipn(state, len) == -1) {
790 /* Read error. */
791 return -1;
794 if (flags & 8) {
795 /* file name */
796 if (gz_skipzstr(state) == -1) {
797 /* Read error. */
798 return -1;
801 if (flags & 16) {
802 /* comment */
803 if (gz_skipzstr(state) == -1) {
804 /* Read error. */
805 return -1;
808 if (flags & 2) {
809 /* header crc */
810 if (gz_next2(state, &hcrc) == -1) {
811 /* Read error. */
812 return -1;
814 /* XXX - check the CRC? */
817 /* set up for decompression */
818 inflateReset(&(state->strm));
819 state->strm.adler = crc32(0L, Z_NULL, 0);
820 state->compression = ZLIB;
821 state->is_compressed = true;
822 #ifdef Z_BLOCK
823 if (state->fast_seek) {
824 struct zlib_cur_seek_point *cur = g_new(struct zlib_cur_seek_point,1);
826 cur->pos = cur->have = 0;
827 g_free(state->fast_seek_cur);
828 state->fast_seek_cur = cur;
829 fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, GZIP_AFTER_HEADER);
831 #endif /* Z_BLOCK */
832 return 0;
833 #else /* HAVE_ZLIB */
834 state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
835 state->err_info = "reading gzip-compressed files isn't supported";
836 return -1;
837 #endif /* HAVE_ZLIB */
841 * Not a gzip file. "Unget" the first character; either:
843 * 1) we read both of the first two bytes into the
844 * buffer with the first ws_read, so we can just back
845 * up by one byte;
847 * 2) we only read the first byte into the buffer with
848 * the first ws_read (e.g., because we're reading from
849 * a pipe and only the first byte had been written to
850 * the pipe at that point), and read the second byte
851 * into the buffer after the first byte in the
852 * fill_in_buffer call, so we now have two bytes in
853 * the buffer, and can just back up by one byte.
855 state->in.avail++;
856 state->in.next--;
859 #ifdef HAVE_LIBXZ
860 /* { 0xFD, '7', 'z', 'X', 'Z', 0x00 } */
861 /* FD 37 7A 58 5A 00 */
862 #endif
864 if (state->in.avail >= 4
865 && state->in.buf[0] == 0x28 && state->in.buf[1] == 0xb5
866 && state->in.buf[2] == 0x2f && state->in.buf[3] == 0xfd) {
867 #ifdef HAVE_ZSTD
868 const size_t ret = ZSTD_initDStream(state->zstd_dctx);
869 if (ZSTD_isError(ret)) {
870 state->err = WTAP_ERR_DECOMPRESS;
871 state->err_info = ZSTD_getErrorName(ret);
872 return -1;
875 state->compression = ZSTD;
876 state->is_compressed = true;
877 return 0;
878 #else
879 state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
880 state->err_info = "reading zstd-compressed files isn't supported";
881 return -1;
882 #endif
885 if (state->in.avail >= 4
886 && state->in.buf[0] == 0x04 && state->in.buf[1] == 0x22
887 && state->in.buf[2] == 0x4d && state->in.buf[3] == 0x18) {
888 #ifdef USE_LZ4
889 #if LZ4_VERSION_NUMBER >= 10800
890 LZ4F_resetDecompressionContext(state->lz4_dctx);
891 #else
892 LZ4F_freeDecompressionContext(state->lz4_dctx);
893 const LZ4F_errorCode_t ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION);
894 if (LZ4F_isError(ret)) {
895 state->err = WTAP_ERR_INTERNAL;
896 state->err_info = LZ4F_getErrorName(ret);
897 return -1;
899 #endif
900 state->compression = LZ4;
901 state->is_compressed = true;
902 return 0;
903 #else
904 state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
905 state->err_info = "reading lz4-compressed files isn't supported";
906 return -1;
907 #endif
910 if (state->fast_seek)
911 fast_seek_header(state, state->raw_pos - state->in.avail - state->out.avail, state->pos, UNCOMPRESSED);
913 /* doing raw i/o, save start of raw data for seeking, copy any leftover
914 input to output -- this assumes that the output buffer is larger than
915 the input buffer, which also assures space for gzungetc() */
916 state->raw = state->pos;
917 state->out.next = state->out.buf;
918 /* not a compressed file -- copy everything we've read into the
919 input buffer to the output buffer and fall to raw i/o */
920 already_read = bytes_in_buffer(&state->in);
921 if (already_read != 0) {
922 memcpy(state->out.buf, state->in.buf, already_read);
923 state->out.avail = already_read;
925 /* Now discard everything in the input buffer */
926 buf_reset(&state->in);
928 state->compression = UNCOMPRESSED;
929 return 0;
932 static int /* gz_make */
933 fill_out_buffer(FILE_T state)
935 if (state->compression == UNKNOWN) { /* look for compression header */
936 if (gz_head(state) == -1)
937 return -1;
938 if (state->out.avail != 0) /* got some data from gz_head() */
939 return 0;
941 if (state->compression == UNCOMPRESSED) { /* straight copy */
942 if (buf_read(state, &state->out) < 0)
943 return -1;
945 #ifdef HAVE_ZLIB
946 else if (state->compression == ZLIB) { /* decompress */
947 zlib_read(state, state->out.buf, state->size << 1);
949 #endif
950 #ifdef HAVE_ZSTD
951 else if (state->compression == ZSTD) {
952 ws_assert(state->out.avail == 0);
954 if (state->in.avail == 0 && fill_in_buffer(state) == -1)
955 return -1;
957 ZSTD_outBuffer output = {state->out.buf, state->size << 1, 0};
958 ZSTD_inBuffer input = {state->in.next, state->in.avail, 0};
959 const size_t ret = ZSTD_decompressStream(state->zstd_dctx, &output, &input);
960 if (ZSTD_isError(ret)) {
961 state->err = WTAP_ERR_DECOMPRESS;
962 state->err_info = ZSTD_getErrorName(ret);
963 return -1;
966 state->in.next = state->in.next + input.pos;
967 state->in.avail -= (unsigned)input.pos;
969 state->out.next = output.dst;
970 state->out.avail = (unsigned)output.pos;
972 if (ret == 0) {
973 state->last_compression = state->compression;
974 state->compression = UNKNOWN;
977 #endif
978 #ifdef USE_LZ4
979 else if (state->compression == LZ4) {
980 ws_assert(state->out.avail == 0);
982 if (state->in.avail == 0 && fill_in_buffer(state) == -1)
983 return -1;
985 size_t outBufSize = state->size << 1;
986 size_t inBufSize = state->in.avail;
987 const size_t ret = LZ4F_decompress(state->lz4_dctx, state->out.buf, &outBufSize, state->in.next, &inBufSize, NULL);
988 if (LZ4F_isError(ret)) {
989 state->err = WTAP_ERR_DECOMPRESS;
990 state->err_info = LZ4F_getErrorName(ret);
991 return -1;
995 * We assume LZ4F_decompress() will not set inBufSize to a
996 * value > state->in.avail.
998 state->in.next = state->in.next + inBufSize;
999 state->in.avail -= (unsigned)inBufSize;
1001 state->out.next = state->out.buf;
1002 state->out.avail = (unsigned)outBufSize;
1004 if (ret == 0) {
1005 state->last_compression = state->compression;
1006 state->compression = UNKNOWN;
1009 #endif
1010 return 0;
1013 static int
1014 gz_skip(FILE_T state, int64_t len)
1016 unsigned n;
1018 /* skip over len bytes or reach end-of-file, whichever comes first */
1019 while (len)
1020 if (state->out.avail != 0) {
1021 /* We have stuff in the output buffer; skip over
1022 it. */
1023 n = (int64_t)state->out.avail > len ? (unsigned)len : state->out.avail;
1024 state->out.avail -= n;
1025 state->out.next += n;
1026 state->pos += n;
1027 len -= n;
1028 } else if (state->err != 0) {
1029 /* We have nothing in the output buffer, and
1030 we have an error that may not have been
1031 reported yet; that means we can't generate
1032 any more data into the output buffer, so
1033 return an error indication. */
1034 return -1;
1035 } else if (state->eof && state->in.avail == 0) {
1036 /* We have nothing in the output buffer, and
1037 we're at the end of the input; just return. */
1038 break;
1039 } else {
1040 /* We have nothing in the output buffer, and
1041 we can generate more data; get more output,
1042 looking for header if required. */
1043 if (fill_out_buffer(state) == -1)
1044 return -1;
1046 return 0;
1049 static void
1050 gz_reset(FILE_T state)
1052 buf_reset(&state->out); /* no output data available */
1053 state->eof = false; /* not at end of file */
1054 state->compression = UNKNOWN; /* look for compression header */
1056 state->seek_pending = false; /* no seek request pending */
1057 state->err = 0; /* clear error */
1058 state->err_info = NULL;
1059 state->pos = 0; /* no uncompressed data yet */
1060 buf_reset(&state->in); /* no input data yet */
1063 FILE_T
1064 file_fdopen(int fd)
1067 * XXX - we now check whether we have st_blksize in struct stat;
1068 * it's not available on all platforms.
1070 * I'm not sure why we're testing _STATBUF_ST_BLKSIZE; it's not
1071 * set on all platforms that have st_blksize in struct stat.
1072 * (Not all platforms have st_blksize in struct stat.)
1074 * Is there some reason *not* to make the buffer size the maximum
1075 * of GZBUFSIZE and st_blksize? On most UN*Xes, the standard I/O
1076 * library does I/O with st_blksize as the buffer size; on others,
1077 * and on Windows, it's a 4K buffer size. If st_blksize is bigger
1078 * than GZBUFSIZE (which is currently 4KB), that's probably a
1079 * hint that reading in st_blksize chunks is considered a good
1080 * idea (e.g., an 8K/1K Berkeley fast file system with st_blksize
1081 * being 8K, or APFS, where st_blksize is big on at least some
1082 * versions of macOS).
1084 #ifdef _STATBUF_ST_BLKSIZE
1085 ws_statb64 st;
1086 #endif
1087 #ifdef HAVE_ZSTD
1088 size_t zstd_buf_size;
1089 #endif
1090 unsigned want = GZBUFSIZE;
1091 FILE_T state;
1092 #ifdef USE_LZ4
1093 size_t ret;
1094 #endif
1096 if (fd == -1)
1097 return NULL;
1099 /* allocate FILE_T structure to return */
1100 state = (FILE_T)g_try_malloc0(sizeof *state);
1101 if (state == NULL)
1102 return NULL;
1104 state->fast_seek_cur = NULL;
1105 state->fast_seek = NULL;
1107 /* open the file with the appropriate mode (or just use fd) */
1108 state->fd = fd;
1110 /* we don't yet know whether it's compressed */
1111 state->is_compressed = false;
1112 state->last_compression = UNKNOWN;
1114 /* save the current position for rewinding (only if reading) */
1115 state->start = ws_lseek64(state->fd, 0, SEEK_CUR);
1116 if (state->start == -1) state->start = 0;
1117 state->raw_pos = state->start;
1119 /* initialize stream */
1120 gz_reset(state);
1122 #ifdef _STATBUF_ST_BLKSIZE
1124 * See what I/O size the file system recommends using, and if
1125 * it's bigger than what we're using and isn't too big, use
1126 * it.
1128 if (ws_fstat64(fd, &st) >= 0) {
1130 * Yes, st_blksize can be bigger than an int; apparently,
1131 * it's a long on LP64 Linux, for example.
1133 * If the value is too big to fit into a unsigned,
1134 * just use the maximum read buffer size.
1136 * On top of that, the Single UNIX Specification says that
1137 * st_blksize is of type blksize_t, which is a *signed*
1138 * integer type, and, at minimum, macOS 11.6 and Linux 5.14.11's
1139 * include/uapi/asm-generic/stat.h define it as such.
1141 * However, other OSes might make it unsigned, and older versions
1142 * of OSes that currently make it signed might make it unsigned,
1143 * so we try to avoid warnings from that.
1145 * We cast MAX_READ_BUF_SIZE to long in order to avoid the
1146 * warning, although it might introduce warnings on platforms
1147 * where st_blksize is unsigned; we'll deal with that if
1148 * it ever shows up as an issue.
1150 * MAX_READ_BUF_SIZE is < the largest *signed* 32-bit integer,
1151 * so casting it to long won't turn it into a negative number.
1152 * (We only support 32-bit and 64-bit 2's-complement platforms.)
1154 if (st.st_blksize <= (long)MAX_READ_BUF_SIZE)
1155 want = (unsigned)st.st_blksize;
1156 else
1157 want = MAX_READ_BUF_SIZE;
1158 /* XXX, verify result? */
1160 #endif
1161 #ifdef HAVE_ZSTD
1162 /* we should have separate input and output buf sizes */
1163 zstd_buf_size = ZSTD_DStreamInSize();
1164 if (zstd_buf_size > want) {
1165 if (zstd_buf_size <= MAX_READ_BUF_SIZE)
1166 want = (unsigned)zstd_buf_size;
1167 else
1168 want = MAX_READ_BUF_SIZE;
1170 zstd_buf_size = ZSTD_DStreamOutSize();
1171 if (zstd_buf_size > want) {
1172 if (zstd_buf_size <= MAX_READ_BUF_SIZE)
1173 want = (unsigned)zstd_buf_size;
1174 else
1175 want = MAX_READ_BUF_SIZE;
1177 #endif
1178 /* allocate buffers */
1179 state->in.buf = (unsigned char *)g_try_malloc(want);
1180 state->in.next = state->in.buf;
1181 state->in.avail = 0;
1182 state->out.buf = (unsigned char *)g_try_malloc(want << 1);
1183 state->out.next = state->out.buf;
1184 state->out.avail = 0;
1185 state->size = want;
1186 if (state->in.buf == NULL || state->out.buf == NULL) {
1187 goto err;
1190 #ifdef HAVE_ZLIB
1191 /* allocate inflate memory */
1192 state->strm.zalloc = Z_NULL;
1193 state->strm.zfree = Z_NULL;
1194 state->strm.opaque = Z_NULL;
1195 state->strm.avail_in = 0;
1196 state->strm.next_in = Z_NULL;
1197 if (inflateInit2(&(state->strm), -15) != Z_OK) { /* raw inflate */
1198 goto err;
1201 /* for now, assume we should check the crc */
1202 state->dont_check_crc = false;
1203 #endif
1205 #ifdef HAVE_ZSTD
1206 state->zstd_dctx = ZSTD_createDCtx();
1207 if (state->zstd_dctx == NULL) {
1208 goto err;
1210 #endif
1212 #ifdef USE_LZ4
1213 ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION);
1214 if (LZ4F_isError(ret)) {
1215 goto err;
1217 #endif
1219 /* return stream */
1220 return state;
1222 err:
1223 #ifdef HAVE_ZLIB
1224 inflateEnd(&state->strm);
1225 #endif
1226 #ifdef HAVE_ZSTD
1227 ZSTD_freeDCtx(state->zstd_dctx);
1228 #endif
1229 #ifdef USE_LZ4
1230 LZ4F_freeDecompressionContext(state->lz4_dctx);
1231 #endif
1232 g_free(state->out.buf);
1233 g_free(state->in.buf);
1234 g_free(state);
1235 errno = ENOMEM;
1236 return NULL;
1239 FILE_T
1240 file_open(const char *path)
1242 int fd;
1243 FILE_T ft;
1244 #ifdef HAVE_ZLIB
1245 const char *suffixp;
1246 #endif
1248 /* open file and do correct filename conversions.
1250 XXX - do we need O_LARGEFILE? On UN*X, if we need to do
1251 something special to get large file support, the configure
1252 script should have set us up with the appropriate #defines,
1253 so we should be getting a large-file-enabled file descriptor
1254 here. Pre-Large File Summit UN*Xes, and possibly even some
1255 post-LFS UN*Xes, might require O_LARGEFILE here, though.
1256 If so, we should probably handle that in ws_open(). */
1257 if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1)
1258 return NULL;
1260 /* open file handle */
1261 ft = file_fdopen(fd);
1262 if (ft == NULL) {
1263 ws_close(fd);
1264 return NULL;
1267 #ifdef HAVE_ZLIB
1269 * If this file's name ends in ".caz", it's probably a compressed
1270 * Windows Sniffer file. The compression is gzip, but if we
1271 * process the CRC as specified by RFC 1952, the computed CRC
1272 * doesn't match the stored CRC.
1274 * Compressed Windows Sniffer files don't all have the same CRC
1275 * value; is it just random crap, or are they running the CRC on
1276 * a different set of data than you're supposed to (e.g., not
1277 * CRCing some of the data), or something such as that?
1279 * For now, we just set a flag to ignore CRC errors.
1281 suffixp = strrchr(path, '.');
1282 if (suffixp != NULL) {
1283 if (g_ascii_strcasecmp(suffixp, ".caz") == 0)
1284 ft->dont_check_crc = true;
1286 #endif
1288 return ft;
1291 void
1292 file_set_random_access(FILE_T stream, bool random_flag _U_, GPtrArray *seek)
1294 stream->fast_seek = seek;
1297 int64_t
1298 file_seek(FILE_T file, int64_t offset, int whence, int *err)
1300 struct fast_seek_point *here;
1301 unsigned n;
1303 if (whence != SEEK_SET && whence != SEEK_CUR && whence != SEEK_END) {
1304 ws_assert_not_reached();
1306 *err = EINVAL;
1307 return -1;
1311 /* Normalize offset to a SEEK_CUR specification */
1312 if (whence == SEEK_END) {
1313 /* Seek relative to the end of the file; given that we might be
1314 reading from a compressed file, we do that by seeking to the
1315 end of the file, making an offset relative to the end of
1316 the file an offset relative to the current position.
1318 XXX - we don't actually use this yet, but, for uncompressed
1319 files, we could optimize it, if desired, by directly using
1320 ws_lseek64(). */
1321 if (gz_skip(file, INT64_MAX) == -1) {
1322 *err = file->err;
1323 return -1;
1325 if (offset == 0) {
1326 /* We are done */
1327 return file->pos;
1329 } else if (whence == SEEK_SET)
1330 offset -= file->pos;
1331 else if (file->seek_pending) {
1332 /* There's a forward-skip pending, so file->pos doesn't reflect
1333 the actual file position, it represents the position from
1334 which we're skipping; update the offset to include that. */
1335 offset += file->skip;
1337 file->seek_pending = false;
1340 * Are we moving at all?
1342 if (offset == 0) {
1343 /* No. Just return the current position. */
1344 return file->pos;
1348 * Are we seeking backwards?
1350 if (offset < 0) {
1352 * Yes.
1354 * Do we have enough data before the current position in the
1355 * buffer that we can seek backwards within the buffer?
1357 if (-offset <= offset_in_buffer(&file->out)) {
1359 * Yes. Adjust appropriately.
1361 * offset is negative, so -offset is non-negative, and
1362 * -offset is <= an unsigned and thus fits in an unsigned.
1363 * Get that value and adjust appropriately.
1365 * (Casting offset to unsigned makes it positive, which
1366 * is not what we would want, so we cast -offset instead.)
1368 * XXX - this won't work with -offset = 2^63, as its
1369 * negative isn't a valid 64-bit integer, but we are
1370 * not at all likely to see files big enough to ever
1371 * see a negative offset that large.
1373 unsigned adjustment = (unsigned)(-offset);
1375 file->out.avail += adjustment;
1376 file->out.next -= adjustment;
1377 file->pos -= adjustment;
1378 return file->pos;
1380 } else {
1382 * No. Offset is positive; we're seeking forwards.
1384 * Do we have enough data after the current position in the
1385 * buffer that we can seek forwards within the buffer?
1387 if (offset < file->out.avail) {
1389 * Yes. Adjust appropriately.
1391 * offset is < an unsigned and thus fits in an unsigned,
1392 * so we can cast it to unsigned safely.
1394 file->out.avail -= (unsigned)offset;
1395 file->out.next += offset;
1396 file->pos += offset;
1397 return file->pos;
1402 * We're not seeking within the buffer. Do we have "fast seek" data
1403 * for the location to which we will be seeking, and is the offset
1404 * outside the span for compressed files or is this an uncompressed
1405 * file?
1407 * XXX, profile
1409 if ((here = fast_seek_find(file, file->pos + offset)) &&
1410 (offset < 0 || offset > SPAN || here->compression == UNCOMPRESSED)) {
1411 int64_t off, off2;
1414 * Yes. Use that data to do the seek.
1415 * Note that this will be true only if file_set_random_access()
1416 * has been called on this file, which should never be the case
1417 * for a pipe.
1419 #ifdef HAVE_ZLIB
1420 if (here->compression == ZLIB) {
1421 #ifdef HAVE_INFLATEPRIME
1422 off = here->in - (here->data.zlib.bits ? 1 : 0);
1423 #else
1424 off = here->in;
1425 #endif
1426 off2 = here->out;
1427 } else if (here->compression == GZIP_AFTER_HEADER) {
1428 off = here->in;
1429 off2 = here->out;
1430 } else
1431 #endif
1433 off2 = (file->pos + offset);
1434 off = here->in + (off2 - here->out);
1437 if (ws_lseek64(file->fd, off, SEEK_SET) == -1) {
1438 *err = errno;
1439 return -1;
1441 fast_seek_reset(file);
1443 file->raw_pos = off;
1444 buf_reset(&file->out);
1445 file->eof = false;
1446 file->seek_pending = false;
1447 file->err = 0;
1448 file->err_info = NULL;
1449 buf_reset(&file->in);
1451 #ifdef HAVE_ZLIB
1452 if (here->compression == ZLIB) {
1453 z_stream *strm = &file->strm;
1455 inflateReset(strm);
1456 strm->adler = here->data.zlib.adler;
1457 strm->total_out = here->data.zlib.total_out;
1458 #ifdef HAVE_INFLATEPRIME
1459 if (here->data.zlib.bits) {
1460 FILE_T state = file;
1461 int ret = GZ_GETC();
1463 if (ret == -1) {
1464 if (state->err == 0) {
1465 /* EOF */
1466 *err = WTAP_ERR_SHORT_READ;
1467 } else
1468 *err = state->err;
1469 return -1;
1471 (void)inflatePrime(strm, here->data.zlib.bits, ret >> (8 - here->data.zlib.bits));
1473 #endif
1474 (void)inflateSetDictionary(strm, here->data.zlib.window, ZLIB_WINSIZE);
1475 file->compression = ZLIB;
1476 } else if (here->compression == GZIP_AFTER_HEADER) {
1477 z_stream *strm = &file->strm;
1479 inflateReset(strm);
1480 strm->adler = crc32(0L, Z_NULL, 0);
1481 file->compression = ZLIB;
1482 } else
1483 #endif
1484 file->compression = here->compression;
1486 offset = (file->pos + offset) - off2;
1487 file->pos = off2;
1488 /* g_print("OK! %ld\n", offset); */
1490 if (offset) {
1491 /* Don't skip forward yet, wait until we want to read from
1492 the file; that way, if we do multiple seeks in a row,
1493 all involving forward skips, they will be combined. */
1494 file->seek_pending = true;
1495 file->skip = offset;
1497 return file->pos + offset;
1501 * Is this an uncompressed file, are we within the raw area,
1502 * are we either seeking backwards or seeking past the end
1503 * of the buffer, and are we set up for random access with
1504 * file_set_random_access()?
1506 * Again, note that this will never be true on a pipe, as
1507 * file_set_random_access() should never be called if we're
1508 * reading from a pipe.
1510 if (file->compression == UNCOMPRESSED && file->pos + offset >= file->raw
1511 && (offset < 0 || offset >= file->out.avail)
1512 && (file->fast_seek != NULL))
1515 * Yes. Just seek there within the file.
1517 if (ws_lseek64(file->fd, offset - file->out.avail, SEEK_CUR) == -1) {
1518 *err = errno;
1519 return -1;
1521 file->raw_pos += (offset - file->out.avail);
1522 buf_reset(&file->out);
1523 file->eof = false;
1524 file->seek_pending = false;
1525 file->err = 0;
1526 file->err_info = NULL;
1527 buf_reset(&file->in);
1528 file->pos += offset;
1529 return file->pos;
1533 * Are we seeking backwards?
1535 if (offset < 0) {
1537 * Yes. We have no fast seek data, so we have to rewind and
1538 * seek forward.
1539 * XXX - true only for compressed files.
1541 * Calculate the amount to skip forward after rewinding.
1543 offset += file->pos;
1544 if (offset < 0) { /* before start of file! */
1545 *err = EINVAL;
1546 return -1;
1548 /* rewind, then skip to offset */
1550 /* back up and start over */
1551 if (ws_lseek64(file->fd, file->start, SEEK_SET) == -1) {
1552 *err = errno;
1553 return -1;
1555 fast_seek_reset(file);
1556 file->raw_pos = file->start;
1557 gz_reset(file);
1561 * Either we're seeking backwards, but have rewound and now need to
1562 * skip forwards, or we're seeking forwards.
1564 * Skip what's in output buffer (one less gzgetc() check).
1566 n = (int64_t)file->out.avail > offset ? (unsigned)offset : file->out.avail;
1567 file->out.avail -= n;
1568 file->out.next += n;
1569 file->pos += n;
1570 offset -= n;
1572 /* request skip (if not zero) */
1573 if (offset) {
1574 /* Don't skip forward yet, wait until we want to read from
1575 the file; that way, if we do multiple seeks in a row,
1576 all involving forward skips, they will be combined. */
1577 file->seek_pending = true;
1578 file->skip = offset;
1580 return file->pos + offset;
1583 int64_t
1584 file_tell(FILE_T stream)
1586 /* return position */
1587 return stream->pos + (stream->seek_pending ? stream->skip : 0);
1590 int64_t
1591 file_tell_raw(FILE_T stream)
1593 return stream->raw_pos;
1597 file_fstat(FILE_T stream, ws_statb64 *statb, int *err)
1599 if (ws_fstat64(stream->fd, statb) == -1) {
1600 if (err != NULL)
1601 *err = errno;
1602 return -1;
1604 return 0;
1607 bool
1608 file_iscompressed(FILE_T stream)
1610 return stream->is_compressed;
1613 /* Returns a wtap compression type. If we don't know the compression type,
1614 * return WTAP_UNCOMPRESSED, but if our compression state is temporarily
1615 * UNKNOWN because we need to reread compression headers, return the last
1616 * known compression type.
1618 static wtap_compression_type
1619 file_get_compression_type(FILE_T stream)
1621 if (stream->is_compressed) {
1622 switch ((stream->compression == UNKNOWN) ? stream->last_compression : stream->compression) {
1624 case ZLIB:
1625 case GZIP_AFTER_HEADER:
1626 return WTAP_GZIP_COMPRESSED;
1628 case ZSTD:
1629 return WTAP_ZSTD_COMPRESSED;
1631 case LZ4:
1632 return WTAP_LZ4_COMPRESSED;
1634 case UNCOMPRESSED:
1635 return WTAP_UNCOMPRESSED;
1637 default: /* UNKNOWN, should never happen if is_compressed is set */
1638 ws_assert_not_reached();
1639 return WTAP_UNCOMPRESSED;
1642 return WTAP_UNCOMPRESSED;
1646 file_read(void *buf, unsigned int len, FILE_T file)
1648 unsigned got, n;
1650 /* if len is zero, avoid unnecessary operations */
1651 if (len == 0)
1652 return 0;
1654 /* process a skip request */
1655 if (file->seek_pending) {
1656 file->seek_pending = false;
1657 if (gz_skip(file, file->skip) == -1)
1658 return -1;
1662 * Get len bytes to buf, or less than len if at the end;
1663 * if buf is null, just throw the bytes away.
1665 got = 0;
1666 do {
1667 if (file->out.avail != 0) {
1668 /* We have stuff in the output buffer; copy
1669 what we have. */
1670 n = file->out.avail > len ? len : file->out.avail;
1671 if (buf != NULL) {
1672 memcpy(buf, file->out.next, n);
1673 buf = (char *)buf + n;
1675 file->out.next += n;
1676 file->out.avail -= n;
1677 len -= n;
1678 got += n;
1679 file->pos += n;
1680 } else if (file->err != 0) {
1681 /* We have nothing in the output buffer, and
1682 we have an error that may not have been
1683 reported yet; that means we can't generate
1684 any more data into the output buffer, so
1685 return an error indication. */
1686 return -1;
1687 } else if (file->eof && file->in.avail == 0) {
1688 /* We have nothing in the output buffer, and
1689 we're at the end of the input; just return
1690 with what we've gotten so far. */
1691 break;
1692 } else {
1693 /* We have nothing in the output buffer, and
1694 we can generate more data; get more output,
1695 looking for header if required, and
1696 keep looping to process the new stuff
1697 in the output buffer. */
1698 if (fill_out_buffer(file) == -1)
1699 return -1;
1701 } while (len);
1703 return (int)got;
1707 * XXX - this *peeks* at next byte, not a character.
1710 file_peekc(FILE_T file)
1712 int ret = 0;
1714 /* check that we're reading and that there's no error */
1715 if (file->err != 0)
1716 return -1;
1718 /* try output buffer (no need to check for skip request) */
1719 if (file->out.avail != 0) {
1720 return *(file->out.next);
1723 /* process a skip request */
1724 if (file->seek_pending) {
1725 file->seek_pending = false;
1726 if (gz_skip(file, file->skip) == -1)
1727 return -1;
1729 /* if we processed a skip request, there may be data in the buffer,
1730 * or an error could have occurred; likewise if we didn't do seek but
1731 * now call fill_out_buffer, the errors can occur. So we do this while
1732 * loop to check before and after - this is basically the logic from
1733 * file_read() but only for peeking not consuming a byte
1735 while (1) {
1736 if (file->out.avail != 0) {
1737 return *(file->out.next);
1739 else if (file->err != 0) {
1740 return -1;
1742 else if (file->eof && file->in.avail == 0) {
1743 return -1;
1745 else if (fill_out_buffer(file) == -1) {
1746 return -1;
1749 /* it's actually impossible to get here */
1750 return ret;
1754 * XXX - this gets a byte, not a character.
1757 file_getc(FILE_T file)
1759 unsigned char buf[1];
1760 int ret;
1762 /* check that we're reading and that there's no error */
1763 if (file->err != 0)
1764 return -1;
1766 /* try output buffer (no need to check for skip request) */
1767 if (file->out.avail != 0) {
1768 file->out.avail--;
1769 file->pos++;
1770 return *(file->out.next)++;
1773 ret = file_read(buf, 1, file);
1774 return ret < 1 ? -1 : buf[0];
1777 /* Like file_gets, but returns a pointer to the terminating NUL. */
1778 char *
1779 file_getsp(char *buf, int len, FILE_T file)
1781 unsigned left, n;
1782 char *str;
1783 unsigned char *eol;
1785 /* check parameters */
1786 if (buf == NULL || len < 1)
1787 return NULL;
1789 /* check that there's no error */
1790 if (file->err != 0)
1791 return NULL;
1793 /* process a skip request */
1794 if (file->seek_pending) {
1795 file->seek_pending = false;
1796 if (gz_skip(file, file->skip) == -1)
1797 return NULL;
1800 /* copy output bytes up to new line or len - 1, whichever comes first --
1801 append a terminating zero to the string (we don't check for a zero in
1802 the contents, let the user worry about that) */
1803 str = buf;
1804 left = (unsigned)len - 1;
1805 if (left) do {
1806 /* assure that something is in the output buffer */
1807 if (file->out.avail == 0) {
1808 /* We have nothing in the output buffer. */
1809 if (file->err != 0) {
1810 /* We have an error that may not have
1811 been reported yet; that means we
1812 can't generate any more data into
1813 the output buffer, so return an
1814 error indication. */
1815 return NULL;
1817 if (fill_out_buffer(file) == -1)
1818 return NULL; /* error */
1819 if (file->out.avail == 0) { /* end of file */
1820 if (buf == str) /* got bupkus */
1821 return NULL;
1822 break; /* got something -- return it */
1826 /* look for end-of-line in current output buffer */
1827 n = file->out.avail > left ? left : file->out.avail;
1828 eol = (unsigned char *)memchr(file->out.next, '\n', n);
1829 if (eol != NULL)
1830 n = (unsigned)(eol - file->out.next) + 1;
1832 /* copy through end-of-line, or remainder if not found */
1833 memcpy(buf, file->out.next, n);
1834 file->out.avail -= n;
1835 file->out.next += n;
1836 file->pos += n;
1837 left -= n;
1838 buf += n;
1839 } while (left && eol == NULL);
1841 /* found end-of-line or out of space -- add a terminator and return
1842 a pointer to it */
1843 buf[0] = 0;
1844 return buf;
1847 char *
1848 file_gets(char *buf, int len, FILE_T file)
1850 if (!file_getsp(buf, len, file)) return NULL;
1851 return buf;
1855 file_eof(FILE_T file)
1857 /* return end-of-file state */
1858 return (file->eof && file->in.avail == 0 && file->out.avail == 0);
1862 * Routine to return a Wiretap error code (0 for no error, an errno
1863 * for a file error, or a WTAP_ERR_ code for other errors) for an
1864 * I/O stream. Also returns an error string for some errors.
1867 file_error(FILE_T fh, char **err_info)
1869 if (fh->err!=0 && err_info) {
1870 /* g_strdup() returns NULL for NULL argument */
1871 *err_info = g_strdup(fh->err_info);
1873 return fh->err;
1876 void
1877 file_clearerr(FILE_T stream)
1879 /* clear error and end-of-file */
1880 stream->err = 0;
1881 stream->err_info = NULL;
1882 stream->eof = false;
1885 void
1886 file_fdclose(FILE_T file)
1888 if (file->fd != -1)
1889 ws_close(file->fd);
1890 file->fd = -1;
1893 bool
1894 file_fdreopen(FILE_T file, const char *path)
1896 int fd;
1898 if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1)
1899 return false;
1900 file->fd = fd;
1901 return true;
1904 void
1905 file_close(FILE_T file)
1907 int fd = file->fd;
1909 /* free memory and close file */
1910 if (file->size) {
1911 #ifdef HAVE_ZLIB
1912 inflateEnd(&(file->strm));
1913 #endif
1914 #ifdef HAVE_ZSTD
1915 ZSTD_freeDCtx(file->zstd_dctx);
1916 #endif
1917 #ifdef USE_LZ4
1918 LZ4F_freeDecompressionContext(file->lz4_dctx);
1919 #endif
1920 g_free(file->out.buf);
1921 g_free(file->in.buf);
1923 g_free(file->fast_seek_cur);
1924 file->err = 0;
1925 file->err_info = NULL;
1926 g_free(file);
1928 * If fd is -1, somebody's done a file_closefd() on us, so
1929 * we don't need to close the FD itself, and shouldn't do
1930 * so.
1932 if (fd != -1)
1933 ws_close(fd);
1936 #ifdef HAVE_ZLIB
1937 /* internal gzip file state data structure for writing */
1938 struct wtap_writer {
1939 int fd; /* file descriptor */
1940 int64_t pos; /* current position in uncompressed data */
1941 unsigned size; /* buffer size, zero if not allocated yet */
1942 unsigned want; /* requested buffer size, default is GZBUFSIZE */
1943 unsigned char *in; /* input buffer */
1944 unsigned char *out; /* output buffer (double-sized when reading) */
1945 unsigned char *next; /* next output data to deliver or write */
1946 int level; /* compression level */
1947 int strategy; /* compression strategy */
1948 int err; /* error code */
1949 const char *err_info; /* additional error information string for some errors */
1950 /* zlib deflate stream */
1951 z_stream strm; /* stream structure in-place (not a pointer) */
1954 GZWFILE_T
1955 gzwfile_open(const char *path)
1957 int fd;
1958 GZWFILE_T state;
1959 int save_errno;
1961 fd = ws_open(path, O_BINARY|O_WRONLY|O_CREAT|O_TRUNC, 0666);
1962 if (fd == -1)
1963 return NULL;
1964 state = gzwfile_fdopen(fd);
1965 if (state == NULL) {
1966 save_errno = errno;
1967 ws_close(fd);
1968 errno = save_errno;
1970 return state;
1973 GZWFILE_T
1974 gzwfile_fdopen(int fd)
1976 GZWFILE_T state;
1978 /* allocate wtap_writer structure to return */
1979 state = (GZWFILE_T)g_try_malloc(sizeof *state);
1980 if (state == NULL)
1981 return NULL;
1982 state->fd = fd;
1983 state->size = 0; /* no buffers allocated yet */
1984 state->want = GZBUFSIZE; /* requested buffer size */
1986 state->level = Z_DEFAULT_COMPRESSION;
1987 state->strategy = Z_DEFAULT_STRATEGY;
1989 /* initialize stream */
1990 state->err = Z_OK; /* clear error */
1991 state->err_info = NULL; /* clear additional error information */
1992 state->pos = 0; /* no uncompressed data yet */
1993 state->strm.avail_in = 0; /* no input data yet */
1995 /* return stream */
1996 return state;
1999 /* Initialize state for writing a gzip file. Mark initialization by setting
2000 state->size to non-zero. Return -1, and set state->err and possibly
2001 state->err_info, on failure; return 0 on success. */
2002 static int
2003 gz_init(GZWFILE_T state)
2005 int ret;
2006 z_streamp strm = &(state->strm);
2008 /* allocate input and output buffers */
2009 state->in = (unsigned char *)g_try_malloc(state->want);
2010 state->out = (unsigned char *)g_try_malloc(state->want);
2011 if (state->in == NULL || state->out == NULL) {
2012 g_free(state->out);
2013 g_free(state->in);
2014 state->err = ENOMEM;
2015 return -1;
2018 /* allocate deflate memory, set up for gzip compression */
2019 strm->zalloc = Z_NULL;
2020 strm->zfree = Z_NULL;
2021 strm->opaque = Z_NULL;
2022 ret = deflateInit2(strm, state->level, Z_DEFLATED,
2023 15 + 16, 8, state->strategy);
2024 if (ret != Z_OK) {
2025 g_free(state->out);
2026 g_free(state->in);
2027 if (ret == Z_MEM_ERROR) {
2028 /* This means "not enough memory". */
2029 state->err = ENOMEM;
2030 } else {
2031 /* This "shouldn't happen". */
2032 state->err = WTAP_ERR_INTERNAL;
2033 state->err_info = "Unknown error from deflateInit2()";
2035 return -1;
2038 /* mark state as initialized */
2039 state->size = state->want;
2041 /* initialize write buffer */
2042 strm->avail_out = state->size;
2043 strm->next_out = state->out;
2044 state->next = strm->next_out;
2045 return 0;
2048 /* Compress whatever is at avail_in and next_in and write to the output file.
2049 Return -1, and set state->err and possibly state->err_info, if there is
2050 an error writing to the output file; return 0 on success.
2051 flush is assumed to be a valid deflate() flush value. If flush is Z_FINISH,
2052 then the deflate() state is reset to start a new gzip stream. */
2053 static int
2054 gz_comp(GZWFILE_T state, int flush)
2056 int ret;
2057 ssize_t got;
2058 ptrdiff_t have;
2059 z_streamp strm = &(state->strm);
2061 /* allocate memory if this is the first time through */
2062 if (state->size == 0 && gz_init(state) == -1)
2063 return -1;
2065 /* run deflate() on provided input until it produces no more output */
2066 ret = Z_OK;
2067 do {
2068 /* write out current buffer contents if full, or if flushing, but if
2069 doing Z_FINISH then don't write until we get to Z_STREAM_END */
2070 if (strm->avail_out == 0 || (flush != Z_NO_FLUSH &&
2071 (flush != Z_FINISH || ret == Z_STREAM_END))) {
2072 have = strm->next_out - state->next;
2073 if (have) {
2074 got = ws_write(state->fd, state->next, (unsigned int)have);
2075 if (got < 0) {
2076 state->err = errno;
2077 return -1;
2079 if ((ptrdiff_t)got != have) {
2080 state->err = WTAP_ERR_SHORT_WRITE;
2081 return -1;
2084 if (strm->avail_out == 0) {
2085 strm->avail_out = state->size;
2086 strm->next_out = state->out;
2088 state->next = strm->next_out;
2091 /* compress */
2092 have = strm->avail_out;
2093 ret = deflate(strm, flush);
2094 if (ret == Z_STREAM_ERROR) {
2095 /* This "shouldn't happen". */
2096 state->err = WTAP_ERR_INTERNAL;
2097 state->err_info = "Z_STREAM_ERROR from deflate()";
2098 return -1;
2100 have -= strm->avail_out;
2101 } while (have);
2103 /* if that completed a deflate stream, allow another to start */
2104 if (flush == Z_FINISH)
2105 deflateReset(strm);
2107 /* all done, no errors */
2108 return 0;
2111 /* Write out len bytes from buf. Return 0, and set state->err, on
2112 failure or on an attempt to write 0 bytes (in which case state->err
2113 is Z_OK); return the number of bytes written on success. */
2114 unsigned
2115 gzwfile_write(GZWFILE_T state, const void *buf, unsigned len)
2117 unsigned put = len;
2118 unsigned n;
2119 z_streamp strm;
2121 strm = &(state->strm);
2123 /* check that there's no error */
2124 if (state->err != Z_OK)
2125 return 0;
2127 /* if len is zero, avoid unnecessary operations */
2128 if (len == 0)
2129 return 0;
2131 /* allocate memory if this is the first time through */
2132 if (state->size == 0 && gz_init(state) == -1)
2133 return 0;
2135 /* for small len, copy to input buffer, otherwise compress directly */
2136 if (len < state->size) {
2137 /* copy to input buffer, compress when full */
2138 do {
2139 if (strm->avail_in == 0)
2140 strm->next_in = state->in;
2141 n = state->size - strm->avail_in;
2142 if (n > len)
2143 n = len;
2144 #ifdef z_const
2145 DIAG_OFF(cast-qual)
2146 memcpy((Bytef *)strm->next_in + strm->avail_in, buf, n);
2147 DIAG_ON(cast-qual)
2148 #else
2149 memcpy(strm->next_in + strm->avail_in, buf, n);
2150 #endif
2151 strm->avail_in += n;
2152 state->pos += n;
2153 buf = (const char *)buf + n;
2154 len -= n;
2155 if (len && gz_comp(state, Z_NO_FLUSH) == -1)
2156 return 0;
2157 } while (len);
2159 else {
2160 /* consume whatever's left in the input buffer */
2161 if (strm->avail_in != 0 && gz_comp(state, Z_NO_FLUSH) == -1)
2162 return 0;
2164 /* directly compress user buffer to file */
2165 strm->avail_in = len;
2166 #ifdef z_const
2167 strm->next_in = (z_const Bytef *)buf;
2168 #else
2169 DIAG_OFF(cast-qual)
2170 strm->next_in = (Bytef *)buf;
2171 DIAG_ON(cast-qual)
2172 #endif
2173 state->pos += len;
2174 if (gz_comp(state, Z_NO_FLUSH) == -1)
2175 return 0;
2178 /* input was all buffered or compressed (put will fit in int) */
2179 return (int)put;
2182 /* Flush out what we've written so far. Returns -1, and sets state->err,
2183 on failure; returns 0 on success. */
2185 gzwfile_flush(GZWFILE_T state)
2187 /* check that there's no error */
2188 if (state->err != Z_OK)
2189 return -1;
2191 /* compress remaining data with Z_SYNC_FLUSH */
2192 gz_comp(state, Z_SYNC_FLUSH);
2193 if (state->err != Z_OK)
2194 return -1;
2195 return 0;
2198 /* Flush out all data written, and close the file. Returns a Wiretap
2199 error on failure; returns 0 on success. */
2201 gzwfile_close(GZWFILE_T state)
2203 int ret = 0;
2205 /* flush, free memory, and close file */
2206 if (gz_comp(state, Z_FINISH) == -1)
2207 ret = state->err;
2208 (void)deflateEnd(&(state->strm));
2209 g_free(state->out);
2210 g_free(state->in);
2211 state->err = Z_OK;
2212 if (ws_close(state->fd) == -1 && ret == 0)
2213 ret = errno;
2214 g_free(state);
2215 return ret;
2219 gzwfile_geterr(GZWFILE_T state)
2221 return state->err;
2223 #endif
2226 * Editor modelines - https://www.wireshark.org/tools/modelines.html
2228 * Local variables:
2229 * c-basic-offset: 4
2230 * tab-width: 8
2231 * indent-tabs-mode: nil
2232 * End:
2234 * vi: set shiftwidth=4 tabstop=8 expandtab:
2235 * :indentSize=4:tabSize=8:noTabs=true: