wiretap/file_wrappers.c

   1 /* file_wrappers.c
   2  *
   3  * Wiretap Library
   4  * Copyright (c) 1998 by Gilbert Ramirez <gram@alumni.rice.edu>
   5  *
   6  * SPDX-License-Identifier: GPL-2.0-or-later
   7  */
   8
   9 /* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib
  10  * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
  11  * under licence:
  12  *
  13  * SPDX-License-Identifier: Zlib
  14  *
  15  */
  16
  17 #include "config.h"
  18
  19 #define WS_LOG_DOMAIN LOG_DOMAIN_WIRETAP
  20
  21 #include "file_wrappers.h"
  22
  23 #include <assert.h>
  24 #include <errno.h>
  25 #include <string.h>
  26 #include "wtap-int.h"
  27
  28 #include <wsutil/file_util.h>
  29
  30 #if defined(HAVE_ZLIB) && !defined(HAVE_ZLIBNG)
  31 #define USE_ZLIB_OR_ZLIBNG
  32 #define ZLIB_CONST
  33 #define ZLIB_PREFIX(x) x
  34 #include <zlib.h>
  35 typedef z_stream zlib_stream;
  36 #endif /* defined(HAVE_ZLIB) && !defined(HAVE_ZLIBNG) */
  37
  38 #ifdef HAVE_ZLIBNG
  39 #define USE_ZLIB_OR_ZLIBNG
  40 #define HAVE_INFLATEPRIME 1
  41 #define ZLIB_PREFIX(x) zng_ ## x
  42 #include <zlib-ng.h>
  43 typedef zng_stream zlib_stream;
  44 #endif /* HAVE_ZLIBNG */
  45
  46 #ifdef HAVE_ZSTD
  47 #include <zstd.h>
  48 #endif /* HAVE_ZSTD */
  49
  50 #ifdef HAVE_LZ4
  51 #include <lz4.h>
  52
  53 #if LZ4_VERSION_NUMBER >= 10703
  54 #define USE_LZ4
  55 #include <lz4frame.h>
  56 #ifndef LZ4F_BLOCK_HEADER_SIZE /* Added in LZ4_VERSION_NUMBER 10902 */
  57 #define LZ4F_BLOCK_HEADER_SIZE 4
  58 #endif /* LZ4F_BLOCK_HEADER_SIZE */
  59 #endif /* LZ4_VERSION_NUMBER >= 10703 */
  60 #endif /* HAVE_LZ4 */
  61
  62 /*
  63  * List of compression types supported.
  64  */
   65 static struct compression_type {
         /* One row per compression type; the table below is scanned linearly
            by the wtap_*compression_type* lookups in this file. */
   66     wtap_compression_type  type;          /* identifier the lookups compare against */
   67     const char            *extension;     /* file name extension, no leading dot; NULL if none */
   68     const char            *description;   /* human-readable description; NULL if none */
   69     const char            *name;          /* name matched by wtap_name_to_compression_type(); NULL in the terminator */
   70     const bool            can_write_compressed; /* value returned by wtap_can_write_compression_type() */
   71 } compression_types[] = {
         /* Compressed types come first.  Which rows exist depends on the
            libraries the build was configured with. */
   72 #ifdef USE_ZLIB_OR_ZLIBNG
   73     { WTAP_GZIP_COMPRESSED, "gz", "gzip compressed", "gzip", true },
   74 #endif /* USE_ZLIB_OR_ZLIBNG */
   75 #ifdef HAVE_ZSTD
   76     { WTAP_ZSTD_COMPRESSED, "zst", "zstd compressed", "zstd", false },
   77 #endif /* HAVE_ZSTD */
   78 #ifdef USE_LZ4
   79     { WTAP_LZ4_COMPRESSED, "lz4", "lz4 compressed", "lz4", true },
   80 #endif /* USE_LZ4 */
         /* WTAP_UNCOMPRESSED must follow every compressed type: the
            description/extension/list functions stop scanning at this row. */
   81     { WTAP_UNCOMPRESSED, NULL, NULL, "none", true },
         /* Terminator for the lookups that scan the whole table. */
   82     { WTAP_UNKNOWN_COMPRESSION, NULL, NULL, NULL, false },
   83 };
  84
  85 static wtap_compression_type file_get_compression_type(FILE_T stream);
  86
  87 wtap_compression_type
  88 wtap_name_to_compression_type(const char *name)
  89 {
  90     for (struct compression_type *p = compression_types;
  91             p->type != WTAP_UNKNOWN_COMPRESSION; p++) {
  92                 if (!g_strcmp0(name, p->name))
  93                         return p->type;
  94         }
  95     return WTAP_UNKNOWN_COMPRESSION;
  96 }
  97
  98 wtap_compression_type
  99 wtap_extension_to_compression_type(const char *ext)
 100 {
 101     for (struct compression_type *p = compression_types;
 102             p->type != WTAP_UNKNOWN_COMPRESSION; p++) {
 103                 if (!g_strcmp0(ext, p->extension))
 104                         return p->type;
 105         }
 106     return WTAP_UNKNOWN_COMPRESSION;
 107 }
 108
 109 bool
 110 wtap_can_write_compression_type(wtap_compression_type compression_type)
 111 {
 112     for (struct compression_type *p = compression_types; p->type != WTAP_UNKNOWN_COMPRESSION; p++) {
 113                 if (compression_type == p->type)
 114                         return p->can_write_compressed;
 115         }
 116
 117     return false;
 118 }
 119
 120 wtap_compression_type
 121 wtap_get_compression_type(wtap *wth)
 122 {
 123         return file_get_compression_type((wth->fh == NULL) ? wth->random_fh : wth->fh);
 124 }
 125
 126 const char *
 127 wtap_compression_type_description(wtap_compression_type compression_type)
 128 {
 129         for (struct compression_type *p = compression_types;
 130             p->type != WTAP_UNCOMPRESSED; p++) {
 131                 if (p->type == compression_type)
 132                         return p->description;
 133         }
 134         return NULL;
 135 }
 136
 137 const char *
 138 wtap_compression_type_extension(wtap_compression_type compression_type)
 139 {
 140         for (struct compression_type *p = compression_types;
 141             p->type != WTAP_UNCOMPRESSED; p++) {
 142                 if (p->type == compression_type)
 143                         return p->extension;
 144         }
 145         return NULL;
 146 }
 147
 148 GSList *
 149 wtap_get_all_compression_type_extensions_list(void)
 150 {
 151         GSList *extensions;
 152
 153         extensions = NULL;      /* empty list, to start with */
 154
 155         for (struct compression_type *p = compression_types;
 156             p->type != WTAP_UNCOMPRESSED; p++)
 157                 extensions = g_slist_prepend(extensions, (void *)p->extension);
 158
 159         return extensions;
 160 }
 161
 162 GSList *
 163 wtap_get_all_output_compression_type_names_list(void)
 164 {
 165         GSList *names;
 166
 167         names = NULL;   /* empty list, to start with */
 168
 169         for (struct compression_type *p = compression_types;
 170             p->type != WTAP_UNCOMPRESSED; p++) {
 171             if (p->can_write_compressed)
 172                 names = g_slist_prepend(names, (void *)p->name);
 173         }
 174
 175         return names;
 176 }
 177
 178 /* #define GZBUFSIZE 8192 */
 179 #define GZBUFSIZE 4096
 180 #define LZ4BUFSIZE 4194304 // 4MiB, maximum block size
 181
 182 /* values for wtap_reader compression */
  183 typedef enum {
  184     UNKNOWN,       /* unknown - look for a compression header */
  185     UNCOMPRESSED,  /* uncompressed - copy input directly */
  186     ZLIB,          /* decompress a zlib stream */
  187     GZIP_AFTER_HEADER, /* NOTE(review): presumably gzip data whose header has already been consumed - confirm against the code that sets it */
  188     ZSTD,          /* Zstandard-compressed data; only expected when built with HAVE_ZSTD */
  189     LZ4,           /* LZ4-compressed data; only expected when LZ4 support is built in */
  190 } compression_t;
 191
 192 /*
 193  * We limit the size of our input and output buffers to 2^30 bytes,
 194  * because:
 195  *
 196  *    1) on Windows with MSVC, the return value of _read() is int,
 197  *       so the biggest read you can do is INT_MAX, and the biggest
 198  *       power of 2 below that is 2^30;
 199  *
 200  *    2) the "avail_in" and "avail_out" values in a z_stream structure
 201  *       in zlib are uInts, and those are unsigned ints, and that
 202  *       imposes a limit on the buffer size when we're reading a
 203  *       gzipped file.
 204  *
 205  * Thus, we use unsigned for the buffer sizes, offsets, amount available
 206  * from the buffer, etc.
 207  *
 208  * If we want an even bigger buffer for uncompressed data, or for
 209  * some other form of compression, then the unsigned-sized values should
 210  * be in structure values used only for reading gzipped files, and
 211  * other values should be used for uncompressed data or data
 212  * compressed using other algorithms (e.g., in a union).
 213  */
 214 #define MAX_READ_BUF_SIZE       (1U << 30)
 215
  216 struct wtap_reader_buf {
         /* A buffer with a read cursor.  The helpers offset_in_buffer() and
            bytes_in_buffer() compute (next - buf) and (next + avail - buf),
            so next is expected to point at or after buf. */
  217     uint8_t *buf;  /* buffer */
  218     uint8_t *next; /* next byte to deliver from buffer */
  219     unsigned avail;  /* number of bytes available to deliver at next */
  220 };
 221
  222 struct wtap_reader {
         /* State of one file being read, possibly through a decompressor. */
  223     int fd;                     /* file descriptor */
  224     int64_t raw_pos;            /* current position in file (just to not call lseek()) */
  225     int64_t pos;                /* current position in uncompressed data */
  226     unsigned size;              /* buffer size */
  227
  228     struct wtap_reader_buf in;  /* input buffer, containing compressed data */
  229     struct wtap_reader_buf out; /* output buffer, containing uncompressed data */
  230
  231     bool eof;                   /* true if end of input file reached */
  232     int64_t start;              /* where the gzip data started, for rewinding */
  233     int64_t raw;                /* where the raw data started, for seeking */
  234     compression_t compression;  /* type of compression, if any */
  235     compression_t last_compression; /* last known compression type */
  236     bool is_compressed;         /* false if completely uncompressed, true otherwise */
  237
  238     /* seek request */
  239     int64_t skip;               /* amount to skip (already rewound if backwards) */
  240     bool seek_pending;          /* true if seek request pending */
  241
  242     /* error information */
  243     int err;                    /* error code */
  244     const char *err_info;       /* additional error information string for some errors */
  245
  246     /*
  247      * Decompression stream information.
  248      *
  249      * XXX - should this be a union?
  250      */
  251 #ifdef USE_ZLIB_OR_ZLIBNG
  252     /* zlib inflate stream */
  253     zlib_stream strm;           /* stream structure in-place (not a pointer) */
  254     bool dont_check_crc;        /* true if we aren't supposed to check the CRC */
  255 #endif /* USE_ZLIB_OR_ZLIBNG */
  256 #ifdef HAVE_ZSTD
  257     ZSTD_DCtx *zstd_dctx;       /* Zstandard decompression context */
  258 #endif /* HAVE_ZSTD */
  259 #ifdef USE_LZ4
  260     LZ4F_dctx *lz4_dctx;        /* LZ4 frame decompression context */
  261     LZ4F_frameInfo_t lz4_info;  /* frame info; copied into LZ4 fast-seek points by fast_seek_header() */
  262     unsigned char lz4_hdr[LZ4F_HEADER_SIZE_MAX]; /* frame header bytes; copied into LZ4 fast-seek points by fast_seek_header() */
  263 #endif /* USE_LZ4 */
  264
  265     /* fast seeking */
  266     GPtrArray *fast_seek;       /* array of struct fast_seek_point *, appended in increasing 'out' order; NULL if fast seeking is off */
  267     void *fast_seek_cur;        /* per-stream seek-point state; for ZLIB a struct zlib_cur_seek_point *, or NULL */
  268 };
 269
 270 /* Current read offset within a buffer. */
 271 static unsigned
 272 offset_in_buffer(struct wtap_reader_buf *buf)
 273 {
 274     /* buf->next points to the next byte to read, and buf->buf points
 275        to the first byte in the buffer, so the difference between them
 276        is the offset.
 277
 278        This will fit in an unsigned int, because it can't be bigger
 279        than the size of the buffer, which is an unsigned int. */
 280     return (unsigned)(buf->next - buf->buf);
 281 }
 282
 283 /* Number of bytes of data that are in a buffer. */
 284 static unsigned
 285 bytes_in_buffer(struct wtap_reader_buf *buf)
 286 {
 287     /* buf->next + buf->avail points just past the last byte of data in
 288        the buffer.
 289        Thus, (buf->next + buf->avail) - buf->buf is the number of bytes
 290        of data in the buffer.
 291
 292        This will fit in an unsigned, because it can't be bigger
 293        than the size of the buffer, which is a unsigned. */
 294     return (unsigned)((buf->next + buf->avail) - buf->buf);
 295 }
 296
 297 /* Reset a buffer, discarding all data in the buffer, so we read into
 298    it starting at the beginning. */
 299 static void
 300 buf_reset(struct wtap_reader_buf *buf)
 301 {
 302     buf->next = buf->buf;
 303     buf->avail = 0;
 304 }
 305
 306 static int
 307 buf_read(FILE_T state, struct wtap_reader_buf *buf)
 308 {
 309     unsigned space_left, to_read;
 310     unsigned char *read_ptr;
 311     ssize_t ret;
 312
 313     /* How much space is left at the end of the buffer?
 314        XXX - the output buffer actually has state->size * 2 bytes. */
 315     space_left = state->size - bytes_in_buffer(buf);
 316     if (space_left == 0) {
 317         /* There's no space left, so we start fresh at the beginning
 318            of the buffer. */
 319         buf_reset(buf);
 320
 321         read_ptr = buf->buf;
 322         to_read = state->size;
 323     } else {
 324         /* There's some space left; try to read as much data as we
 325            can into that space.  We may get less than that if we're
 326            reading from a pipe or if we're near the end of the file. */
 327         read_ptr = buf->next + buf->avail;
 328         to_read = space_left;
 329     }
 330
 331     ret = ws_read(state->fd, read_ptr, to_read);
 332     if (ret < 0) {
 333         state->err = errno;
 334         state->err_info = NULL;
 335         return -1;
 336     }
 337     if (ret == 0)
 338         state->eof = true;
 339     state->raw_pos += ret;
 340     buf->avail += (unsigned)ret;
 341     return 0;
 342 }
 343
 344 static int /* gz_avail */
 345 fill_in_buffer(FILE_T state)
 346 {
 347     if (state->err != 0)
 348         return -1;
 349     if (!state->eof) {
 350         if (buf_read(state, &state->in) < 0)
 351             return -1;
 352     }
 353     return 0;
 354 }
 355
 356 #define ZLIB_WINSIZE 32768
 357 #define  LZ4_WINSIZE 65536
 358
  359 struct fast_seek_point {
         /* A position at which reading can be resumed without
            decompressing from the start of the file.  Points are stored in
            the reader's fast_seek array and looked up by 'out'. */
  360     int64_t out;         /* corresponding offset in uncompressed data */
  361     int64_t in;          /* offset in input file of first full byte */
  362
  363     compression_t compression; /* compression type in effect at this point */
  364     union {
  365         struct {
  366 #ifdef HAVE_INFLATEPRIME
  367             int bits;   /* number of bits (1-7) from byte at in - 1, or 0 */
  368 #endif /* HAVE_INFLATEPRIME */
  369             unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
  370
  371             /* be gentle with Z_STREAM_END, 8 bytes more... Another solution would be to comment checks out */
  372             uint32_t adler;     /* low 32 bits of strm.adler when the point was recorded */
  373             uint32_t total_out; /* low 32 bits of strm.total_out when the point was recorded */
  374         } zlib;
  375 #ifdef USE_LZ4
  376         struct {
  377             LZ4F_frameInfo_t lz4_info; /* copy of the reader's lz4_info */
  378             unsigned char lz4_hdr[LZ4F_HEADER_SIZE_MAX]; /* copy of the reader's lz4_hdr */
  379 #if 0
  380             unsigned char window[LZ4_WINSIZE]; /* preceding 64K of uncompressed data */
  381 #endif
  382         } lz4;
  383 #endif
  384     } data;
  385 };
 386
  387 struct zlib_cur_seek_point {
         /* Sliding window of the most recent inflate output, kept as a
            circular buffer so it can be copied into a fast_seek_point. */
  388     unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
  389     unsigned int pos;   /* index in window where the next output byte is written; the oldest byte once the window is full */
  390     unsigned int have;  /* number of valid bytes in window, capped at ZLIB_WINSIZE */
  391 };
 392
 393 #define SPAN INT64_C(1048576)
 394 static struct fast_seek_point *
 395 fast_seek_find(FILE_T file, int64_t pos)
 396 {
 397     struct fast_seek_point *smallest = NULL;
 398     struct fast_seek_point *item;
 399     unsigned low, i, max;
 400
 401     if (!file->fast_seek)
 402         return NULL;
 403
 404     for (low = 0, max = file->fast_seek->len; low < max; ) {
 405         i = (low + max) / 2;
 406         item = (struct fast_seek_point *)file->fast_seek->pdata[i];
 407
 408         if (pos < item->out)
 409             max = i;
 410         else if (pos > item->out) {
 411             smallest = item;
 412             low = i + 1;
 413         } else {
 414             return item;
 415         }
 416     }
 417     return smallest;
 418 }
 419
 420 static void
 421 fast_seek_header(FILE_T file, int64_t in_pos, int64_t out_pos,
 422                  compression_t compression)
 423 {
 424     struct fast_seek_point *item = NULL;
 425
 426     if (!file->fast_seek) {
 427         return;
 428     }
 429
 430     if (file->fast_seek->len != 0)
 431         item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
 432
 433     /* fast_seek_header always adds a fast seek point, even if less than
 434      * SPAN from the last one. That is because it used for new streams
 435      * (including concatenated streams) where the compression type
 436      * or, for LZ4, compression options, may change.
 437      */
 438     if (!item || item->out < out_pos) {
 439         struct fast_seek_point *val = g_new(struct fast_seek_point,1);
 440         val->in = in_pos;
 441         val->out = out_pos;
 442         val->compression = compression;
 443
 444 #ifdef USE_LZ4
 445         if (compression == LZ4) {
 446             val->data.lz4.lz4_info = file->lz4_info;
 447             memcpy(val->data.lz4.lz4_hdr, file->lz4_hdr, LZ4F_HEADER_SIZE_MAX);
 448         }
 449 #endif /* USE_LZ4 */
 450         g_ptr_array_add(file->fast_seek, val);
 451     }
 452 }
 453
 454 static void
 455 fast_seek_reset(FILE_T state)
 456 {
 457     switch (state->compression) {
 458
 459     case UNKNOWN:
 460         break;
 461
 462     case UNCOMPRESSED:
 463         /* Nothing to do */
 464         break;
 465
 466     case ZLIB:
 467 #ifdef USE_ZLIB_OR_ZLIBNG
 468         if (state->fast_seek_cur != NULL) {
 469             struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
 470
 471             cur->have = 0;
 472         }
 473 #else
 474         /* This "cannot happen" */
 475         ws_assert_not_reached();
 476 #endif /* USE_ZLIB_OR_ZLIBNG */
 477         break;
 478
 479     case GZIP_AFTER_HEADER:
 480         break;
 481
 482     case ZSTD:
 483 #ifdef HAVE_ZSTD
 484         /* Anything to do? */
 485 #else
 486         /* This "cannot happen" */
 487         ws_assert_not_reached();
 488 #endif /* HAVE_ZSTD */
 489         break;
 490
 491     case LZ4:
 492 #ifdef HAVE_LZ4
 493         /* Anything to do? */
 494 #else
 495         /* This "cannot happen" */
 496         ws_assert_not_reached();
 497 #endif /* HAVE_LZ4 */
 498         break;
 499
 500     /* Add other compression types here */
 501
 502     default:
 503         /* This "cannot happen" */
 504         ws_assert_not_reached();
 505         break;
 506     }
 507 }
 508
 509 static bool
 510 uncompressed_fill_out_buffer(FILE_T state)
 511 {
 512     if (buf_read(state, &state->out) < 0)
 513         return false;
 514     return true;
 515 }
 516
 517 /* Get next byte from input, or -1 if end or error.
 518  *
 519  * Note:
 520  *
 521  *      1) errors from buf_read(), and thus from fill_in_buffer(), are
 522  *      "sticky", and fill_in_buffer() won't do any reading if there's
 523  *      an error;
 524  *
 525  *      2) GZ_GETC() returns -1 on an EOF;
 526  *
 527  * so it's safe to make multiple GZ_GETC() calls and only check the
 528  * last one for an error. */
 529 #define GZ_GETC() ((state->in.avail == 0 && fill_in_buffer(state) == -1) ? -1 : \
 530                    (state->in.avail == 0 ? -1 :                         \
 531                     (state->in.avail--, *(state->in.next)++)))
 532
 533
 534 /*
 535  * Gzipped files, using compression from zlib or zlib-ng.
 536  *
 537  * https://tools.ietf.org/html/rfc1952 (RFC 1952)
 538  */
 539 #ifdef USE_ZLIB_OR_ZLIBNG
 540
 541 /* Get a one-byte integer and return 0 on success and the value in *ret.
 542    Otherwise -1 is returned, state->err is set, and *ret is not modified. */
 543 static int
 544 gz_next1(FILE_T state, uint8_t *ret)
 545 {
 546     int ch;
 547
 548     ch = GZ_GETC();
 549     if (ch == -1) {
 550         if (state->err == 0) {
 551             /* EOF */
 552             state->err = WTAP_ERR_SHORT_READ;
 553             state->err_info = NULL;
 554         }
 555         return -1;
 556     }
 557     *ret = ch;
 558     return 0;
 559 }
 560
 561 /* Get a two-byte little-endian integer and return 0 on success and the value
 562    in *ret.  Otherwise -1 is returned, state->err is set, and *ret is not
 563    modified. */
 564 static int
 565 gz_next2(FILE_T state, uint16_t *ret)
 566 {
 567     uint16_t val;
 568     int ch;
 569
 570     val = GZ_GETC();
 571     ch = GZ_GETC();
 572     if (ch == -1) {
 573         if (state->err == 0) {
 574             /* EOF */
 575             state->err = WTAP_ERR_SHORT_READ;
 576             state->err_info = NULL;
 577         }
 578         return -1;
 579     }
 580     val += (uint16_t)ch << 8;
 581     *ret = val;
 582     return 0;
 583 }
 584
 585 /* Get a four-byte little-endian integer and return 0 on success and the value
 586    in *ret.  Otherwise -1 is returned, state->err is set, and *ret is not
 587    modified. */
 588 static int
 589 gz_next4(FILE_T state, uint32_t *ret)
 590 {
 591     uint32_t val;
 592     int ch;
 593
 594     val = GZ_GETC();
 595     val += (unsigned)GZ_GETC() << 8;
 596     val += (uint32_t)GZ_GETC() << 16;
 597     ch = GZ_GETC();
 598     if (ch == -1) {
 599         if (state->err == 0) {
 600             /* EOF */
 601             state->err = WTAP_ERR_SHORT_READ;
 602             state->err_info = NULL;
 603         }
 604         return -1;
 605     }
 606     val += (uint32_t)ch << 24;
 607     *ret = val;
 608     return 0;
 609 }
 610
 611 /* Skip the specified number of bytes and return 0 on success.  Otherwise -1
 612    is returned. */
 613 static int
 614 gz_skipn(FILE_T state, size_t n)
 615 {
 616     while (n != 0) {
 617         if (GZ_GETC() == -1) {
 618             if (state->err == 0) {
 619                 /* EOF */
 620                 state->err = WTAP_ERR_SHORT_READ;
 621                 state->err_info = NULL;
 622             }
 623             return -1;
 624         }
 625         n--;
 626     }
 627     return 0;
 628 }
 629
 630 /* Skip a null-terminated string and return 0 on success.  Otherwise -1
 631    is returned. */
 632 static int
 633 gz_skipzstr(FILE_T state)
 634 {
 635     int ch;
 636
 637     /* It's null-terminated, so scan until we read a byte with
 638        the value 0 or get an error. */
 639     while ((ch = GZ_GETC()) > 0)
 640         ;
 641     if (ch == -1) {
 642         if (state->err == 0) {
 643             /* EOF */
 644             state->err = WTAP_ERR_SHORT_READ;
 645             state->err_info = NULL;
 646         }
 647         return -1;
 648     }
 649     return 0;
 650 }
 651
 652 static void
 653 zlib_fast_seek_add(FILE_T file, struct zlib_cur_seek_point *point, int bits, int64_t in_pos, int64_t out_pos)
 654 {
 655     /* it's for sure after gzip header, so file->fast_seek->len != 0 */
 656     struct fast_seek_point *item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
 657
 658 #ifndef HAVE_INFLATEPRIME
 659     if (bits)
 660         return;
 661 #endif /* HAVE_INFLATEPRIME */
 662
 663     /* Glib has got Balanced Binary Trees (GTree) but I couldn't find a way to do quick search for nearest (and smaller) value to seek (It's what fast_seek_find() do)
 664      *      Inserting value in middle of sorted array is expensive, so we want to add only in the end.
 665      *      It's not big deal, cause first-read don't usually invoke seeking
 666      */
 667     if (item->out + SPAN < out_pos) {
 668         struct fast_seek_point *val = g_new(struct fast_seek_point,1);
 669         val->in = in_pos;
 670         val->out = out_pos;
 671         val->compression = ZLIB;
 672 #ifdef HAVE_INFLATEPRIME
 673         val->data.zlib.bits = bits;
 674 #endif /* HAVE_INFLATEPRIME */
 675         if (point->pos != 0) {
 676             unsigned int left = ZLIB_WINSIZE - point->pos;
 677
 678             memcpy(val->data.zlib.window, point->window + point->pos, left);
 679             memcpy(val->data.zlib.window + left, point->window, point->pos);
 680         } else
 681             memcpy(val->data.zlib.window, point->window, ZLIB_WINSIZE);
 682
 683         /*
 684          * XXX - strm.adler is a uLong in at least some versions
 685          * of zlib, and uLong is an unsigned long in at least
 686          * some of those versions, which means it's 64-bit
 687          * on LP64 platforms, even though the checksum is
 688          * 32-bit.  We assume the actual Adler checksum
 689          * is in the lower 32 bits of strm.adler; as the
 690          * checksum in the file is only 32 bits, we save only
 691          * those lower 32 bits, and cast away any additional
 692          * bits to squelch warnings.
 693          *
 694          * The same applies to strm.total_out.
 695          */
 696         val->data.zlib.adler = (uint32_t) file->strm.adler;
 697         val->data.zlib.total_out = (uint32_t) file->strm.total_out;
 698         g_ptr_array_add(file->fast_seek, val);
 699     }
 700 }
 701
 702 /*
 703  * Based on what gz_decomp() in zlib does.
 704  */
  705 static void
  706 zlib_fill_out_buffer(FILE_T state)
  707 {
         /*
          * Inflate data from state->in into state->out.buf until the output
          * buffer (state->size * 2 bytes) is full, the deflate stream ends,
          * or an error occurs.  Errors are reported through state->err and
          * state->err_info; whatever was decompressed before the error is
          * still made available in state->out.
          */
  708     int ret = 0;        /* XXX */
  709     uint32_t crc, len;
  710 #ifdef HAVE_ZLIBNG
  711     zng_streamp strm = &(state->strm);
  712 #else /* HAVE_ZLIBNG */
  713     z_streamp strm = &(state->strm);
  714 #endif /* HAVE_ZLIBNG */
  715     unsigned char *buf = state->out.buf;
  716     unsigned int count = state->size << 1;
  717
         /* buf2/count2 describe the output area as it was before the current
            inflate() call, so (count2 - strm->avail_out) is the number of
            bytes that call produced, starting at buf2. */
  718     unsigned char *buf2 = buf;
  719     unsigned int count2 = count;
  720
  721     strm->avail_out = count;
  722     strm->next_out = buf;
  723
  724     /* fill output buffer up to end of deflate stream or error */
  725     do {
  726         /* get more input for inflate() */
  727         if (state->in.avail == 0 && fill_in_buffer(state) == -1)
  728             break;
  729         if (state->in.avail == 0) {
  730             /* EOF */
  731             state->err = WTAP_ERR_SHORT_READ;
  732             state->err_info = NULL;
  733             break;
  734         }
  735
  736         strm->avail_in = state->in.avail;
  737         strm->next_in = state->in.next;
  738         /* decompress and handle errors */
             /* Z_BLOCK makes inflate() also stop at deflate block boundaries,
                which is where fast-seek points can be taken (see below). */
  739 #ifdef Z_BLOCK
  740         ret = ZLIB_PREFIX(inflate)(strm, Z_BLOCK);
  741 #else /* Z_BLOCK */
  742         ret = ZLIB_PREFIX(inflate)(strm, Z_NO_FLUSH);
  743 #endif /* Z_BLOCK */
             /* Copy the consumed-input position back into our input buffer. */
  744         state->in.avail = strm->avail_in;
  745 #ifdef z_const
  746 DIAG_OFF(cast-qual)
  747         state->in.next = (unsigned char *)strm->next_in;
  748 DIAG_ON(cast-qual)
  749 #else /* z_const */
  750         state->in.next = strm->next_in;
  751 #endif /* z_const */
  752         if (ret == Z_STREAM_ERROR) {
  753             state->err = WTAP_ERR_DECOMPRESS;
  754             state->err_info = strm->msg;
  755             break;
  756         }
  757         if (ret == Z_NEED_DICT) {
  758             state->err = WTAP_ERR_DECOMPRESS;
  759             state->err_info = "preset dictionary needed";
  760             break;
  761         }
  762         if (ret == Z_MEM_ERROR) {
  763             /* This means "not enough memory". */
  764             state->err = ENOMEM;
  765             state->err_info = NULL;
  766             break;
  767         }
  768         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
  769             state->err = WTAP_ERR_DECOMPRESS;
  770             state->err_info = strm->msg;
  771             break;
  772         }
  773         /*
  774          * XXX - Z_BUF_ERROR?
  775          */
  776
             /* strm->adler is used here to hold the running CRC-32 of the
                uncompressed data; it is compared with the gzip trailer below. */
  777         strm->adler = ZLIB_PREFIX(crc32)(strm->adler, buf2, count2 - strm->avail_out);
  778 #ifdef Z_BLOCK
  779         if (state->fast_seek_cur != NULL) {
  780             struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
  781             unsigned int ready = count2 - strm->avail_out;
  782
                 /* Append the new output to the circular window, wrapping at
                    ZLIB_WINSIZE; cur->pos is where the next byte goes. */
  783             if (ready < ZLIB_WINSIZE) {
  784                 unsigned left = ZLIB_WINSIZE - cur->pos;
  785
  786                 if (ready >= left) {
  787                     memcpy(cur->window + cur->pos, buf2, left);
  788                     if (ready != left)
  789                         memcpy(cur->window, buf2 + left, ready - left);
  790
  791                     cur->pos = ready - left;
  792                     cur->have += ready;
  793                 } else {
  794                     memcpy(cur->window + cur->pos, buf2, ready);
  795                     cur->pos += ready;
  796                     cur->have += ready;
  797                 }
  798
  799                 if (cur->have >= ZLIB_WINSIZE)
  800                     cur->have = ZLIB_WINSIZE;
  801
  802             } else {
                     /* More than a window's worth was produced: keep only the
                        last ZLIB_WINSIZE bytes, in linear order. */
  803                 memcpy(cur->window, buf2 + (ready - ZLIB_WINSIZE), ZLIB_WINSIZE);
  804                 cur->pos = 0;
  805                 cur->have = ZLIB_WINSIZE;
  806             }
  807
                 /* Take a seek point only with a full window and, per the zlib
                    documentation of data_type, when inflate() stopped right
                    after an end-of-block code or header (bit 128) and is not
                    in the last block (bit 64).  The low three bits give the
                    number of unused bits in the last input byte. */
  808             if (cur->have >= ZLIB_WINSIZE && ret != Z_STREAM_END && (strm->data_type & 128) && !(strm->data_type & 64))
  809                 zlib_fast_seek_add(state, cur, (strm->data_type & 7), state->raw_pos - strm->avail_in, state->pos + (count - strm->avail_out));
  810         }
  811 #endif /* Z_BLOCK */
             /* Advance past what this call produced, for the next iteration. */
  812         buf2 = (buf2 + count2 - strm->avail_out);
  813         count2 = strm->avail_out;
  814
  815     } while (strm->avail_out && ret != Z_STREAM_END);
  816
  817     /* update available output and crc check value */
  818     state->out.next = buf;
  819     state->out.avail = count - strm->avail_out;
  820
  821     /* Check gzip trailer if at end of deflate stream.
  822        We don't fail immediately here, we just set an error
  823        indication, so that we try to process what data we
  824        got before the error.  The next attempt to read
  825        something past that data will get the error. */
  826     if (ret == Z_STREAM_END) {
             /* The trailer is two little-endian 32-bit values: the CRC-32 and
                the uncompressed length modulo 2^32. */
  827         if (gz_next4(state, &crc) != -1 &&
  828             gz_next4(state, &len) != -1) {
  829             if (crc != strm->adler && !state->dont_check_crc) {
  830                 state->err = WTAP_ERR_DECOMPRESS;
  831                 state->err_info = "bad CRC";
  832             } else if (len != (strm->total_out & 0xffffffffUL)) {
  833                 state->err = WTAP_ERR_DECOMPRESS;
  834                 state->err_info = "length field wrong";
  835             }
  836         }
  837         state->last_compression = state->compression;
  838         state->compression = UNKNOWN;      /* ready for next stream, once have is 0 */
             /* The sliding window belongs to the stream that just ended. */
  839         g_free(state->fast_seek_cur);
  840         state->fast_seek_cur = NULL;
  841     }
  842 }
 843 #endif /* USE_ZLIB_OR_ZLIBNG */
 844
 845 /*
 846  * Check for a gzip header.
 847  *
 848  * Based on the gzip-specific stuff gz_head() from zlib does.
 849  */
 850 static int
 851 check_for_zlib_compression(FILE_T state)
 852 {
 853     /*
 854      * Look for the gzip header.  The first two bytes are 31 and 139,
 855      * and if we find it, return success if we support gzip and an
 856      * error if we don't.
 857      */
 858     if (state->in.next[0] == 31) {
 859         state->in.avail--;
 860         state->in.next++;
 861
 862         /* Make sure the byte after the first byte is present */
 863         if (state->in.avail == 0 && fill_in_buffer(state) == -1) {
 864             /* Read error. */
 865             return -1;
 866         }
 867         if (state->in.avail != 0) {
 868             if (state->in.next[0] == 139) {
 869                 /*
 870                  * We have what looks like the ID1 and ID2 bytes of a gzip
 871                  * header.
 872                  * Continue processing the file.
 873                  *
 874                  * XXX - some capture file formats (I'M LOOKING AT YOU,
 875                  * ENDACE!) can have 31 in the first byte of the file
 876                  * and 139 in the second byte of the file.  For now, in
 877                  * those cases, you lose.
 878                  */
 879 #ifdef USE_ZLIB_OR_ZLIBNG
 880                 uint8_t cm;
 881                 uint8_t flags;
 882                 uint16_t len;
 883                 uint16_t hcrc;
 884
 885                 state->in.avail--;
 886                 state->in.next++;
 887
 888                 /* read rest of header */
 889
 890                 /* compression method (CM) */
 891                 if (gz_next1(state, &cm) == -1)
 892                     return -1;
 893                 if (cm != 8) {
 894                     state->err = WTAP_ERR_DECOMPRESS;
 895                     state->err_info = "unknown compression method";
 896                     return -1;
 897                 }
 898
 899                 /* flags (FLG) */
 900                 if (gz_next1(state, &flags) == -1) {
 901                     /* Read error. */
 902                     return -1;
 903                 }
 904                 if (flags & 0xe0) {     /* reserved flag bits */
 905                     state->err = WTAP_ERR_DECOMPRESS;
 906                     state->err_info = "reserved flag bits set";
 907                     return -1;
 908                 }
 909
 910                 /* modification time (MTIME) */
 911                 if (gz_skipn(state, 4) == -1) {
 912                     /* Read error. */
 913                     return -1;
 914                 }
 915
 916                 /* extra flags (XFL) */
 917                 if (gz_skipn(state, 1) == -1) {
 918                     /* Read error. */
 919                     return -1;
 920                 }
 921
 922                 /* operating system (OS) */
 923                 if (gz_skipn(state, 1) == -1) {
 924                     /* Read error. */
 925                     return -1;
 926                 }
 927
 928                 if (flags & 4) {
 929                     /* extra field - get XLEN */
 930                     if (gz_next2(state, &len) == -1) {
 931                         /* Read error. */
 932                         return -1;
 933                     }
 934
 935                     /* skip the extra field */
 936                     if (gz_skipn(state, len) == -1) {
 937                         /* Read error. */
 938                         return -1;
 939                     }
 940                 }
 941                 if (flags & 8) {
 942                     /* file name */
 943                     if (gz_skipzstr(state) == -1) {
 944                         /* Read error. */
 945                         return -1;
 946                     }
 947                 }
 948                 if (flags & 16) {
 949                     /* comment */
 950                     if (gz_skipzstr(state) == -1) {
 951                         /* Read error. */
 952                         return -1;
 953                     }
 954                 }
 955                 if (flags & 2) {
 956                     /* header crc */
 957                     if (gz_next2(state, &hcrc) == -1) {
 958                         /* Read error. */
 959                         return -1;
 960                     }
 961                     /* XXX - check the CRC? */
 962                 }
 963
 964                 /* set up for decompression */
 965                 ZLIB_PREFIX(inflateReset)(&(state->strm));
 966                 state->strm.adler = ZLIB_PREFIX(crc32)(0L, Z_NULL, 0);
 967                 state->compression = ZLIB;
 968                 state->is_compressed = true;
 969 #ifdef Z_BLOCK
 970                 if (state->fast_seek) {
 971                     struct zlib_cur_seek_point *cur = g_new(struct zlib_cur_seek_point,1);
 972
 973                     cur->pos = cur->have = 0;
 974                     g_free(state->fast_seek_cur);
 975                     state->fast_seek_cur = cur;
 976                     fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, GZIP_AFTER_HEADER);
 977                 }
 978 #endif /* Z_BLOCK */
 979                 return 1;
 980 #else /* USE_ZLIB_OR_ZLIBNG */
 981                 state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
 982                 state->err_info = "reading gzip-compressed files isn't supported";
 983                 return -1;
 984 #endif /* USE_ZLIB_OR_ZLIBNG */
 985             }
 986
 987             /*
 988              * Not a gzip file.  "Unget" the first character; either:
 989              *
 990              *    1) we read both of the first two bytes into the
 991              *    buffer with the first ws_read, so we can just back
 992              *    up by one byte;
 993              *
 994              *    2) we only read the first byte into the buffer with
 995              *    the first ws_read (e.g., because we're reading from
 996              *    a pipe and only the first byte had been written to
 997              *    the pipe at that point), and read the second byte
 998              *    into the buffer after the first byte in the
 999              *    fill_in_buffer call, so we now have two bytes in
1000              *    the buffer, and can just back up by one byte.
1001              */
1002             state->in.avail++;
1003             state->in.next--;
1004         }
1005     }
1006     return 0;
1007 }
1008
1009
1010 /*
1011  * Zstandard compression.
1012  *
1013  * https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
1014  */
1015 #ifdef HAVE_ZSTD
1016 static bool
1017 zstd_fill_out_buffer(FILE_T state)
1018 {
1019     ws_assert(state->out.avail == 0);
1020
1021     if (state->in.avail == 0 && fill_in_buffer(state) == -1)
1022         return false;
1023
1024     ZSTD_outBuffer output = {state->out.buf, state->size << 1, 0};
1025     ZSTD_inBuffer input = {state->in.next, state->in.avail, 0};
1026     const size_t ret = ZSTD_decompressStream(state->zstd_dctx, &output, &input);
1027     if (ZSTD_isError(ret)) {
1028         state->err = WTAP_ERR_DECOMPRESS;
1029         state->err_info = ZSTD_getErrorName(ret);
1030         return false;
1031     }
1032
1033     state->in.next = state->in.next + input.pos;
1034     state->in.avail -= (unsigned)input.pos;
1035
1036     state->out.next = output.dst;
1037     state->out.avail = (unsigned)output.pos;
1038
1039     if (ret == 0) {
1040         state->last_compression = state->compression;
1041         state->compression = UNKNOWN;
1042     }
1043     return true;
1044 }
1045 #endif /* HAVE_ZSTD */
1046
1047 /*
1048  * Check for a Zstandard header.
1049  */
1050 static int
1051 check_for_zstd_compression(FILE_T state)
1052 {
1053     /*
1054      * Look for the Zstandard header, and, if we find it, return
1055      * success if we support Zstandard and an error if we don't.
1056      */
1057     if (state->in.avail >= 4
1058         && state->in.next[0] == 0x28 && state->in.next[1] == 0xb5
1059         && state->in.next[2] == 0x2f && state->in.next[3] == 0xfd) {
1060 #ifdef HAVE_ZSTD
1061         const size_t ret = ZSTD_initDStream(state->zstd_dctx);
1062         if (ZSTD_isError(ret)) {
1063             state->err = WTAP_ERR_DECOMPRESS;
1064             state->err_info = ZSTD_getErrorName(ret);
1065             return -1;
1066         }
1067
1068         fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, ZSTD);
1069         state->compression = ZSTD;
1070         state->is_compressed = true;
1071         return 1;
1072 #else /* HAVE_ZSTD */
1073         state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
1074         state->err_info = "reading zstd-compressed files isn't supported";
1075         return -1;
1076 #endif /* HAVE_ZSTD */
1077     }
1078     return 0;
1079 }
1080
1081 /*
1082  * lz4 compression.
1083  *
1084  * https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
1085  */
1086 #ifdef USE_LZ4
1087 static void
1088 lz4_fast_seek_add(FILE_T file, struct zlib_cur_seek_point *point _U_, int64_t in_pos, int64_t out_pos)
1089 {
1090     if (!file->fast_seek) {
1091         return;
1092     }
1093
1094     struct fast_seek_point *item = NULL;
1095
1096     if (file->fast_seek->len != 0)
1097         item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
1098
1099     /* As of Glib 2.68 GTree has g_tree_upper_bound, or we could use a
1100      * wmem_tree. However, since our initial read is usually sequential
1101      * only adding seek points at the end of the ptr array is fast and fine.
1102      */
1103
1104     /* don't bother adding jump points between very small blocks (min SPAN) */
1105     if (!item || item->out + SPAN < out_pos) {
1106         struct fast_seek_point *val = g_new(struct fast_seek_point,1);
1107         val->in = in_pos;
1108         val->out = out_pos;
1109         val->compression = LZ4;
1110 #if 0
1111         if (point->pos != 0) {
1112             unsigned int left = LZ4_WINSIZE - point->pos;
1113
1114             memcpy(val->data.zlib.window, point->window + point->pos, left);
1115             memcpy(val->data.zlib.window + left, point->window, point->pos);
1116         } else
1117             memcpy(val->data.zlib.window, point->window, ZZ4_WINSIZE);
1118 #endif
1119
1120         val->data.lz4.lz4_info = file->lz4_info;
1121         memcpy(val->data.lz4.lz4_hdr, file->lz4_hdr, LZ4F_HEADER_SIZE_MAX);
1122         g_ptr_array_add(file->fast_seek, val);
1123     }
1124 }
1125
1126 static bool
1127 lz4_fill_out_buffer(FILE_T state)
1128 {
1129     ws_assert(state->out.avail == 0);
1130
1131     if (state->in.avail == 0 && fill_in_buffer(state) == -1)
1132         return false;
1133
1134     /*
1135      * We should be at the start of a block. First, determine the size of
1136      * the block. We tell LZ4F_decompress that there's no room to put
1137      * the decompressed block; this will make it read the block size
1138      * header and stop, returning the size of the block (plus next
1139      * header) as hint of how much input to hand next.
1140      */
1141
1142     size_t outBufSize = 0; // Zero so we don't actually consume the block
1143     size_t inBufSize;
1144
1145     size_t compressedSize = 0;
1146
1147     do {
1148         /* get more input for decompress() */
1149         if (state->in.avail == 0 && fill_in_buffer(state) == -1)
1150             break;
1151         if (state->eof) {
1152             state->err = WTAP_ERR_SHORT_READ;
1153             state->err_info = NULL;
1154             break;
1155         }
1156
1157         inBufSize = state->in.avail;
1158         compressedSize = LZ4F_decompress(state->lz4_dctx, state->out.buf, &outBufSize, state->in.next, &inBufSize, NULL);
1159
1160         if (LZ4F_isError(compressedSize)) {
1161             state->err = WTAP_ERR_DECOMPRESS;
1162             state->err_info = LZ4F_getErrorName(compressedSize);
1163             return false;
1164         }
1165
1166         state->in.next  += (unsigned)inBufSize;
1167         state->in.avail -= (unsigned)inBufSize;
1168
1169         if (compressedSize == 0) {
1170             /* End of Frame */
1171             state->last_compression = state->compression;
1172             state->compression = UNKNOWN;
1173             return true;
1174         }
1175
1176         ws_assert(outBufSize == 0);
1177
1178     } while (compressedSize < LZ4F_BLOCK_HEADER_SIZE);
1179
1180     /*
1181      * We don't want to add a fast seek point for the end of frame,
1182      * especially if there's another frame or other stream after it,
1183      * which would have the same out position. So add it after the
1184      * reading the block size - but point to where the block size
1185      * is so that we'll fast seek to the block size again.
1186      */
1187     if (state->lz4_info.blockMode == LZ4F_blockIndependent) {
1188         /*
1189          * XXX - If state->lz4_info.blockMode == LZ4F_blockLinked, it doesn't
1190          * seem like the LZ4 Frame API can handle this, we would need to use
1191          * the low level Block API and pass the last 64KiB window of data to
1192          * LZ4_setStreamDecode and use LZ4_decompress_safe_continue (similar
1193          * to gzip). So for now we can't do fast seek with it (we do add one
1194          * header at the frame beginning so that concatenated frames and other
1195          * decompression streams work.)
1196          */
1197         lz4_fast_seek_add(state, NULL, state->raw_pos - state->in.avail - LZ4F_BLOCK_HEADER_SIZE, state->pos);
1198     }
1199
1200     // Now actually read the entire next block - but not the next header
1201     compressedSize -= LZ4F_BLOCK_HEADER_SIZE;
1202     state->out.next = state->out.buf;
1203
1204     if (compressedSize > state->size) {
1205         /*
1206          * What is this? Either bogus, or some new variant of LZ4 Frames with
1207          * a larger block size we don't support. We could have a buffer
1208          * overrun if we try to process it.
1209          *
1210          * TODO - We could realloc here.
1211          */
1212         state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
1213         state->err_info = "lz4 compressed block size too large";
1214         return false;
1215     }
1216
1217     size_t ret;
1218     do {
1219         /* get more input for decompress() */
1220         if (state->in.avail == 0 && fill_in_buffer(state) == -1)
1221             break;
1222         if (state->eof) {
1223             state->err = WTAP_ERR_SHORT_READ;
1224             state->err_info = NULL;
1225             break;
1226         }
1227
1228         outBufSize = (state->size << 1) - offset_in_buffer(&state->out);
1229         inBufSize = MIN(state->in.avail, compressedSize);
1230         ret = LZ4F_decompress(state->lz4_dctx, state->out.next, &outBufSize, state->in.next, &inBufSize, NULL);
1231
1232         if (LZ4F_isError(ret)) {
1233             state->err = WTAP_ERR_DECOMPRESS;
1234             state->err_info = LZ4F_getErrorName(ret);
1235             return false;
1236         }
1237         state->in.next  += (unsigned)inBufSize;
1238         state->in.avail -= (unsigned)inBufSize;
1239         compressedSize -= inBufSize;
1240
1241         state->out.next += (unsigned)outBufSize;
1242         state->out.avail += (unsigned)outBufSize;
1243     } while (compressedSize != 0);
1244
1245     state->out.next  = state->out.buf;
1246
1247 #if 0
1248     /* This is an alternative implementation using the lower-level
1249      * LZ4 Block API. Doing something like this might be necessary
1250      * to handle linked blocks, because the Frame API doesn't have
1251      * a method to reset the dictionary / window.
1252      */
1253     int outBufSize = state->size << 1;
1254     uint32_t compressedSize;
1255     if (gz_next4(state, &compressedSize) == -1) {
1256         return false;
1257     }
1258     if (compressedSize == 0) {
1259         /* EndMark */
1260         if (state->lz4_info.contentChecksumFlag) {
1261             uint32_t xxHash;
1262             if (gz_next4(state, &xxHash) == -1) {
1263                 return false;
1264             }
1265             /* XXX - check hash? */
1266         }
1267         state->last_compression = state->compression;
1268         state->compression = UNKNOWN;
1269         return true;
1270     }
1271     bool uncompressed = compressedSize >> 31;
1272     compressedSize &= 0x7FFFFFFF;
1273     if (compressedSize > state->size) {
1274         // TODO - we could realloc here
1275         state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
1276         state->err_info = "lz4 compressed block size too large";
1277         return false;
1278     }
1279
1280     /*
1281      * We have to read an entire block as we're using the low-level
1282      * Block API instead of the LZ4 Frame API.
1283      */
1284     if (compressedSize > (unsigned)state->in.avail) {
1285         memmove(state->in.buf, state->in.next, state->in.avail);
1286         state->in.next = state->in.buf;
1287         while ((unsigned)state->in.avail < compressedSize) {
1288             if (state->eof) {
1289                 state->err = WTAP_ERR_SHORT_READ;
1290                 state->err_info = NULL;
1291                 return false;
1292             }
1293             if (fill_in_buffer(state) == -1) {
1294                 return false;
1295             }
1296         }
1297     }
1298
1299     int decompressedSize;
1300     if (uncompressed) {
1301         memcpy(state->out.buf, state->in.buf, compressedSize);
1302         decompressedSize = compressedSize;
1303     } else {
1304         decompressedSize = LZ4_decompress_safe(state->in.next, state->out.buf, compressedSize, outBufSize);
1305         //const size_t ret = LZ4F_decompress(state->lz4_dctx, state->out.buf, &outBufSize, state->in.next, &inBufSize, NULL);
1306         if (LZ4F_isError(decompressedSize)) {
1307             state->err = WTAP_ERR_DECOMPRESS;
1308             state->err_info = LZ4F_getErrorName(decompressedSize);
1309             return false;
1310         }
1311     }
1312
1313     /*
1314      * We assume LZ4F_decompress() will not set inBufSize to a
1315      * value > state->in.avail.
1316      */
1317     state->in.next  += compressedSize;
1318     state->in.avail -= compressedSize;
1319
1320     state->out.next  = state->out.buf;
1321     state->out.avail = (unsigned)decompressedSize;
1322
1323     if (state->lz4_info.blockChecksumFlag == LZ4F_blockChecksumEnabled) {
1324         uint32_t xxHash;
1325         if (gz_next4(state, &xxHash) == -1) {
1326             return false;
1327         }
1328         /* XXX - check hash? */
1329     }
1330 #endif
1331     return true;
1332 }
1333 #endif /* USE_LZ4 */
1334
1335 /*
1336  * Check for an lz4 header.
1337  */
1338 static int
1339 check_for_lz4_compression(FILE_T state)
1340 {
1341     /*
1342      * Look for the lz4 header, and, if we find it, return success
1343      * if we support lz4 and an error if we don't.
1344      */
1345     if (state->in.avail >= 4
1346         && state->in.next[0] == 0x04 && state->in.next[1] == 0x22
1347         && state->in.next[2] == 0x4d && state->in.next[3] == 0x18) {
1348 #ifdef USE_LZ4
1349 #if LZ4_VERSION_NUMBER >= 10800
1350         LZ4F_resetDecompressionContext(state->lz4_dctx);
1351 #else /* LZ4_VERSION_NUMBER >= 10800 */
1352         LZ4F_freeDecompressionContext(state->lz4_dctx);
1353         const LZ4F_errorCode_t ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION);
1354         if (LZ4F_isError(ret)) {
1355             state->err = WTAP_ERR_INTERNAL;
1356             state->err_info = LZ4F_getErrorName(ret);
1357             return -1;
1358         }
1359 #endif /* LZ4_VERSION_NUMBER >= 10800 */
1360         size_t headerSize = LZ4F_HEADER_SIZE_MAX;
1361 #if LZ4_VERSION_NUMBER >= 10903
1362         /*
1363          * In 1.9.3+ we can handle a silly edge case of a tiny valid
1364          * frame at the end of a file that is smaller than the maximum
1365          * header size. (lz4frame.h added the function in 1.9.0, but
1366          * only for the static library; it wasn't exported until 1.9.3)
1367          */
1368         while (state->in.avail < LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH) {
1369             if (fill_in_buffer(state) == -1) {
1370                 return -1;
1371             }
1372             if (state->eof) {
1373                 state->err = WTAP_ERR_SHORT_READ;
1374                 state->err_info = NULL;
1375                 return 0;
1376             }
1377         }
1378         headerSize = LZ4F_headerSize(state->in.next, state->in.avail);
1379         if (LZ4F_isError(headerSize)) {
1380             state->err = WTAP_ERR_DECOMPRESS;
1381             state->err_info = LZ4F_getErrorName(headerSize);
1382             return -1;
1383         }
1384 #endif /* LZ4_VERSION_NUMBER >= 10903 */
1385         while (state->in.avail < headerSize) {
1386             if (fill_in_buffer(state) == -1) {
1387                 return -1;
1388             }
1389             if (state->eof) {
1390                 state->err = WTAP_ERR_SHORT_READ;
1391                 state->err_info = NULL;
1392                 return 0;
1393             }
1394         }
1395         size_t inBufSize = state->in.avail;
1396         memcpy(state->lz4_hdr, state->in.next, headerSize);
1397         const LZ4F_errorCode_t err = LZ4F_getFrameInfo(state->lz4_dctx, &state->lz4_info, state->in.next, &inBufSize);
1398         if (LZ4F_isError(err)) {
1399             state->err = WTAP_ERR_DECOMPRESS;
1400             state->err_info = LZ4F_getErrorName(err);
1401             return -1;
1402         }
1403
1404         /*
1405          * XXX - We could check state->lz4_info.blockSizeID here, and
1406          * only realloc the buffers to a larger value if the max
1407          * block size is bigger than state->size. Also we could fail
1408          * on unknown values?
1409          */
1410         state->in.avail -= (unsigned)inBufSize;
1411         state->in.next += (unsigned)inBufSize;
1412
1413         fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, LZ4);
1414         state->compression = LZ4;
1415         state->is_compressed = true;
1416         return 1;
1417 #else /* USE_LZ4 */
1418         state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
1419         state->err_info = "reading lz4-compressed files isn't supported";
1420         return -1;
1421 #endif /* USE_LZ4 */
1422     }
1423     return 0;
1424 }
1425
1426 typedef int (*compression_type_test)(FILE_T);
1427
1428 static compression_type_test const compression_type_tests[] = {
1429     check_for_zlib_compression,
1430     check_for_zstd_compression,
1431     check_for_lz4_compression,
1432 };
1433
1434 /*
1435  * Used when we haven't yet determined whether we have a compressed file
1436  * and, if we do, what sort of compressed file it is.
1437  *
1438  * Based on the non-gzip-specific stuff that gz_head() from zlib does.
1439  */
1440 static int
1441 check_for_compression(FILE_T state)
1442 {
1443     /*
1444      * If this isn't the first frame / compressed stream, ensure that
1445      * we're starting at the beginning of the buffer. This shouldn't
1446      * get called much.
1447      *
1448      * This is to avoid edge cases where a previous frame finished but
1449      * state->in.next is close to the end of the buffer so there isn't
1450      * much room to put the start of the next frame.
1451      * This also lets us put back bytes if things go wrong.
1452      */
1453     if (state->in.next != state->in.buf) {
1454         memmove(state->in.buf, state->in.next, state->in.avail);
1455         state->in.next = state->in.buf;
1456     }
1457
1458     /* get some data in the input buffer */
1459     if (state->in.avail == 0) {
1460         if (fill_in_buffer(state) == -1)
1461             return -1;
1462         if (state->in.avail == 0)
1463             return 0;
1464     }
1465
1466     /*
1467      * Check for the compression types we support.
1468      */
1469     for (size_t i = 0; i < G_N_ELEMENTS(compression_type_tests); i++) {
1470         int ret;
1471
1472         ret = compression_type_tests[i](state);
1473         if (ret == -1)
1474             return -1;    /* error */
1475         if (ret == 1)
1476             return 0;     /* found it */
1477     }
1478
1479     /*
1480      * Some other compressed file formats we might want to support:
1481      *
1482      *   XZ format:
1483      *     https://tukaani.org/xz/
1484      *     https://github.com/tukaani-project/xz
1485      *     https://github.com/tukaani-project/xz/blob/master/doc/xz-file-format.txt
1486      *
1487      *    Bzip2 format:
1488      *      https://www.sourceware.org/bzip2/
1489      *      https://gitlab.com/bzip2/bzip2/
1490      *      https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
1491      *        (GitHub won't render it; download and open it)
1492      *
1493      *    Lzip format:
1494      *      https://www.nongnu.org/lzip/
1495      */
1496
1497     /*
1498      * We didn't see anything that looks like a header for any type of
1499      * compressed file that we support, so just do uncompressed I/O.
1500      *
1501      * XXX - This fast seek data is for the case where a compressed stream
1502      * ends and is followed by an uncompressed portion.  It only works if
1503      * the uncompressed portion is at the end, as we don't constantly scan
1504      * for magic bytes in the middle of uncompressed data. (Concatenated
1505      * compressed streams _do_ work, even streams of different compression types.)
1506      */
1507     if (state->fast_seek)
1508         fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, UNCOMPRESSED);
1509
1510
1511     /* doing raw i/o, save start of raw data for seeking, copy any leftover
1512        input to output -- this assumes that the output buffer is larger than
1513        the input buffer, which also assures space for gzungetc() */
1514     state->raw = state->pos;
1515     state->out.next = state->out.buf;
1516     /* not a compressed file -- copy everything we've read into the
1517        input buffer to the output buffer and fall to raw i/o */
1518     if (state->in.avail) {
1519         memcpy(state->out.buf, state->in.next, state->in.avail);
1520         state->out.avail = state->in.avail;
1521
1522         /* Now discard everything in the input buffer */
1523         buf_reset(&state->in);
1524     }
1525     state->compression = UNCOMPRESSED;
1526     return 0;
1527 }
1528
1529 /*
1530  * Based on what gz_make() in zlib does.
1531  */
1532 static int
1533 fill_out_buffer(FILE_T state)
1534 {
1535     if (state->compression == UNKNOWN) {
1536         /*
1537          * We don't yet know whether the file is compressed,
1538          * so check for a compressed-file header.
1539          */
1540         if (check_for_compression(state) == -1)
1541             return -1;
1542         if (state->out.avail != 0)                /* got some data from check_for_compression() */
1543             return 0;
1544     }
1545
1546     /*
1547      * We got no data from check_for_compression(), or we didn't call
1548      * it as we already know the compression type, so read some more
1549      * data.
1550      */
1551     switch (state->compression) {
1552
1553     case UNCOMPRESSED:
1554         /* straight copy */
1555         if (!uncompressed_fill_out_buffer(state))
1556             return -1;
1557         break;
1558
1559 #ifdef USE_ZLIB_OR_ZLIBNG
1560     case ZLIB:
1561         /* zlib (gzip) decompress */
1562         zlib_fill_out_buffer(state);
1563         break;
1564 #endif /* USE_ZLIB_OR_ZLIBNG */
1565
1566 #ifdef HAVE_ZSTD
1567     case ZSTD:
1568         /* zstd decompress */
1569         if (!zstd_fill_out_buffer(state))
1570             return -1;
1571         break;
1572 #endif /* HAVE_ZSTD */
1573
1574 #ifdef USE_LZ4
1575     case LZ4:
1576         /* lz4 decompress */
1577         if (!lz4_fill_out_buffer(state))
1578             return -1;
1579         break;
1580 #endif /* USE_LZ4 */
1581
1582     default:
1583         /* Unknown compression type; keep reading */
1584         break;
1585     }
1586     return 0;
1587 }
1588
1589 static int
1590 gz_skip(FILE_T state, int64_t len)
1591 {
1592     unsigned n;
1593
1594     /* skip over len bytes or reach end-of-file, whichever comes first */
1595     while (len)
1596         if (state->out.avail != 0) {
1597             /* We have stuff in the output buffer; skip over
1598                it. */
1599             n = (int64_t)state->out.avail > len ? (unsigned)len : state->out.avail;
1600             state->out.avail -= n;
1601             state->out.next += n;
1602             state->pos += n;
1603             len -= n;
1604         } else if (state->err != 0) {
1605             /* We have nothing in the output buffer, and
1606                we have an error that may not have been
1607                reported yet; that means we can't generate
1608                any more data into the output buffer, so
1609                return an error indication. */
1610             return -1;
1611         } else if (state->eof && state->in.avail == 0) {
1612             /* We have nothing in the output buffer, and
1613                we're at the end of the input; just return. */
1614             break;
1615         } else {
1616             /* We have nothing in the output buffer, and
1617                we can generate more data; get more output,
1618                looking for header if required. */
1619             if (fill_out_buffer(state) == -1)
1620                 return -1;
1621         }
1622     return 0;
1623 }
1624
1625 static void
1626 gz_reset(FILE_T state)
1627 {
1628     buf_reset(&state->out);       /* no output data available */
1629     state->eof = false;           /* not at end of file */
1630     state->compression = UNKNOWN; /* look for compression header */
1631
1632     state->seek_pending = false;  /* no seek request pending */
1633     state->err = 0;               /* clear error */
1634     state->err_info = NULL;
1635     state->pos = 0;               /* no uncompressed data yet */
1636     buf_reset(&state->in);        /* no input data yet */
1637 }
1638
1639 FILE_T
1640 file_fdopen(int fd)
1641 {
1642     /*
1643      * XXX - we now check whether we have st_blksize in struct stat;
1644      * it's not available on all platforms.
1645      *
1646      * I'm not sure why we're testing _STATBUF_ST_BLKSIZE; it's not
1647      * set on all platforms that have st_blksize in struct stat.
1648      * (Not all platforms have st_blksize in struct stat.)
1649      *
     * Is there some reason *not* to make the buffer size the maximum
     * of GZBUFSIZE and st_blksize?  On most UN*Xes, the standard I/O
     * library does I/O with st_blksize as the buffer size; on others,
     * and on Windows, it's a 4K buffer size.  If st_blksize is bigger
     * than GZBUFSIZE (which is currently 4KB), that's probably a
     * hint that reading in st_blksize chunks is considered a good
     * idea (e.g., an 8K/1K Berkeley fast file system with st_blksize
     * being 8K, or APFS, where st_blksize is big on at least some
     * versions of macOS).
1659      */
1660 #ifdef _STATBUF_ST_BLKSIZE
1661     ws_statb64 st;
1662 #endif /* _STATBUF_ST_BLKSIZE */
1663 #ifdef HAVE_ZSTD
1664     size_t zstd_buf_size;
1665 #endif /* HAVE_ZSTD */
1666     unsigned want = GZBUFSIZE;
1667     FILE_T state;
1668 #ifdef USE_LZ4
1669     size_t ret;
1670 #endif /* USE_LZ4 */
1671
1672     if (fd == -1)
1673         return NULL;
1674
1675     /* allocate FILE_T structure to return */
1676     state = (FILE_T)g_try_malloc0(sizeof *state);
1677     if (state == NULL)
1678         return NULL;
1679
1680     state->fast_seek_cur = NULL;
1681     state->fast_seek = NULL;
1682
1683     /* open the file with the appropriate mode (or just use fd) */
1684     state->fd = fd;
1685
1686     /* we don't yet know whether it's compressed */
1687     state->is_compressed = false;
1688     state->last_compression = UNKNOWN;
1689
1690     /* save the current position for rewinding (only if reading) */
1691     state->start = ws_lseek64(state->fd, 0, SEEK_CUR);
1692     if (state->start == -1) state->start = 0;
1693     state->raw_pos = state->start;
1694
1695     /* initialize stream */
1696     gz_reset(state);
1697
1698 #ifdef _STATBUF_ST_BLKSIZE
1699     /*
1700      * See what I/O size the file system recommends using, and if
1701      * it's bigger than what we're using and isn't too big, use
1702      * it.
1703      */
1704     if (ws_fstat64(fd, &st) >= 0) {
1705         /*
1706          * Yes, st_blksize can be bigger than an int; apparently,
1707          * it's a long on LP64 Linux, for example.
1708          *
1709          * If the value is too big to fit into a unsigned,
1710          * just use the maximum read buffer size.
1711          *
         * On top of that, the Single UNIX Specification says that
1713          * st_blksize is of type blksize_t, which is a *signed*
1714          * integer type, and, at minimum, macOS 11.6 and Linux 5.14.11's
1715          * include/uapi/asm-generic/stat.h define it as such.
1716          *
1717          * However, other OSes might make it unsigned, and older versions
1718          * of OSes that currently make it signed might make it unsigned,
1719          * so we try to avoid warnings from that.
1720          *
         * We cast MAX_READ_BUF_SIZE to long in order to avoid the
         * warning, although it might introduce warnings on platforms
         * where st_blksize is unsigned; we'll deal with that if
         * it ever shows up as an issue.
         *
         * MAX_READ_BUF_SIZE is < the largest *signed* 32-bit integer,
         * so casting it to long won't turn it into a negative number.
         * (We only support 32-bit and 64-bit 2's-complement platforms.)
1729          */
1730         if (st.st_blksize <= (long)MAX_READ_BUF_SIZE)
1731             want = (unsigned)st.st_blksize;
1732         else
1733             want = MAX_READ_BUF_SIZE;
1734         /* XXX, verify result? */
1735     }
1736 #endif /* _STATBUF_ST_BLKSIZE */
1737 #ifdef HAVE_ZSTD
1738     /* we should have separate input and output buf sizes */
1739     zstd_buf_size = ZSTD_DStreamInSize();
1740     if (zstd_buf_size > want) {
1741         if (zstd_buf_size <= MAX_READ_BUF_SIZE)
1742             want = (unsigned)zstd_buf_size;
1743         else
1744             want = MAX_READ_BUF_SIZE;
1745     }
1746     zstd_buf_size = ZSTD_DStreamOutSize();
1747     if (zstd_buf_size > want) {
1748         if (zstd_buf_size <= MAX_READ_BUF_SIZE)
1749             want = (unsigned)zstd_buf_size;
1750         else
1751             want = MAX_READ_BUF_SIZE;
1752     }
1753 #endif /* HAVE_ZSTD */
1754 #ifdef USE_LZ4
1755     if (LZ4BUFSIZE > want) {
1756         if (LZ4BUFSIZE <= MAX_READ_BUF_SIZE) {
1757             want = LZ4BUFSIZE;
1758         } else {
1759             goto err;
1760         }
1761     }
1762 #endif /* USE_LZ4 */
1763
1764     /* allocate buffers */
1765     state->in.buf = (unsigned char *)g_try_malloc(want);
1766     state->in.next = state->in.buf;
1767     state->in.avail = 0;
1768     state->out.buf = (unsigned char *)g_try_malloc(want << 1);
1769     state->out.next = state->out.buf;
1770     state->out.avail = 0;
1771     state->size = want;
1772     if (state->in.buf == NULL || state->out.buf == NULL) {
1773        goto err;
1774     }
1775
1776 #ifdef USE_ZLIB_OR_ZLIBNG
1777     /* allocate inflate memory */
1778     state->strm.zalloc = Z_NULL;
1779     state->strm.zfree = Z_NULL;
1780     state->strm.opaque = Z_NULL;
1781     state->strm.avail_in = 0;
1782     state->strm.next_in = Z_NULL;
1783     if (ZLIB_PREFIX(inflateInit2)(&(state->strm), -15) != Z_OK) {    /* raw inflate */
1784         goto err;
1785     }
1786
1787     /* for now, assume we should check the crc */
1788     state->dont_check_crc = false;
1789 #endif /* USE_ZLIB_OR_ZLIBNG */
1790
1791 #ifdef HAVE_ZSTD
1792     state->zstd_dctx = ZSTD_createDCtx();
1793     if (state->zstd_dctx == NULL) {
1794         goto err;
1795     }
1796 #endif /* HAVE_ZSTD */
1797
1798 #ifdef USE_LZ4
1799     ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION);
1800     if (LZ4F_isError(ret)) {
1801         goto err;
1802     }
1803 #endif /* USE_LZ4 */
1804
1805     /* return stream */
1806     return state;
1807
1808 err:
1809 #ifdef USE_ZLIB_OR_ZLIBNG
1810     ZLIB_PREFIX(inflateEnd)(&state->strm);
1811 #endif /* USE_ZLIB_OR_ZLIBNG */
1812 #ifdef HAVE_ZSTD
1813     ZSTD_freeDCtx(state->zstd_dctx);
1814 #endif /* HAVE_ZSTD */
1815 #ifdef USE_LZ4
1816     LZ4F_freeDecompressionContext(state->lz4_dctx);
1817 #endif /* USE_LZ4 */
1818     g_free(state->out.buf);
1819     g_free(state->in.buf);
1820     g_free(state);
1821     errno = ENOMEM;
1822     return NULL;
1823 }
1824
1825 FILE_T
1826 file_open(const char *path)
1827 {
1828     int fd;
1829     FILE_T ft;
1830 #ifdef USE_ZLIB_OR_ZLIBNG
1831     const char *suffixp;
1832 #endif /* USE_ZLIB_OR_ZLIBNG */
1833
1834     /* open file and do correct filename conversions.
1835
1836        XXX - do we need O_LARGEFILE?  On UN*X, if we need to do
1837        something special to get large file support, the configure
1838        script should have set us up with the appropriate #defines,
1839        so we should be getting a large-file-enabled file descriptor
1840        here.  Pre-Large File Summit UN*Xes, and possibly even some
1841        post-LFS UN*Xes, might require O_LARGEFILE here, though.
1842        If so, we should probably handle that in ws_open(). */
1843     if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1)
1844         return NULL;
1845
1846     /* open file handle */
1847     ft = file_fdopen(fd);
1848     if (ft == NULL) {
1849         ws_close(fd);
1850         return NULL;
1851     }
1852
1853 #ifdef USE_ZLIB_OR_ZLIBNG
1854     /*
1855      * If this file's name ends in ".caz", it's probably a compressed
1856      * Windows Sniffer file.  The compression is gzip, but if we
1857      * process the CRC as specified by RFC 1952, the computed CRC
1858      * doesn't match the stored CRC.
1859      *
1860      * Compressed Windows Sniffer files don't all have the same CRC
1861      * value; is it just random crap, or are they running the CRC on
1862      * a different set of data than you're supposed to (e.g., not
1863      * CRCing some of the data), or something such as that?
1864      *
1865      * For now, we just set a flag to ignore CRC errors.
1866      */
1867     suffixp = strrchr(path, '.');
1868     if (suffixp != NULL) {
1869         if (g_ascii_strcasecmp(suffixp, ".caz") == 0)
1870             ft->dont_check_crc = true;
1871     }
1872 #endif /* USE_ZLIB_OR_ZLIBNG */
1873
1874     return ft;
1875 }
1876
1877 void
1878 file_set_random_access(FILE_T stream, bool random_flag _U_, GPtrArray *seek)
1879 {
1880     stream->fast_seek = seek;
1881 }
1882
1883 int64_t
1884 file_seek(FILE_T file, int64_t offset, int whence, int *err)
1885 {
1886     struct fast_seek_point *here;
1887     unsigned n;
1888
1889     if (whence != SEEK_SET && whence != SEEK_CUR && whence != SEEK_END) {
1890         ws_assert_not_reached();
1891 /*
1892  *err = EINVAL;
1893  return -1;
1894 */
1895     }
1896
1897     /* Normalize offset to a SEEK_CUR specification */
1898     if (whence == SEEK_END) {
1899         /* Seek relative to the end of the file; given that we might be
1900            reading from a compressed file, we do that by seeking to the
1901            end of the file, making an offset relative to the end of
1902            the file an offset relative to the current position.
1903
1904            XXX - we don't actually use this yet, but, for uncompressed
1905            files, we could optimize it, if desired, by directly using
1906            ws_lseek64(). */
1907         if (gz_skip(file, INT64_MAX) == -1) {
1908             *err = file->err;
1909             return -1;
1910         }
1911         if (offset == 0) {
1912             /* We are done */
1913             return file->pos;
1914         }
1915     } else if (whence == SEEK_SET)
1916         offset -= file->pos;
1917     else if (file->seek_pending) {
1918         /* There's a forward-skip pending, so file->pos doesn't reflect
1919            the actual file position, it represents the position from
1920            which we're skipping; update the offset to include that. */
1921         offset += file->skip;
1922     }
1923     file->seek_pending = false;
1924
1925     /*
1926      * Are we moving at all?
1927      */
1928     if (offset == 0) {
1929         /* No.  Just return the current position. */
1930         return file->pos;
1931     }
1932
1933     /*
1934      * Are we seeking backwards?
1935      */
1936     if (offset < 0) {
1937         /*
1938          * Yes.
1939          *
1940          * Do we have enough data before the current position in the
1941          * buffer that we can seek backwards within the buffer?
1942          */
1943         if (-offset <= offset_in_buffer(&file->out)) {
1944             /*
1945              * Yes.  Adjust appropriately.
1946              *
1947              * offset is negative, so -offset is non-negative, and
1948              * -offset is <= an unsigned and thus fits in an unsigned.
1949              * Get that value and adjust appropriately.
1950              *
1951              * (Casting offset to unsigned makes it positive, which
1952              * is not what we would want, so we cast -offset instead.)
1953              *
1954              * XXX - this won't work with -offset = 2^63, as its
1955              * negative isn't a valid 64-bit integer, but we are
1956              * not at all likely to see files big enough to ever
1957              * see a negative offset that large.
1958              */
1959             unsigned adjustment = (unsigned)(-offset);
1960
1961             file->out.avail += adjustment;
1962             file->out.next -= adjustment;
1963             file->pos -= adjustment;
1964             return file->pos;
1965         }
1966     } else {
1967         /*
1968          * No.  Offset is positive; we're seeking forwards.
1969          *
1970          * Do we have enough data after the current position in the
1971          * buffer that we can seek forwards within the buffer?
1972          */
1973         if (offset < file->out.avail) {
1974             /*
1975              * Yes.  Adjust appropriately.
1976              *
1977              * offset is < an unsigned and thus fits in an unsigned,
1978              * so we can cast it to unsigned safely.
1979              */
1980             file->out.avail -= (unsigned)offset;
1981             file->out.next += offset;
1982             file->pos += offset;
1983             return file->pos;
1984         }
1985     }
1986
1987     /*
1988      * We're not seeking within the buffer.  Do we have "fast seek" data
1989      * for the location to which we will be seeking, and are we either
1990      * seeking backwards or is the fast seek point past what is in the
1991      * buffer? (We don't want to "fast seek" backwards to a point that
1992      * we've already read and buffered if we're actually seeking forwards.)
1993      *
1994      * It might in certain cases be faster to continue reading linearly
1995      * foward rather than jump to the fast seek point if the distance
1996      * to the fast seek point is small, but we might only be able to do that
1997      * if the compression context doesn't change (which for LZ4 includes if
1998      * we jump to a LZ4 with different options.)
1999      * XXX - profile different buffer and SPAN sizes
2000      */
2001     if ((here = fast_seek_find(file, file->pos + offset)) &&
2002         (offset < 0 || here->out >= file->pos + file->out.avail)) {
2003         int64_t off, off2;
2004
2005         /*
2006          * Yes.  Use that data to do the seek.
2007          * Note that this will be true only if file_set_random_access()
2008          * has been called on this file, which should never be the case
2009          * for a pipe.
2010          */
2011         switch (here->compression) {
2012
2013 #ifdef USE_ZLIB_OR_ZLIBNG
2014         case ZLIB:
2015 #ifdef HAVE_INFLATEPRIME
2016             off = here->in - (here->data.zlib.bits ? 1 : 0);
2017 #else /* HAVE_INFLATEPRIME */
2018             off = here->in;
2019 #endif /* HAVE_INFLATEPRIME */
2020             off2 = here->out;
2021             break;
2022
2023         case GZIP_AFTER_HEADER:
2024             off = here->in;
2025             off2 = here->out;
2026             break;
2027 #endif /* USE_ZLIB_OR_ZLIBNG */
2028
2029 #ifdef USE_LZ4
2030         case LZ4:
2031             ws_debug("fast seek lz4");
2032             off = here->in;
2033             off2 = here->out;
2034             break;
2035 #endif /* USE_LZ4 */
2036
2037         case UNCOMPRESSED:
2038             /* In an uncompressed portion, seek directly to the offset */
2039             off2 = (file->pos + offset);
2040             off = here->in + (off2 - here->out);
2041             break;
2042
2043         default:
2044             /* Otherwise, seek to the fast seek point to do any needed setup. */
2045             off = here->in;
2046             off2 = here->out;
2047             break;
2048         }
2049
2050         if (ws_lseek64(file->fd, off, SEEK_SET) == -1) {
2051             *err = errno;
2052             return -1;
2053         }
2054         fast_seek_reset(file);
2055
2056         file->raw_pos = off;
2057         buf_reset(&file->out);
2058         file->eof = false;
2059         file->seek_pending = false;
2060         file->err = 0;
2061         file->err_info = NULL;
2062         buf_reset(&file->in);
2063
2064         switch (here->compression) {
2065
2066 #ifdef USE_ZLIB_OR_ZLIBNG
2067         case ZLIB: {
2068             zlib_stream*strm = &file->strm;
2069             ZLIB_PREFIX(inflateReset)(strm);
2070             strm->adler = here->data.zlib.adler;
2071             strm->total_out = here->data.zlib.total_out;
2072 #ifdef HAVE_INFLATEPRIME
2073             if (here->data.zlib.bits) {
2074                 FILE_T state = file;
2075                 int ret = GZ_GETC();
2076
2077                 if (ret == -1) {
2078                     if (state->err == 0) {
2079                         /* EOF */
2080                         *err = WTAP_ERR_SHORT_READ;
2081                     } else
2082                         *err = state->err;
2083                     return -1;
2084                 }
2085                 (void)ZLIB_PREFIX(inflatePrime)(strm, here->data.zlib.bits, ret >> (8 - here->data.zlib.bits));
2086             }
2087 #endif /* HAVE_INFLATEPRIME */
2088             (void)ZLIB_PREFIX(inflateSetDictionary)(strm, here->data.zlib.window, ZLIB_WINSIZE);
2089             file->compression = ZLIB;
2090             break;
2091         }
2092
2093         case GZIP_AFTER_HEADER: {
2094             zlib_stream* strm = &file->strm;
2095             ZLIB_PREFIX(inflateReset)(strm);
2096             strm->adler = ZLIB_PREFIX(crc32)(0L, Z_NULL, 0);
2097             file->compression = ZLIB;
2098             break;
2099         }
2100 #endif /* USE_ZLIB_OR_ZLIBNG */
2101
2102 #ifdef USE_LZ4
2103         case LZ4:
2104             /* If the frame information seems to have changed (i.e., we fast
2105              * seeked into a different frame that also has different flags
2106              * and options), then reset the context and re-read it.
2107              * Unfortunately the API doesn't provide a method to set the
2108              * context options explicitly based on an already read
2109              * LZ4F_frameInfo_t.
2110              */
2111             if (memcmp(&file->lz4_info, &here->data.lz4.lz4_info, sizeof(LZ4F_frameInfo_t)) != 0) {
2112 #if LZ4_VERSION_NUMBER >= 10800
2113                 LZ4F_resetDecompressionContext(file->lz4_dctx);
2114 #else /* LZ4_VERSION_NUMBER >= 10800 */
2115                 LZ4F_freeDecompressionContext(file->lz4_dctx);
2116                 const LZ4F_errorCode_t ret = LZ4F_createDecompressionContext(&file->lz4_dctx, LZ4F_VERSION);
2117                 if (LZ4F_isError(ret)) {
2118                     file->err = WTAP_ERR_INTERNAL;
2119                     file->err_info = LZ4F_getErrorName(ret);
2120                     return -1;
2121                 }
2122 #endif /* LZ4_VERSION_NUMBER >= 10800 */
2123                 size_t hdr_size = LZ4F_HEADER_SIZE_MAX;
2124                 const LZ4F_errorCode_t frame_err = LZ4F_getFrameInfo(file->lz4_dctx, &file->lz4_info, here->data.lz4.lz4_hdr, &hdr_size);
2125                 if (LZ4F_isError(frame_err)) {
2126                     file->err = WTAP_ERR_DECOMPRESS;
2127                     file->err_info = LZ4F_getErrorName(frame_err);
2128                     return -1;
2129                 }
2130             }
2131             file->lz4_info = here->data.lz4.lz4_info;
2132             file->compression = LZ4;
2133             break;
2134 #endif /* USE_LZ4 */
2135
2136 #ifdef HAVE_ZSTD
2137         case ZSTD:
2138         {
2139             const size_t ret = ZSTD_initDStream(file->zstd_dctx);
2140             if (ZSTD_isError(ret)) {
2141                 file->err = WTAP_ERR_DECOMPRESS;
2142                 file->err_info = ZSTD_getErrorName(ret);
2143                 return -1;
2144             }
2145             file->compression = ZSTD;
2146             break;
2147         }
2148 #endif /* HAVE_ZSTD */
2149
2150         default:
2151             file->compression = here->compression;
2152             break;
2153         }
2154
2155         offset = (file->pos + offset) - off2;
2156         file->pos = off2;
2157         ws_debug("Fast seek OK! %"PRId64, offset);
2158
2159         if (offset) {
2160             /* Don't skip forward yet, wait until we want to read from
2161                the file; that way, if we do multiple seeks in a row,
2162                all involving forward skips, they will be combined. */
2163             file->seek_pending = true;
2164             file->skip = offset;
2165         }
2166         return file->pos + offset;
2167     }
2168
2169     /*
2170      * Is this an uncompressed file, are we within the raw area,
2171      * are we either seeking backwards or seeking past the end
2172      * of the buffer, and are we set up for random access with
2173      * file_set_random_access()?
2174      *
2175      * Again, note that this will never be true on a pipe, as
2176      * file_set_random_access() should never be called if we're
2177      * reading from a pipe.
2178      */
2179     if (file->compression == UNCOMPRESSED && file->pos + offset >= file->raw
2180         && (offset < 0 || offset >= file->out.avail)
2181         && (file->fast_seek != NULL))
2182     {
2183         /*
2184          * Yes.  Just seek there within the file.
2185          */
2186         if (ws_lseek64(file->fd, offset - file->out.avail, SEEK_CUR) == -1) {
2187             *err = errno;
2188             return -1;
2189         }
2190         file->raw_pos += (offset - file->out.avail);
2191         buf_reset(&file->out);
2192         file->eof = false;
2193         file->seek_pending = false;
2194         file->err = 0;
2195         file->err_info = NULL;
2196         buf_reset(&file->in);
2197         file->pos += offset;
2198         return file->pos;
2199     }
2200
2201     /*
2202      * Are we seeking backwards?
2203      */
2204     if (offset < 0) {
2205         /*
2206          * Yes.  We have no fast seek data, so we have to rewind and
2207          * seek forward.
2208          * XXX - true only for compressed files.
2209          *
2210          * Calculate the amount to skip forward after rewinding.
2211          */
2212         offset += file->pos;
2213         if (offset < 0) {                    /* before start of file! */
2214             *err = EINVAL;
2215             return -1;
2216         }
2217         /* rewind, then skip to offset */
2218
2219         /* back up and start over */
2220         if (ws_lseek64(file->fd, file->start, SEEK_SET) == -1) {
2221             *err = errno;
2222             return -1;
2223         }
2224         fast_seek_reset(file);
2225         file->raw_pos = file->start;
2226         gz_reset(file);
2227     }
2228
2229     /*
2230      * Either we're seeking backwards, but have rewound and now need to
2231      * skip forwards, or we're seeking forwards.
2232      *
2233      * Skip what's in output buffer (one less gzgetc() check).
2234      */
2235     n = (int64_t)file->out.avail > offset ? (unsigned)offset : file->out.avail;
2236     file->out.avail -= n;
2237     file->out.next += n;
2238     file->pos += n;
2239     offset -= n;
2240
2241     /* request skip (if not zero) */
2242     if (offset) {
2243         /* Don't skip forward yet, wait until we want to read from
2244            the file; that way, if we do multiple seeks in a row,
2245            all involving forward skips, they will be combined. */
2246         file->seek_pending = true;
2247         file->skip = offset;
2248     }
2249     return file->pos + offset;
2250 }
2251
2252 int64_t
2253 file_tell(FILE_T stream)
2254 {
2255     /* return position */
2256     return stream->pos + (stream->seek_pending ? stream->skip : 0);
2257 }
2258
2259 int64_t
2260 file_tell_raw(FILE_T stream)
2261 {
2262     return stream->raw_pos;
2263 }
2264
2265 int
2266 file_fstat(FILE_T stream, ws_statb64 *statb, int *err)
2267 {
2268     if (ws_fstat64(stream->fd, statb) == -1) {
2269         if (err != NULL)
2270             *err = errno;
2271         return -1;
2272     }
2273     return 0;
2274 }
2275
2276 bool
2277 file_iscompressed(FILE_T stream)
2278 {
2279     return stream->is_compressed;
2280 }
2281
2282 /* Returns a wtap compression type. If we don't know the compression type,
2283  * return WTAP_UNCOMPRESSED, but if our compression state is temporarily
2284  * UNKNOWN because we need to reread compression headers, return the last
2285  * known compression type.
2286  */
2287 static wtap_compression_type
2288 file_get_compression_type(FILE_T stream)
2289 {
2290     if (stream->is_compressed) {
2291         switch ((stream->compression == UNKNOWN) ? stream->last_compression : stream->compression) {
2292
2293         case ZLIB:
2294         case GZIP_AFTER_HEADER:
2295             return WTAP_GZIP_COMPRESSED;
2296
2297         case ZSTD:
2298             return WTAP_ZSTD_COMPRESSED;
2299
2300         case LZ4:
2301             return WTAP_LZ4_COMPRESSED;
2302
2303         case UNCOMPRESSED:
2304             return WTAP_UNCOMPRESSED;
2305
2306         default: /* UNKNOWN, should never happen if is_compressed is set */
2307             ws_assert_not_reached();
2308             return WTAP_UNCOMPRESSED;
2309         }
2310     }
2311     return WTAP_UNCOMPRESSED;
2312 }
2313
2314 int
2315 file_read(void *buf, unsigned int len, FILE_T file)
2316 {
2317     unsigned got, n;
2318
2319     /* if len is zero, avoid unnecessary operations */
2320     if (len == 0)
2321         return 0;
2322
2323     /* process a skip request */
2324     if (file->seek_pending) {
2325         file->seek_pending = false;
2326         if (gz_skip(file, file->skip) == -1)
2327             return -1;
2328     }
2329
2330     /*
2331      * Get len bytes to buf, or less than len if at the end;
2332      * if buf is null, just throw the bytes away.
2333      */
2334     got = 0;
2335     do {
2336         if (file->out.avail != 0) {
2337             /* We have stuff in the output buffer; copy
2338                what we have. */
2339             n = file->out.avail > len ? len : file->out.avail;
2340             if (buf != NULL) {
2341                 memcpy(buf, file->out.next, n);
2342                 buf = (char *)buf + n;
2343             }
2344             file->out.next += n;
2345             file->out.avail -= n;
2346             len -= n;
2347             got += n;
2348             file->pos += n;
2349         } else if (file->err != 0) {
2350             /* We have nothing in the output buffer, and
2351                we have an error that may not have been
2352                reported yet; that means we can't generate
2353                any more data into the output buffer, so
2354                return an error indication. */
2355             return -1;
2356         } else if (file->eof && file->in.avail == 0) {
2357             /* We have nothing in the output buffer, and
2358                we're at the end of the input; just return
2359                with what we've gotten so far. */
2360             break;
2361         } else {
2362             /* We have nothing in the output buffer, and
2363                we can generate more data; get more output,
2364                looking for header if required, and
2365                keep looping to process the new stuff
2366                in the output buffer. */
2367             if (fill_out_buffer(file) == -1)
2368                 return -1;
2369         }
2370     } while (len);
2371
2372     return (int)got;
2373 }
2374
2375 /*
2376  * XXX - this *peeks* at next byte, not a character.
2377  */
2378 int
2379 file_peekc(FILE_T file)
2380 {
2381     int ret = 0;
2382
2383     /* check that we're reading and that there's no error */
2384     if (file->err != 0)
2385         return -1;
2386
2387     /* try output buffer (no need to check for skip request) */
2388     if (file->out.avail != 0) {
2389         return *(file->out.next);
2390     }
2391
2392     /* process a skip request */
2393     if (file->seek_pending) {
2394         file->seek_pending = false;
2395         if (gz_skip(file, file->skip) == -1)
2396             return -1;
2397     }
2398     /* if we processed a skip request, there may be data in the buffer,
2399      * or an error could have occurred; likewise if we didn't do seek but
2400      * now call fill_out_buffer, the errors can occur.  So we do this while
2401      * loop to check before and after - this is basically the logic from
2402      * file_read() but only for peeking not consuming a byte
2403      */
2404     while (1) {
2405         if (file->out.avail != 0) {
2406             return *(file->out.next);
2407         }
2408         else if (file->err != 0) {
2409             return -1;
2410         }
2411         else if (file->eof && file->in.avail == 0) {
2412             return -1;
2413         }
2414         else if (fill_out_buffer(file) == -1) {
2415             return -1;
2416         }
2417     }
2418     /* it's actually impossible to get here */
2419     return ret;
2420 }
2421
2422 /*
2423  * XXX - this gets a byte, not a character.
2424  */
2425 int
2426 file_getc(FILE_T file)
2427 {
2428     unsigned char buf[1];
2429     int ret;
2430
2431     /* check that we're reading and that there's no error */
2432     if (file->err != 0)
2433         return -1;
2434
2435     /* try output buffer (no need to check for skip request) */
2436     if (file->out.avail != 0) {
2437         file->out.avail--;
2438         file->pos++;
2439         return *(file->out.next)++;
2440     }
2441
2442     ret = file_read(buf, 1, file);
2443     return ret < 1 ? -1 : buf[0];
2444 }
2445
2446 /*
2447  * Like file_gets, but returns a pointer to the terminating NUL
2448  * on success and NULL on failure.
2449  */
2450 char *
2451 file_getsp(char *buf, int len, FILE_T file)
2452 {
2453     unsigned left, n;
2454     char *curp;
2455     unsigned char *eol;
2456
2457     /* check parameters */
2458     if (buf == NULL || len < 1)
2459         return NULL;
2460
2461     /* check that there's no error */
2462     if (file->err != 0)
2463         return NULL;
2464
2465     /* process a skip request */
2466     if (file->seek_pending) {
2467         file->seek_pending = false;
2468         if (gz_skip(file, file->skip) == -1)
2469             return NULL;
2470     }
2471
2472     /* copy output bytes up to new line or len - 1, whichever comes first --
2473        append a terminating zero to the string (we don't check for a zero in
2474        the contents, let the user worry about that) */
2475     curp = buf;
2476     left = (unsigned)len - 1;
2477     if (left) do {
2478             /* assure that something is in the output buffer */
2479             if (file->out.avail == 0) {
2480                 /* We have nothing in the output buffer. */
2481                 if (file->err != 0) {
2482                     /* We have an error that may not have
2483                        been reported yet; that means we
2484                        can't generate any more data into
2485                        the output buffer, so return an
2486                        error indication. */
2487                     return NULL;
2488                 }
2489                 if (fill_out_buffer(file) == -1)
2490                     return NULL;            /* error */
2491                 if (file->out.avail == 0)  {     /* end of file */
2492                     if (curp == buf)        /* got bupkus */
2493                         return NULL;
2494                     break;                  /* got something -- return it */
2495                 }
2496             }
2497
2498             /* look for end-of-line in current output buffer */
2499             n = file->out.avail > left ? left : file->out.avail;
2500             eol = (unsigned char *)memchr(file->out.next, '\n', n);
2501             if (eol != NULL)
2502                 n = (unsigned)(eol - file->out.next) + 1;
2503
2504             /* copy through end-of-line, or remainder if not found */
2505             memcpy(curp, file->out.next, n);
2506             file->out.avail -= n;
2507             file->out.next += n;
2508             file->pos += n;
2509             left -= n;
2510             curp += n;
2511         } while (left && eol == NULL);
2512
2513     /* found end-of-line or out of space -- add a terminator and return
2514        a pointer to it */
2515     *curp = '\0';
2516     return curp;
2517 }
2518
2519 /*
2520  * Returns a pointer to the beginning of the buffer on success
2521  * and NULL on failure.
2522  */
2523 char *
2524 file_gets(char *buf, int len, FILE_T file)
2525 {
2526     if (!file_getsp(buf, len, file)) return NULL;
2527     return buf;
2528 }
2529
2530 int
2531 file_eof(FILE_T file)
2532 {
2533     /* return end-of-file state */
2534     return (file->eof && file->in.avail == 0 && file->out.avail == 0);
2535 }
2536
2537 /*
2538  * Routine to return a Wiretap error code (0 for no error, an errno
2539  * for a file error, or a WTAP_ERR_ code for other errors) for an
2540  * I/O stream.  Also returns an error string for some errors.
2541  */
2542 int
2543 file_error(FILE_T fh, char **err_info)
2544 {
2545     if (fh->err!=0 && err_info) {
2546         /* g_strdup() returns NULL for NULL argument */
2547         *err_info = g_strdup(fh->err_info);
2548     }
2549     return fh->err;
2550 }
2551
2552 void
2553 file_clearerr(FILE_T stream)
2554 {
2555     /* clear error and end-of-file */
2556     stream->err = 0;
2557     stream->err_info = NULL;
2558     stream->eof = false;
2559 }
2560
2561 void
2562 file_fdclose(FILE_T file)
2563 {
2564     if (file->fd != -1)
2565         ws_close(file->fd);
2566     file->fd = -1;
2567 }
2568
2569 bool
2570 file_fdreopen(FILE_T file, const char *path)
2571 {
2572     int fd;
2573
2574     if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1)
2575         return false;
2576     file->fd = fd;
2577     return true;
2578 }
2579
2580 void
2581 file_close(FILE_T file)
2582 {
2583     int fd = file->fd;
2584
2585     /* free memory and close file */
2586     if (file->size) {
2587 #ifdef USE_ZLIB_OR_ZLIBNG
2588         ZLIB_PREFIX(inflateEnd)(&(file->strm));
2589 #endif /* USE_ZLIB_OR_ZLIBNG */
2590 #ifdef HAVE_ZSTD
2591         ZSTD_freeDCtx(file->zstd_dctx);
2592 #endif /* HAVE_ZSTD */
2593 #ifdef USE_LZ4
2594         LZ4F_freeDecompressionContext(file->lz4_dctx);
2595 #endif /* USE_LZ4 */
2596         g_free(file->out.buf);
2597         g_free(file->in.buf);
2598     }
2599     g_free(file->fast_seek_cur);
2600     file->err = 0;
2601     file->err_info = NULL;
2602     g_free(file);
2603     /*
2604      * If fd is -1, somebody's done a file_closefd() on us, so
2605      * we don't need to close the FD itself, and shouldn't do
2606      * so.
2607      */
2608     if (fd != -1)
2609         ws_close(fd);
2610 }
2611
2612 #ifdef USE_ZLIB_OR_ZLIBNG
/*
 * Internal state for writing a gzip-compressed file.
 *
 * gzwfile_fdopen() creates one of these with no buffers (size == 0);
 * gz_init() allocates the buffers and sets up the deflate stream.
 */
struct wtap_writer {
    int fd;                 /* file descriptor */
    int64_t pos;            /* current position in uncompressed data */
    unsigned size;          /* buffer size, zero if not allocated yet */
    unsigned want;          /* requested buffer size, default is GZBUFSIZE */
    unsigned char *in;      /* input buffer, "want" bytes when allocated by gz_init() */
    unsigned char *out;     /* output buffer, "want" bytes when allocated by gz_init() */
    unsigned char *next;    /* next output data to deliver or write */
    int level;              /* compression level, passed to deflateInit2() */
    int strategy;           /* compression strategy, passed to deflateInit2() */
    int err;                /* error code */
    const char *err_info;   /* additional error information string for some errors */
    /* zlib deflate stream */
    zlib_stream strm;          /* stream structure in-place (not a pointer) */
};
2629
2630 GZWFILE_T
2631 gzwfile_open(const char *path)
2632 {
2633     int fd;
2634     GZWFILE_T state;
2635     int save_errno;
2636
2637     fd = ws_open(path, O_BINARY|O_WRONLY|O_CREAT|O_TRUNC, 0666);
2638     if (fd == -1)
2639         return NULL;
2640     state = gzwfile_fdopen(fd);
2641     if (state == NULL) {
2642         save_errno = errno;
2643         ws_close(fd);
2644         errno = save_errno;
2645     }
2646     return state;
2647 }
2648
2649 GZWFILE_T
2650 gzwfile_fdopen(int fd)
2651 {
2652     GZWFILE_T state;
2653
2654     /* allocate wtap_writer structure to return */
2655     state = (GZWFILE_T)g_try_malloc(sizeof *state);
2656     if (state == NULL)
2657         return NULL;
2658     state->fd = fd;
2659     state->size = 0;            /* no buffers allocated yet */
2660     state->want = GZBUFSIZE;    /* requested buffer size */
2661
2662     state->level = Z_DEFAULT_COMPRESSION;
2663     state->strategy = Z_DEFAULT_STRATEGY;
2664
2665     /* initialize stream */
2666     state->err = Z_OK;              /* clear error */
2667     state->err_info = NULL;         /* clear additional error information */
2668     state->pos = 0;                 /* no uncompressed data yet */
2669     state->strm.avail_in = 0;       /* no input data yet */
2670
2671     /* return stream */
2672     return state;
2673 }
2674
2675 /* Initialize state for writing a gzip file.  Mark initialization by setting
2676    state->size to non-zero.  Return -1, and set state->err and possibly
2677    state->err_info, on failure; return 0 on success. */
2678 static int
2679 gz_init(GZWFILE_T state)
2680 {
2681     int ret;
2682 #ifdef HAVE_ZLIBNG
2683     zng_streamp strm = &(state->strm);
2684 #else /* HAVE_ZLIBNG */
2685     z_streamp strm = &(state->strm);
2686 #endif /* HAVE_ZLIBNG */
2687
2688     /* allocate input and output buffers */
2689     state->in = (unsigned char *)g_try_malloc(state->want);
2690     state->out = (unsigned char *)g_try_malloc(state->want);
2691     if (state->in == NULL || state->out == NULL) {
2692         g_free(state->out);
2693         g_free(state->in);
2694         state->err = ENOMEM;
2695         return -1;
2696     }
2697
2698     /* allocate deflate memory, set up for gzip compression */
2699     strm->zalloc = Z_NULL;
2700     strm->zfree = Z_NULL;
2701     strm->opaque = Z_NULL;
2702     ret = ZLIB_PREFIX(deflateInit2)(strm, state->level, Z_DEFLATED,
2703                        15 + 16, 8, state->strategy);
2704     if (ret != Z_OK) {
2705         g_free(state->out);
2706         g_free(state->in);
2707         if (ret == Z_MEM_ERROR) {
2708             /* This means "not enough memory". */
2709             state->err = ENOMEM;
2710         } else {
2711             /* This "shouldn't happen". */
2712             state->err = WTAP_ERR_INTERNAL;
2713             state->err_info = "Unknown error from deflateInit2()";
2714         }
2715         return -1;
2716     }
2717
2718     /* mark state as initialized */
2719     state->size = state->want;
2720
2721     /* initialize write buffer */
2722     strm->avail_out = state->size;
2723     strm->next_out = state->out;
2724     state->next = strm->next_out;
2725     return 0;
2726 }
2727
2728 /* Compress whatever is at avail_in and next_in and write to the output file.
2729    Return -1, and set state->err and possibly state->err_info, if there is
2730    an error writing to the output file; return 0 on success.
2731    flush is assumed to be a valid deflate() flush value.  If flush is Z_FINISH,
2732    then the deflate() state is reset to start a new gzip stream. */
2733 static int
2734 gz_comp(GZWFILE_T state, int flush)
2735 {
2736     int ret;
2737     ssize_t got;
2738     ptrdiff_t have;
2739 #ifdef HAVE_ZLIBNG
2740     zng_streamp strm = &(state->strm);
2741 #else /* HAVE_ZLIBNG */
2742     z_streamp strm = &(state->strm);
2743 #endif /* HAVE_ZLIBNG */
2744     /* allocate memory if this is the first time through */
2745     if (state->size == 0 && gz_init(state) == -1)
2746         return -1;
2747
2748     /* run deflate() on provided input until it produces no more output */
2749     ret = Z_OK;
2750     do {
2751         /* write out current buffer contents if full, or if flushing, but if
2752            doing Z_FINISH then don't write until we get to Z_STREAM_END */
2753         if (strm->avail_out == 0 || (flush != Z_NO_FLUSH &&
2754                                      (flush != Z_FINISH || ret == Z_STREAM_END))) {
2755             have = strm->next_out - state->next;
2756             if (have) {
2757                 got = ws_write(state->fd, state->next, (unsigned int)have);
2758                 if (got < 0) {
2759                     state->err = errno;
2760                     return -1;
2761                 }
2762                 if ((ptrdiff_t)got != have) {
2763                     state->err = WTAP_ERR_SHORT_WRITE;
2764                     return -1;
2765                 }
2766             }
2767             if (strm->avail_out == 0) {
2768                 strm->avail_out = state->size;
2769                 strm->next_out = state->out;
2770             }
2771             state->next = strm->next_out;
2772         }
2773
2774         /* compress */
2775         have = strm->avail_out;
2776         ret = ZLIB_PREFIX(deflate)(strm, flush);
2777         if (ret == Z_STREAM_ERROR) {
2778             /* This "shouldn't happen". */
2779             state->err = WTAP_ERR_INTERNAL;
2780             state->err_info = "Z_STREAM_ERROR from deflate()";
2781             return -1;
2782         }
2783         have -= strm->avail_out;
2784     } while (have);
2785
2786     /* if that completed a deflate stream, allow another to start */
2787     if (flush == Z_FINISH)
2788         ZLIB_PREFIX(deflateReset)(strm);
2789
2790     /* all done, no errors */
2791     return 0;
2792 }
2793
2794 /* Write out len bytes from buf.  Return 0, and set state->err, on
2795    failure or on an attempt to write 0 bytes (in which case state->err
2796    is Z_OK); return the number of bytes written on success. */
2797 unsigned
2798 gzwfile_write(GZWFILE_T state, const void *buf, unsigned len)
2799 {
2800     unsigned put = len;
2801     unsigned n;
2802 #ifdef HAVE_ZLIBNG
2803     zng_streamp strm;
2804 #else /* HAVE_ZLIBNG */
2805     z_streamp strm;
2806 #endif /* HAVE_ZLIBNG */
2807
2808     strm = &(state->strm);
2809
2810     /* check that there's no error */
2811     if (state->err != Z_OK)
2812         return 0;
2813
2814     /* if len is zero, avoid unnecessary operations */
2815     if (len == 0)
2816         return 0;
2817
2818     /* allocate memory if this is the first time through */
2819     if (state->size == 0 && gz_init(state) == -1)
2820         return 0;
2821
2822     /* for small len, copy to input buffer, otherwise compress directly */
2823     if (len < state->size) {
2824         /* copy to input buffer, compress when full */
2825         do {
2826             if (strm->avail_in == 0)
2827                 strm->next_in = state->in;
2828             n = state->size - strm->avail_in;
2829             if (n > len)
2830                 n = len;
2831 #ifdef z_const
2832 DIAG_OFF(cast-qual)
2833             memcpy((Bytef *)strm->next_in + strm->avail_in, buf, n);
2834 DIAG_ON(cast-qual)
2835 #else /* z_const */
2836             memcpy(strm->next_in + strm->avail_in, buf, n);
2837 #endif /* z_const */
2838             strm->avail_in += n;
2839             state->pos += n;
2840             buf = (const char *)buf + n;
2841             len -= n;
2842             if (len && gz_comp(state, Z_NO_FLUSH) == -1)
2843                 return 0;
2844         } while (len);
2845     }
2846     else {
2847         /* consume whatever's left in the input buffer */
2848         if (strm->avail_in != 0 && gz_comp(state, Z_NO_FLUSH) == -1)
2849             return 0;
2850
2851         /* directly compress user buffer to file */
2852         strm->avail_in = len;
2853 #ifdef z_const
2854         strm->next_in = (z_const Bytef *)buf;
2855 #else /* z_const */
2856 DIAG_OFF(cast-qual)
2857         strm->next_in = (Bytef *)buf;
2858 DIAG_ON(cast-qual)
2859 #endif /* z_const */
2860         state->pos += len;
2861         if (gz_comp(state, Z_NO_FLUSH) == -1)
2862             return 0;
2863     }
2864
2865     /* input was all buffered or compressed (put will fit in int) */
2866     return (int)put;
2867 }
2868
2869 /* Flush out what we've written so far.  Returns -1, and sets state->err,
2870    on failure; returns 0 on success. */
2871 int
2872 gzwfile_flush(GZWFILE_T state)
2873 {
2874     /* check that there's no error */
2875     if (state->err != Z_OK)
2876         return -1;
2877
2878     /* compress remaining data with Z_SYNC_FLUSH */
2879     gz_comp(state, Z_SYNC_FLUSH);
2880     if (state->err != Z_OK)
2881         return -1;
2882     return 0;
2883 }
2884
2885 /* Flush out all data written, and close the file.  Returns a Wiretap
2886    error on failure; returns 0 on success. */
2887 int
2888 gzwfile_close(GZWFILE_T state)
2889 {
2890     int ret = 0;
2891
2892     /* flush, free memory, and close file */
2893     if (gz_comp(state, Z_FINISH) == -1)
2894         ret = state->err;
2895     (void)ZLIB_PREFIX(deflateEnd)(&(state->strm));
2896     g_free(state->out);
2897     g_free(state->in);
2898     state->err = Z_OK;
2899     if (ws_close(state->fd) == -1 && ret == 0)
2900         ret = errno;
2901     g_free(state);
2902     return ret;
2903 }
2904
2905 int
2906 gzwfile_geterr(GZWFILE_T state)
2907 {
2908     return state->err;
2909 }
2910 #endif /* USE_ZLIB_OR_ZLIBNG */
2911
2912 #ifdef USE_LZ4
/*
 * Internal state of an LZ4 frame writer: the output file descriptor, the
 * buffer that receives compressed data from the LZ4F_ functions, and the
 * compression context.  Created by lz4wfile_fdopen(); the context and the
 * buffer are only set up later, by lz4_init().
 */
struct lz4_writer {
    int fd;                 /* file descriptor the compressed data is written to */
    int64_t pos;            /* number of uncompressed bytes accepted so far */
    int64_t pos_out;        /* number of compressed bytes written to fd so far */
    size_t size_out;      /* output buffer size, zero until lz4_init() has set it */
    size_t want;          /* largest input chunk given to the compressor at once; LZ4BUFSIZE */
    size_t want_out;      /* requested output buffer size, determined from want */
    unsigned char *out; /* output buffer, receiving compressed data before it is written */
    int err;                /* 0, or an errno / WTAP_ERR_ value once something failed */
    const char *err_info;   /* additional error information string for some errors */
    LZ4F_preferences_t lz4_prefs; /* frame preferences used for the whole file */
    LZ4F_cctx *lz4_cctx;          /* LZ4 frame compression context */
};
2927
2928 LZ4WFILE_T
2929 lz4wfile_open(const char *path)
2930 {
2931     int fd;
2932     LZ4WFILE_T state;
2933     int save_errno;
2934
2935     fd = ws_open(path, O_BINARY|O_WRONLY|O_CREAT|O_TRUNC, 0666);
2936     if (fd == -1)
2937         return NULL;
2938     state = lz4wfile_fdopen(fd);
2939     if (state == NULL) {
2940         save_errno = errno;
2941         ws_close(fd);
2942         errno = save_errno;
2943     }
2944     return state;
2945 }
2946
2947 LZ4WFILE_T
2948 lz4wfile_fdopen(int fd)
2949 {
2950     LZ4WFILE_T state;
2951
2952     /* allocate wtap_writer structure to return */
2953     state = (LZ4WFILE_T)g_try_malloc(sizeof *state);
2954     if (state == NULL)
2955         return NULL;
2956     state->fd = fd;
2957     state->size_out = 0;         /* no buffer allocated yet */
2958     state->want = LZ4BUFSIZE;    /* max input size (a block) */
2959     state->want_out = LZ4F_compressBound(state->want, &state->lz4_prefs);
2960     /*
2961      * This size guarantees that we will always have enough room to
2962      * write the result of LZ4F_compressUpdate (or Flush or End),
2963      * so long as the output buffer is empty (i.e., we immediately
2964      * write to the output file anything the compressor hands back
2965      * instead of buffering.)
2966      */
2967
2968     memset(&state->lz4_prefs, 0, sizeof(LZ4F_preferences_t));
2969     /* Use the same prefs as the lz4 command line utility defaults. */
2970     state->lz4_prefs.frameInfo.blockMode = LZ4F_blockIndependent; /* Allows fast seek */
2971     state->lz4_prefs.frameInfo.contentChecksumFlag = 1;
2972     state->lz4_prefs.frameInfo.blockSizeID = LZ4F_max4MB;
2973     /* XXX - What should we set state->lz4_prefs.compressionLevel to?
2974      * The command line utility uses 1, recommends 9 as another option, and
2975      * also there's 12 (max).
2976      *
2977      * We could provide an API call or perhaps two or three preset options.
2978      */
2979     state->lz4_prefs.compressionLevel = 1;
2980
2981     /* initialize stream */
2982     state->err = 0;              /* clear error */
2983     state->err_info = NULL;         /* clear additional error information */
2984     state->pos = 0;                 /* no uncompressed data yet */
2985     state->pos_out = 0;
2986
2987     /* return stream */
2988     return state;
2989 }
2990
2991 /* Writes len bytes from the output buffer to the file.
2992  * Return true on success; returns false and sets state->err on failure.
2993  */
2994 static bool
2995 lz4_write_out(LZ4WFILE_T state, size_t len)
2996 {
2997     if (len > 0) {
2998         ssize_t got = ws_write(state->fd, state->out, (unsigned)len);
2999         if (got < 0) {
3000             state->err = errno;
3001             return false;
3002         }
3003         if ((unsigned)got != len) {
3004             state->err = WTAP_ERR_SHORT_WRITE;
3005             return false;
3006         }
3007         state->pos_out += got;
3008     }
3009     return true;
3010 }
3011
3012 /* Initialize state for writing an lz4 file.  Mark initialization by setting
3013    state->size to non-zero.  Return -1, and set state->err and possibly
3014    state->err_info, on failure; return 0 on success. */
3015 static int
3016 lz4_init(LZ4WFILE_T state)
3017 {
3018     LZ4F_errorCode_t ret;
3019
3020     /* create Compression context */
3021     ret = LZ4F_createCompressionContext(&state->lz4_cctx, LZ4F_VERSION);
3022     if (LZ4F_isError(ret)) {
3023         state->err = WTAP_ERR_CANT_WRITE; // XXX - WTAP_ERR_COMPRESS?
3024         state->err_info = LZ4F_getErrorName(ret);
3025         return -1;
3026     }
3027
3028     /* allocate buffer */
3029     state->out = (unsigned char *)g_try_malloc(state->want_out);
3030     if (state->out == NULL) {
3031         g_free(state->out);
3032         LZ4F_freeCompressionContext(state->lz4_cctx);
3033         state->err = ENOMEM;
3034         return -1;
3035     }
3036
3037     ret = LZ4F_compressBegin(state->lz4_cctx, state->out, state->want_out, &state->lz4_prefs);
3038     if (LZ4F_isError(ret)) {
3039         state->err = WTAP_ERR_CANT_WRITE; // XXX - WTAP_ERR_COMPRESS?
3040         state->err_info = LZ4F_getErrorName(ret);
3041         return -1;
3042     }
3043     if (!lz4_write_out(state, ret)) {
3044         return -1;
3045     }
3046
3047     /* mark state as initialized */
3048     state->size_out = state->want_out;
3049
3050     return 0;
3051 }
3052
3053 /* Write out len bytes from buf.  Return 0, and set state->err, on
3054    failure or on an attempt to write 0 bytes (in which case state->err
3055    is 0); return the number of bytes written on success. */
3056 size_t
3057 lz4wfile_write(LZ4WFILE_T state, const void *buf, size_t len)
3058 {
3059     size_t to_write;
3060     size_t put = len;
3061
3062     /* check that there's no error */
3063     if (state->err != 0)
3064         return 0;
3065
3066     /* if len is zero, avoid unnecessary operations */
3067     if (len == 0)
3068         return 0;
3069
3070     /* allocate memory if this is the first time through */
3071     if (state->size_out == 0 && lz4_init(state) == -1)
3072         return 0;
3073
3074     do {
3075         to_write = MIN(len, state->want);
3076         size_t bytesWritten = LZ4F_compressUpdate(state->lz4_cctx, state->out, state->size_out,
3077             buf, to_write, NULL);
3078         if (LZ4F_isError(bytesWritten)) {
3079             state->err = WTAP_ERR_CANT_WRITE; // XXX - WTAP_ERR_COMPRESS?
3080             state->err_info = LZ4F_getErrorName(bytesWritten);
3081             return 0;
3082         }
3083         if (!lz4_write_out(state, bytesWritten)) {
3084             return 0;
3085         }
3086         state->pos += to_write;
3087         len -= to_write;
3088     } while (len);
3089
3090     /* input was all buffered or compressed */
3091     return put;
3092 }
3093
3094 /* Flush out what we've written so far.  Returns -1, and sets state->err,
3095    on failure; returns 0 on success. */
3096 int
3097 lz4wfile_flush(LZ4WFILE_T state)
3098 {
3099     size_t bytesWritten;
3100     /* check that there's no error */
3101     if (state->err != 0)
3102         return -1;
3103
3104     bytesWritten = LZ4F_flush(state->lz4_cctx, state->out, state->size_out, NULL);
3105     if (LZ4F_isError(bytesWritten)) {
3106         // Should never happen if size_out >= LZ4F_compressBound(0, prefsPtr)
3107         state->err = WTAP_ERR_INTERNAL;
3108         return -1;
3109     }
3110     if (!lz4_write_out(state, bytesWritten)) {
3111         return -1;
3112     }
3113     return 0;
3114 }
3115
3116 /* Flush out all data written, and close the file.  Returns a Wiretap
3117    error on failure; returns 0 on success. */
3118 int
3119 lz4wfile_close(LZ4WFILE_T state)
3120 {
3121     int ret = 0;
3122
3123     /* flush, free memory, and close file */
3124     size_t bytesWritten = LZ4F_compressEnd(state->lz4_cctx, state->out, state->size_out, NULL);
3125     if (LZ4F_isError(bytesWritten)) {
3126         // Should never happen if size_out >= LZ4F_compressBound(0, prefsPtr)
3127         ret = WTAP_ERR_INTERNAL;
3128     }
3129     if (!lz4_write_out(state, bytesWritten)) {
3130         ret = state->err;
3131     }
3132     g_free(state->out);
3133     LZ4F_freeCompressionContext(state->lz4_cctx);
3134     if (ws_close(state->fd) == -1 && ret == 0)
3135         ret = errno;
3136     g_free(state);
3137     return ret;
3138 }
3139
3140 int
3141 lz4wfile_geterr(LZ4WFILE_T state)
3142 {
3143     return state->err;
3144 }
3145 #endif /* USE_LZ4 */
3146 /*
3147  * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
3148  *
3149  * Local variables:
3150  * c-basic-offset: 4
3151  * tab-width: 8
3152  * indent-tabs-mode: nil
3153  * End:
3154  *
3155  * vi: set shiftwidth=4 tabstop=8 expandtab:
3156  * :indentSize=4:tabSize=8:noTabs=true:
3157  */