1 /**
2 * \file zstddeclib.c
3 * Single-file Zstandard decompressor.
5 * Generate using:
6 * \code
7 * python combine.py -r ../../lib -x legacy/zstd_legacy.h -o zstddeclib.c zstddeclib-in.c
8 * \endcode
9 */
11 * Copyright (c) Meta Platforms, Inc. and affiliates.
12 * All rights reserved.
14 * This source code is licensed under both the BSD-style license (copied below) and
15 * the GPLv2 (found in the COPYING file in the root directory of this source tree).
16 * You may select, at your option, one of the above-listed licenses.
20 BSD License
22 For Zstandard software
24 Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
26 Redistribution and use in source and binary forms, with or without modification,
27 are permitted provided that the following conditions are met:
29 * Redistributions of source code must retain the above copyright notice, this
30 list of conditions and the following disclaimer.
32 * Redistributions in binary form must reproduce the above copyright notice,
33 this list of conditions and the following disclaimer in the documentation
34 and/or other materials provided with the distribution.
36 * Neither the name Facebook, nor Meta, nor the names of its contributors may
37 be used to endorse or promote products derived from this software without
38 specific prior written permission.
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
41 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
42 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
44 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
45 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
47 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
48 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
49 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 * Settings to bake for the standalone decompressor.
55 * Note: It's important that none of these affects 'zstd.h' (only the
56 * implementation files we're amalgamating).
58 * Note: MEM_MODULE stops xxhash redefining BYTE, U16, etc., which are also
59 * defined in mem.h (breaking C99 compatibility).
61 * Note: the undefs for xxHash allow Zstd's implementation to coexist with
62 * standalone xxHash usage (with global defines).
64 * Note: if you enable ZSTD_LEGACY_SUPPORT the combine.py script will need
65 * re-running without the "-x legacy/zstd_legacy.h" option (it excludes the
66 * legacy support at the source level).
68 #define DEBUGLEVEL 0
69 #define MEM_MODULE
70 #undef XXH_NAMESPACE
71 #define XXH_NAMESPACE ZSTD_
72 #undef XXH_PRIVATE_API
73 #define XXH_PRIVATE_API
74 #undef XXH_INLINE_ALL
75 #define XXH_INLINE_ALL
76 #define ZSTD_LEGACY_SUPPORT 0
77 #define ZSTD_STRIP_ERROR_STRINGS
78 #define ZSTD_TRACE 0
79 /* TODO: Can't amalgamate ASM function */
80 #define ZSTD_DISABLE_ASM 1
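/* Note (illustrative): to re-enable legacy-format support, combine.py must be
 * re-run without the "-x legacy/zstd_legacy.h" exclusion, e.g.:
 *
 *    python combine.py -r ../../lib -o zstddeclib.c zstddeclib-in.c
 *
 * and ZSTD_LEGACY_SUPPORT above changed to a non-zero value (its value selects
 * the oldest legacy format version to accept).
 */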
82 /* Include zstd_deps.h first with all the options we need enabled. */
83 #define ZSTD_DEPS_NEED_MALLOC
84 /**** start inlining common/zstd_deps.h ****/
86 * Copyright (c) Meta Platforms, Inc. and affiliates.
87 * All rights reserved.
89 * This source code is licensed under both the BSD-style license (found in the
90 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
91 * in the COPYING file in the root directory of this source tree).
92 * You may select, at your option, one of the above-listed licenses.
95 /* This file provides common libc dependencies that zstd requires.
96 * The purpose is to allow replacing this file with a custom implementation
97 * to compile zstd without libc support.
100 /* Need:
101 * NULL
102 * INT_MAX
103 * UINT_MAX
104 * ZSTD_memcpy()
105 * ZSTD_memset()
106 * ZSTD_memmove()
108 #ifndef ZSTD_DEPS_COMMON
109 #define ZSTD_DEPS_COMMON
111 #include "pub_core_basics.h"
112 #include "pub_core_mallocfree.h"
113 #include "pub_core_libcbase.h"
115 # define ZSTD_memcpy(d,s,l) VG_(memcpy)((d),(s),(l))
116 # define ZSTD_memmove(d,s,l) VG_(memmove)((d),(s),(l))
117 # define ZSTD_memset(p,v,l) VG_(memset)((p),(v),(l))
120 #endif /* ZSTD_DEPS_COMMON */
122 /* Need:
123 * ZSTD_malloc()
124 * ZSTD_free()
126 #ifdef ZSTD_DEPS_NEED_MALLOC
127 #ifndef ZSTD_DEPS_MALLOC
128 #define ZSTD_DEPS_MALLOC
130 #include <stdlib.h>
132 #define ZSTD_free(p) VG_(free)((p))
134 #endif /* ZSTD_DEPS_MALLOC */
135 #endif /* ZSTD_DEPS_NEED_MALLOC */
138 * Provides 64-bit math support.
139 * Need:
140 * U64 ZSTD_div64(U64 dividend, U32 divisor)
142 #ifdef ZSTD_DEPS_NEED_MATH64
143 #ifndef ZSTD_DEPS_MATH64
144 #define ZSTD_DEPS_MATH64
146 #define ZSTD_div64(dividend, divisor) ((dividend) / (divisor))
148 #endif /* ZSTD_DEPS_MATH64 */
149 #endif /* ZSTD_DEPS_NEED_MATH64 */
151 /* Need:
152 * assert()
154 #ifdef ZSTD_DEPS_NEED_ASSERT
155 #ifndef ZSTD_DEPS_ASSERT
156 #define ZSTD_DEPS_ASSERT
158 #include <assert.h>
160 #endif /* ZSTD_DEPS_ASSERT */
161 #endif /* ZSTD_DEPS_NEED_ASSERT */
163 /* Need:
164 * ZSTD_DEBUG_PRINT()
166 #ifdef ZSTD_DEPS_NEED_IO
167 #ifndef ZSTD_DEPS_IO
168 #define ZSTD_DEPS_IO
170 #include <stdio.h>
171 #define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
173 #endif /* ZSTD_DEPS_IO */
174 #endif /* ZSTD_DEPS_NEED_IO */
176 /* Only requested when <stdint.h> is known to be present.
177 * Need:
178 * intptr_t
180 #ifdef ZSTD_DEPS_NEED_STDINT
181 #ifndef ZSTD_DEPS_STDINT
182 #define ZSTD_DEPS_STDINT
184 #include <stdint.h>
186 #endif /* ZSTD_DEPS_STDINT */
187 #endif /* ZSTD_DEPS_NEED_STDINT */
188 /**** ended inlining common/zstd_deps.h ****/
190 /**** start inlining common/debug.c ****/
191 /* ******************************************************************
192 * debug
193 * Part of FSE library
194 * Copyright (c) Meta Platforms, Inc. and affiliates.
196 * You can contact the author at :
197 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
199 * This source code is licensed under both the BSD-style license (found in the
200 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
201 * in the COPYING file in the root directory of this source tree).
202 * You may select, at your option, one of the above-listed licenses.
203 ****************************************************************** */
207 * This module only hosts one global variable
208 * which can be used to dynamically influence the verbosity of traces,
209 * such as DEBUGLOG and RAWLOG
212 /**** start inlining debug.h ****/
213 /* ******************************************************************
214 * debug
215 * Part of FSE library
216 * Copyright (c) Meta Platforms, Inc. and affiliates.
218 * You can contact the author at :
219 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
221 * This source code is licensed under both the BSD-style license (found in the
222 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
223 * in the COPYING file in the root directory of this source tree).
224 * You may select, at your option, one of the above-listed licenses.
225 ****************************************************************** */
229 * The purpose of this header is to enable debug functions.
230 * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
231 * and DEBUG_STATIC_ASSERT() for compile-time.
233 * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
235 * Level 1 enables assert() only.
236 * Starting level 2, traces can be generated and pushed to stderr.
237 * The higher the level, the more verbose the traces.
239 * It's possible to dynamically adjust level using variable g_debug_level,
240 * which is only declared if DEBUGLEVEL>=2,
241 * and is a global variable, not multi-thread protected (use with care)
244 #ifndef DEBUG_H_12987983217
245 #define DEBUG_H_12987983217
247 #if defined (__cplusplus)
248 extern "C" {
249 #endif
252 /* static assert is triggered at compile time, leaving no runtime artefact.
253 * static assert only works with compile-time constants.
254 * Also, this variant can only be used inside a function. */
255 #define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
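/* Usage sketch (illustrative): DEBUG_STATIC_ASSERT() checks a compile-time
 * constant from inside a function body; a false condition yields a
 * negative-size array type and therefore a compilation error:
 *
 *    void example(void)                            // hypothetical function
 *    {
 *        DEBUG_STATIC_ASSERT(sizeof(int) >= 2);    // compiles
 *        DEBUG_STATIC_ASSERT(sizeof(int) == 1);    // fails to compile
 *    }
 */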
258 /* DEBUGLEVEL is expected to be defined externally,
259 * typically through compiler command line.
260 * Value must be a number. */
261 #ifndef DEBUGLEVEL
262 # define DEBUGLEVEL 0
263 #endif
266 /* recommended values for DEBUGLEVEL :
267 * 0 : release mode, no debug, all run-time checks disabled
268 * 1 : enables assert() only, no display
269 * 2 : reserved, for currently active debug path
270 * 3 : events once per object lifetime (CCtx, CDict, etc.)
271 * 4 : events once per frame
272 * 5 : events once per block
273 * 6 : events once per sequence (verbose)
274 * 7+: events at every position (*very* verbose)
276 * It's generally inconvenient to output traces > 5.
277 * In such cases, it's possible to selectively trigger high verbosity levels
278 * by modifying g_debug_level.
281 #if (DEBUGLEVEL>=1)
282 # define ZSTD_DEPS_NEED_ASSERT
283 /**** skipping file: zstd_deps.h ****/
284 #else
285 # ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
286 # define assert(condition) ((void)0) /* disable assert (default) */
287 # endif
288 #endif
290 #if (DEBUGLEVEL>=2)
291 # define ZSTD_DEPS_NEED_IO
292 /**** skipping file: zstd_deps.h ****/
293 extern int g_debuglevel; /* the variable is only declared,
294 it actually lives in debug.c,
295 and is shared by the whole process.
296 It's not thread-safe.
297 It's useful when enabling very verbose levels
298 on selective conditions (such as position in src) */
300 # define RAWLOG(l, ...) \
301 do { \
302 if (l<=g_debuglevel) { \
303 ZSTD_DEBUG_PRINT(__VA_ARGS__); \
305 } while (0)
307 #define STRINGIFY(x) #x
308 #define TOSTRING(x) STRINGIFY(x)
309 #define LINE_AS_STRING TOSTRING(__LINE__)
311 # define DEBUGLOG(l, ...) \
312 do { \
313 if (l<=g_debuglevel) { \
314 ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \
315 ZSTD_DEBUG_PRINT(" \n"); \
317 } while (0)
318 #else
319 # define RAWLOG(l, ...) do { } while (0) /* disabled */
320 # define DEBUGLOG(l, ...) do { } while (0) /* disabled */
321 #endif
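/* Usage sketch (illustrative): with DEBUGLEVEL >= 2, a call such as
 *
 *    DEBUGLOG(4, "decoded %u bytes", (unsigned)result);
 *
 * prints file, line and the message whenever g_debuglevel >= 4, while RAWLOG()
 * prints only the raw message (no prefix, no trailing newline). With
 * DEBUGLEVEL at 0 or 1, both macros compile to nothing.
 */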
324 #if defined (__cplusplus)
326 #endif
328 #endif /* DEBUG_H_12987983217 */
329 /**** ended inlining debug.h ****/
331 #if !defined(ZSTD_LINUX_KERNEL) || (DEBUGLEVEL>=2)
332 /* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a
333 * translation unit is empty. So remove this from Linux kernel builds, but
334 * otherwise just leave it in.
336 int g_debuglevel = DEBUGLEVEL;
337 #endif
338 /**** ended inlining common/debug.c ****/
339 /**** start inlining common/entropy_common.c ****/
340 /* ******************************************************************
341 * Common functions of New Generation Entropy library
342 * Copyright (c) Meta Platforms, Inc. and affiliates.
344 * You can contact the author at :
345 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
346 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
348 * This source code is licensed under both the BSD-style license (found in the
349 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
350 * in the COPYING file in the root directory of this source tree).
351 * You may select, at your option, one of the above-listed licenses.
352 ****************************************************************** */
354 /* *************************************
355 * Dependencies
356 ***************************************/
357 /**** start inlining mem.h ****/
359 * Copyright (c) Meta Platforms, Inc. and affiliates.
360 * All rights reserved.
362 * This source code is licensed under both the BSD-style license (found in the
363 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
364 * in the COPYING file in the root directory of this source tree).
365 * You may select, at your option, one of the above-listed licenses.
368 #ifndef MEM_H_MODULE
369 #define MEM_H_MODULE
371 #if defined (__cplusplus)
372 extern "C" {
373 #endif
375 /*-****************************************
376 * Dependencies
377 ******************************************/
378 #include <stddef.h> /* size_t, ptrdiff_t */
379 /**** start inlining compiler.h ****/
381 * Copyright (c) Meta Platforms, Inc. and affiliates.
382 * All rights reserved.
384 * This source code is licensed under both the BSD-style license (found in the
385 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
386 * in the COPYING file in the root directory of this source tree).
387 * You may select, at your option, one of the above-listed licenses.
390 #ifndef ZSTD_COMPILER_H
391 #define ZSTD_COMPILER_H
393 #include <stddef.h>
395 /**** start inlining portability_macros.h ****/
397 * Copyright (c) Meta Platforms, Inc. and affiliates.
398 * All rights reserved.
400 * This source code is licensed under both the BSD-style license (found in the
401 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
402 * in the COPYING file in the root directory of this source tree).
403 * You may select, at your option, one of the above-listed licenses.
406 #ifndef ZSTD_PORTABILITY_MACROS_H
407 #define ZSTD_PORTABILITY_MACROS_H
410 * This header file contains macro definitions to support portability.
411 * This header is shared between C and ASM code, so it MUST only
412 * contain macro definitions. It MUST not contain any C code.
414 * This header ONLY defines macros to detect platforms/feature support.
419 /* compat. with non-clang compilers */
420 #ifndef __has_attribute
421 #define __has_attribute(x) 0
422 #endif
424 /* compat. with non-clang compilers */
425 #ifndef __has_builtin
426 # define __has_builtin(x) 0
427 #endif
429 /* compat. with non-clang compilers */
430 #ifndef __has_feature
431 # define __has_feature(x) 0
432 #endif
434 /* detects whether we are being compiled under msan */
435 #ifndef ZSTD_MEMORY_SANITIZER
436 # if __has_feature(memory_sanitizer)
437 # define ZSTD_MEMORY_SANITIZER 1
438 # else
439 # define ZSTD_MEMORY_SANITIZER 0
440 # endif
441 #endif
443 /* detects whether we are being compiled under asan */
444 #ifndef ZSTD_ADDRESS_SANITIZER
445 # if __has_feature(address_sanitizer)
446 # define ZSTD_ADDRESS_SANITIZER 1
447 # elif defined(__SANITIZE_ADDRESS__)
448 # define ZSTD_ADDRESS_SANITIZER 1
449 # else
450 # define ZSTD_ADDRESS_SANITIZER 0
451 # endif
452 #endif
454 /* detects whether we are being compiled under dfsan */
455 #ifndef ZSTD_DATAFLOW_SANITIZER
456 # if __has_feature(dataflow_sanitizer)
457 # define ZSTD_DATAFLOW_SANITIZER 1
458 # else
459 # define ZSTD_DATAFLOW_SANITIZER 0
460 # endif
461 #endif
463 /* Mark the internal assembly functions as hidden */
464 #ifdef __ELF__
465 # define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
466 #elif defined(__APPLE__)
467 # define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func
468 #else
469 # define ZSTD_HIDE_ASM_FUNCTION(func)
470 #endif
472 /* Enable runtime BMI2 dispatch based on the CPU.
473 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
475 #ifndef DYNAMIC_BMI2
476 #if ((defined(__clang__) && __has_attribute(__target__)) \
477 || (defined(__GNUC__) \
478 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
479 && (defined(__x86_64__) || defined(_M_X64)) \
480 && !defined(__BMI2__)
481 # define DYNAMIC_BMI2 1
482 #else
483 # define DYNAMIC_BMI2 0
484 #endif
485 #endif
488 * Only enable assembly for GNUC compatible compilers,
489 * because other platforms may not support GAS assembly syntax.
491 * Only enable assembly for Linux / MacOS, other platforms may
492 * work, but they haven't been tested. This could likely be
493 * extended to BSD systems.
495 * Disable assembly when MSAN is enabled, because MSAN requires
496 * 100% of code to be instrumented to work.
498 #if defined(__GNUC__)
499 # if defined(__linux__) || defined(__linux) || defined(__APPLE__)
500 # if ZSTD_MEMORY_SANITIZER
501 # define ZSTD_ASM_SUPPORTED 0
502 # elif ZSTD_DATAFLOW_SANITIZER
503 # define ZSTD_ASM_SUPPORTED 0
504 # else
505 # define ZSTD_ASM_SUPPORTED 1
506 # endif
507 # else
508 # define ZSTD_ASM_SUPPORTED 0
509 # endif
510 #else
511 # define ZSTD_ASM_SUPPORTED 0
512 #endif
515 * Determines whether we should enable assembly for x86-64
516 * with BMI2.
518 * Enable if all of the following conditions hold:
519 * - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM
520 * - Assembly is supported
521 * - We are compiling for x86-64 and either:
522 * - DYNAMIC_BMI2 is enabled
523 * - BMI2 is supported at compile time
525 #if !defined(ZSTD_DISABLE_ASM) && \
526 ZSTD_ASM_SUPPORTED && \
527 defined(__x86_64__) && \
528 (DYNAMIC_BMI2 || defined(__BMI2__))
529 # define ZSTD_ENABLE_ASM_X86_64_BMI2 1
530 #else
531 # define ZSTD_ENABLE_ASM_X86_64_BMI2 0
532 #endif
535 * For x86 ELF targets, add .note.gnu.property section for Intel CET in
536 * assembly sources when CET is enabled.
538 * Additionally, any function that may be called indirectly must begin
539 * with ZSTD_CET_ENDBRANCH.
541 #if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
542 && defined(__has_include)
543 # if __has_include(<cet.h>)
544 # include <cet.h>
545 # define ZSTD_CET_ENDBRANCH _CET_ENDBR
546 # endif
547 #endif
549 #ifndef ZSTD_CET_ENDBRANCH
550 # define ZSTD_CET_ENDBRANCH
551 #endif
553 #endif /* ZSTD_PORTABILITY_MACROS_H */
554 /**** ended inlining portability_macros.h ****/
556 /*-*******************************************************
557 * Compiler specifics
558 *********************************************************/
559 /* force inlining */
561 #if !defined(ZSTD_NO_INLINE)
562 #if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
563 # define INLINE_KEYWORD inline
564 #else
565 # define INLINE_KEYWORD
566 #endif
568 #if defined(__GNUC__) || defined(__ICCARM__)
569 # define FORCE_INLINE_ATTR __attribute__((always_inline))
570 #elif defined(_MSC_VER)
571 # define FORCE_INLINE_ATTR __forceinline
572 #else
573 # define FORCE_INLINE_ATTR
574 #endif
576 #else
578 #define INLINE_KEYWORD
579 #define FORCE_INLINE_ATTR
581 #endif
584 On MSVC, qsort requires that functions passed into it use the __cdecl calling convention (CC).
585 This explicitly marks such functions as __cdecl so that the code will still compile
586 if a CC other than __cdecl has been made the default.
588 #if defined(_MSC_VER)
589 # define WIN_CDECL __cdecl
590 #else
591 # define WIN_CDECL
592 #endif
594 /* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
595 #if defined(__GNUC__)
596 # define UNUSED_ATTR __attribute__((unused))
597 #else
598 # define UNUSED_ATTR
599 #endif
602 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
603 * parameters. They must be inlined for the compiler to eliminate the constant
604 * branches.
606 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
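/* Usage sketch (illustrative, hypothetical names): a "template" is a regular
 * function whose constant parameter folds away once callers pass a literal:
 *
 *    FORCE_INLINE_TEMPLATE unsigned maskBits_tmpl(unsigned v, unsigned nbBits)
 *    {
 *        return v & ((1u << nbBits) - 1);
 *    }
 *    static unsigned mask5(unsigned v) { return maskBits_tmpl(v, 5); }
 *
 * After forced inlining, the mask in mask5() is the compile-time constant 0x1F.
 */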
608 * HINT_INLINE is used to help the compiler generate better code. It is *not*
609 * used for "templates", so it can be tweaked based on the compiler's
610 * performance.
612 * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
613 * always_inline attribute.
615 * clang up to 5.0.0 (trunk) benefits tremendously from the always_inline
616 * attribute.
618 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
619 # define HINT_INLINE static INLINE_KEYWORD
620 #else
621 # define HINT_INLINE FORCE_INLINE_TEMPLATE
622 #endif
624 /* "soft" inline :
625 * The compiler is free to select if it's a good idea to inline or not.
626 * The main objective is to silence compiler warnings
627 * when a defined function is included but not used.
629 * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
630 * Updating the prefix is probably preferable, but requires a fairly large codemod,
631 * since this name is used everywhere.
633 #ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */
634 #if defined(__GNUC__)
635 # define MEM_STATIC static __inline UNUSED_ATTR
636 #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
637 # define MEM_STATIC static inline
638 #elif defined(_MSC_VER)
639 # define MEM_STATIC static __inline
640 #else
641 # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
642 #endif
643 #endif
645 /* force no inlining */
646 #ifdef _MSC_VER
647 # define FORCE_NOINLINE static __declspec(noinline)
648 #else
649 # if defined(__GNUC__) || defined(__ICCARM__)
650 # define FORCE_NOINLINE static __attribute__((__noinline__))
651 # else
652 # define FORCE_NOINLINE static
653 # endif
654 #endif
657 /* target attribute */
658 #if defined(__GNUC__) || defined(__ICCARM__)
659 # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
660 #else
661 # define TARGET_ATTRIBUTE(target)
662 #endif
664 /* Target attribute for BMI2 dynamic dispatch.
665 * Enable lzcnt, bmi, and bmi2.
666 * We test for bmi1 & bmi2. lzcnt is included in bmi1.
668 #define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
670 /* prefetch
671 * can be disabled by defining the NO_PREFETCH build macro */
672 #if defined(NO_PREFETCH)
673 # define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
674 # define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
675 #else
676 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
677 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
678 # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
679 # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
680 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
681 # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
682 # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
683 # elif defined(__aarch64__)
684 # define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
685 # define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
686 # else
687 # define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
688 # define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
689 # endif
690 #endif /* NO_PREFETCH */
692 #define CACHELINE_SIZE 64
694 #define PREFETCH_AREA(p, s) \
695 do { \
696 const char* const _ptr = (const char*)(p); \
697 size_t const _size = (size_t)(s); \
698 size_t _pos; \
699 for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
700 PREFETCH_L2(_ptr + _pos); \
702 } while (0)
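/* Usage sketch (illustrative, hypothetical names): warm a buffer that is about
 * to be scanned, one cache line at a time:
 *
 *    PREFETCH_AREA(dictBase, dictSize);   // issues PREFETCH_L2 every 64 bytes
 */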
704 /* vectorization
705 * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
706 * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
707 #if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
708 # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
709 # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
710 # else
711 # define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
712 # endif
713 #else
714 # define DONT_VECTORIZE
715 #endif
717 /* Tell the compiler that a branch is likely or unlikely.
718 * Only use these macros if it causes the compiler to generate better code.
719 * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
720 * and clang, please do.
724 #if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
725 # define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
726 #else
727 # define ZSTD_UNREACHABLE do { assert(0); } while (0)
728 #endif
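/* Usage sketch (illustrative): mark a path that is impossible by construction;
 * it asserts when assertions are enabled and is an optimizer hint otherwise:
 *
 *    switch (value & 3) {
 *    case 0: case 1: case 2: case 3: return handle(value & 3);   // hypothetical handler
 *    default: ZSTD_UNREACHABLE;   // cannot happen for a 2-bit value
 *    }
 */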
730 /* disable warnings */
731 #ifdef _MSC_VER /* Visual Studio */
732 # include <intrin.h> /* For Visual 2005 */
733 # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
734 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
735 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
736 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
737 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
738 #endif
740 /*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
741 #ifndef STATIC_BMI2
742 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
743 # ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
744 # define STATIC_BMI2 1
745 # endif
746 # elif defined(__BMI2__) && defined(__x86_64__) && defined(__GNUC__)
747 # define STATIC_BMI2 1
748 # endif
749 #endif
751 #ifndef STATIC_BMI2
752 #define STATIC_BMI2 0
753 #endif
755 /* compile time determination of SIMD support */
756 #if !defined(ZSTD_NO_INTRINSICS)
757 # if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
758 # define ZSTD_ARCH_X86_SSE2
759 # endif
760 # if defined(__ARM_NEON) || defined(_M_ARM64)
761 # define ZSTD_ARCH_ARM_NEON
762 # endif
764 # if defined(ZSTD_ARCH_X86_SSE2)
765 # include <emmintrin.h>
766 # elif defined(ZSTD_ARCH_ARM_NEON)
767 # include <arm_neon.h>
768 # endif
769 #endif
771 /* C-language Attributes are added in C23. */
772 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
773 # define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
774 #else
775 # define ZSTD_HAS_C_ATTRIBUTE(x) 0
776 #endif
778 /* Only use C++ attributes in C++. Some compilers report support for C++
779 * attributes when compiling with C.
781 #if defined(__cplusplus) && defined(__has_cpp_attribute)
782 # define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
783 #else
784 # define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
785 #endif
787 /* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
788 * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
789 * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
790 * - Else: __attribute__((__fallthrough__))
792 #ifndef ZSTD_FALLTHROUGH
793 # if ZSTD_HAS_C_ATTRIBUTE(fallthrough)
794 # define ZSTD_FALLTHROUGH [[fallthrough]]
795 # elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough)
796 # define ZSTD_FALLTHROUGH [[fallthrough]]
797 # elif __has_attribute(__fallthrough__)
798 /* Leading semicolon is to satisfy gcc-11 with -pedantic. Without the semicolon
799 * gcc complains about: a label can only be part of a statement and a declaration is not a statement.
801 # define ZSTD_FALLTHROUGH ; __attribute__((__fallthrough__))
802 # else
803 # define ZSTD_FALLTHROUGH
804 # endif
805 #endif
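/* Usage sketch (illustrative): annotate an intentional fallthrough so that
 * -Wimplicit-fallthrough stays quiet:
 *
 *    switch (remaining) {                       // hypothetical tail loop
 *    case 3: sum += p[2]; ZSTD_FALLTHROUGH;
 *    case 2: sum += p[1]; ZSTD_FALLTHROUGH;
 *    case 1: sum += p[0]; break;
 *    default: break;
 *    }
 */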
807 /*-**************************************************************
808 * Alignment check
809 *****************************************************************/
811 /* this test was initially positioned in mem.h,
812 * but this file is removed (or replaced) for linux kernel
813 * so it's now hosted in compiler.h,
814 * which remains valid for both user & kernel spaces.
817 #ifndef ZSTD_ALIGNOF
818 # if defined(__GNUC__) || defined(_MSC_VER)
819 /* covers gcc, clang & MSVC */
820 /* note : this section must come first, before C11,
821 * due to a limitation in the kernel source generator */
822 # define ZSTD_ALIGNOF(T) __alignof(T)
824 # elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
825 /* C11 support */
826 # include <stdalign.h>
827 # define ZSTD_ALIGNOF(T) alignof(T)
829 # else
830 /* No known support for alignof() - imperfect backup */
831 # define ZSTD_ALIGNOF(T) (sizeof(void*) < sizeof(T) ? sizeof(void*) : sizeof(T))
833 # endif
834 #endif /* ZSTD_ALIGNOF */
836 /*-**************************************************************
837 * Sanitizer
838 *****************************************************************/
841 * Zstd relies on pointer overflow in its decompressor.
842 * We add this attribute to functions that rely on pointer overflow.
844 #ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
845 # if __has_attribute(no_sanitize)
846 # if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
847 /* gcc < 8 only has signed-integer-overflow which triggers on pointer overflow */
848 # define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
849 # else
850 /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
851 # define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
852 # endif
853 # else
854 # define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
855 # endif
856 #endif
859 * Helper function to perform a wrapped pointer difference without triggering
860 * UBSAN.
862 * @returns lhs - rhs with wrapping
864 MEM_STATIC
865 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
866 ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
868 return lhs - rhs;
872 * Helper function to perform a wrapped pointer add without triggering UBSAN.
874 * @return ptr + add with wrapping
876 MEM_STATIC
877 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
878 unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
880 return ptr + add;
884 * Helper function to perform a wrapped pointer subtraction without triggering
885 * UBSAN.
887 * @return ptr - sub with wrapping
889 MEM_STATIC
890 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
891 unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
893 return ptr - sub;
897 * Helper function to add to a pointer that works around C's undefined behavior
898 * of adding 0 to NULL.
900 * @returns `ptr + add` except it defines `NULL + 0 == NULL`.
902 MEM_STATIC
903 unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
905 return add > 0 ? ptr + add : ptr;
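/* Usage sketch (illustrative, hypothetical names): decoder code keeps
 * "current - base" style differences on pointers that may wrap, and an empty
 * buffer may legitimately pass a NULL base; these helpers keep that arithmetic
 * out of the sanitizers' view:
 *
 *    ptrdiff_t diff      = ZSTD_wrappedPtrDiff(op, base);
 *    unsigned char* oend = ZSTD_maybeNullPtrAdd(dst, (ptrdiff_t)dstSize);  // fine when dst==NULL && dstSize==0
 */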
908 /* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
909 * abundance of caution, disable our custom poisoning on mingw. */
910 #ifdef __MINGW32__
911 #ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
912 #define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
913 #endif
914 #ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
915 #define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
916 #endif
917 #endif
919 #if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
920 /* Not all platforms that support msan provide sanitizers/msan_interface.h.
921 * We therefore declare the functions we need ourselves, rather than trying to
922 * include the header file... */
923 #include <stddef.h> /* size_t */
924 #define ZSTD_DEPS_NEED_STDINT
925 /**** skipping file: zstd_deps.h ****/
927 /* Make memory region fully initialized (without changing its contents). */
928 void __msan_unpoison(const volatile void *a, size_t size);
930 /* Make memory region fully uninitialized (without changing its contents).
931 This is a legacy interface that does not update origin information. Use
932 __msan_allocated_memory() instead. */
933 void __msan_poison(const volatile void *a, size_t size);
935 /* Returns the offset of the first (at least partially) poisoned byte in the
936 memory range, or -1 if the whole range is good. */
937 intptr_t __msan_test_shadow(const volatile void *x, size_t size);
939 /* Print shadow and origin for the memory range to stderr in a human-readable
940 format. */
941 void __msan_print_shadow(const volatile void *x, size_t size);
942 #endif
944 #if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE)
945 /* Not all platforms that support asan provide sanitizers/asan_interface.h.
946 * We therefore declare the functions we need ourselves, rather than trying to
947 * include the header file... */
948 #include <stddef.h> /* size_t */
951 * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
953 * This memory must be previously allocated by your program. Instrumented
954 * code is forbidden from accessing addresses in this region until it is
955 * unpoisoned. This function is not guaranteed to poison the entire region -
956 * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
957 * alignment restrictions.
959 * \note This function is not thread-safe because no two threads can poison or
960 * unpoison memory in the same memory region simultaneously.
962 * \param addr Start of memory region.
963 * \param size Size of memory region. */
964 void __asan_poison_memory_region(void const volatile *addr, size_t size);
967 * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
969 * This memory must be previously allocated by your program. Accessing
970 * addresses in this region is allowed until this region is poisoned again.
971 * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
972 * to ASan alignment restrictions.
974 * \note This function is not thread-safe because no two threads can
975 * poison or unpoison memory in the same memory region simultaneously.
977 * \param addr Start of memory region.
978 * \param size Size of memory region. */
979 void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
980 #endif
982 #endif /* ZSTD_COMPILER_H */
983 /**** ended inlining compiler.h ****/
984 /**** skipping file: debug.h ****/
985 /**** skipping file: zstd_deps.h ****/
988 /*-****************************************
989 * Compiler specifics
990 ******************************************/
991 #if defined(_MSC_VER) /* Visual Studio */
992 # include <stdlib.h> /* _byteswap_ulong */
993 # include <intrin.h> /* _byteswap_* */
994 #endif
996 /*-**************************************************************
997 * Basic Types
998 *****************************************************************/
999 #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
1000 # if defined(_AIX)
1001 # include <inttypes.h>
1002 # else
1003 # include <stdint.h> /* intptr_t */
1004 # endif
1005 typedef uint8_t BYTE;
1006 typedef uint8_t U8;
1007 typedef int8_t S8;
1008 typedef uint16_t U16;
1009 typedef int16_t S16;
1010 typedef uint32_t U32;
1011 typedef int32_t S32;
1012 typedef uint64_t U64;
1013 typedef int64_t S64;
1014 #else
1015 # include <limits.h>
1016 #if CHAR_BIT != 8
1017 # error "this implementation requires char to be exactly 8-bit type"
1018 #endif
1019 typedef unsigned char BYTE;
1020 typedef unsigned char U8;
1021 typedef signed char S8;
1022 #if USHRT_MAX != 65535
1023 # error "this implementation requires short to be exactly 16-bit type"
1024 #endif
1025 typedef unsigned short U16;
1026 typedef signed short S16;
1027 #if UINT_MAX != 4294967295
1028 # error "this implementation requires int to be exactly 32-bit type"
1029 #endif
1030 typedef unsigned int U32;
1031 typedef signed int S32;
1032 /* note : there are no limits defined for long long type in C90.
1033 * limits exist in C99, however, in such case, <stdint.h> is preferred */
1034 typedef unsigned long long U64;
1035 typedef signed long long S64;
1036 #endif
1039 /*-**************************************************************
1040 * Memory I/O API
1041 *****************************************************************/
1042 /*=== Static platform detection ===*/
1043 MEM_STATIC unsigned MEM_32bits(void);
1044 MEM_STATIC unsigned MEM_64bits(void);
1045 MEM_STATIC unsigned MEM_isLittleEndian(void);
1047 /*=== Native unaligned read/write ===*/
1048 MEM_STATIC U16 MEM_read16(const void* memPtr);
1049 MEM_STATIC U32 MEM_read32(const void* memPtr);
1050 MEM_STATIC U64 MEM_read64(const void* memPtr);
1051 MEM_STATIC size_t MEM_readST(const void* memPtr);
1053 MEM_STATIC void MEM_write16(void* memPtr, U16 value);
1054 MEM_STATIC void MEM_write32(void* memPtr, U32 value);
1055 MEM_STATIC void MEM_write64(void* memPtr, U64 value);
1057 /*=== Little endian unaligned read/write ===*/
1058 MEM_STATIC U16 MEM_readLE16(const void* memPtr);
1059 MEM_STATIC U32 MEM_readLE24(const void* memPtr);
1060 MEM_STATIC U32 MEM_readLE32(const void* memPtr);
1061 MEM_STATIC U64 MEM_readLE64(const void* memPtr);
1062 MEM_STATIC size_t MEM_readLEST(const void* memPtr);
1064 MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
1065 MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
1066 MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
1067 MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
1068 MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
1070 /*=== Big endian unaligned read/write ===*/
1071 MEM_STATIC U32 MEM_readBE32(const void* memPtr);
1072 MEM_STATIC U64 MEM_readBE64(const void* memPtr);
1073 MEM_STATIC size_t MEM_readBEST(const void* memPtr);
1075 MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
1076 MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
1077 MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
1079 /*=== Byteswap ===*/
1080 MEM_STATIC U32 MEM_swap32(U32 in);
1081 MEM_STATIC U64 MEM_swap64(U64 in);
1082 MEM_STATIC size_t MEM_swapST(size_t in);
1085 /*-**************************************************************
1086 * Memory I/O Implementation
1087 *****************************************************************/
1088 /* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory:
1089 * Method 0 : always use `memcpy()`. Safe and portable.
1090 * Method 1 : Use compiler extension to set unaligned access.
1091 * Method 2 : direct access. This method is portable but violates the C standard.
1092 * It can generate buggy code on targets depending on alignment.
1093 * Default : method 1 if supported, else method 0
1095 #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
1096 # ifdef __GNUC__
1097 # define MEM_FORCE_MEMORY_ACCESS 1
1098 # endif
1099 #endif
1101 MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
1102 MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
1104 MEM_STATIC unsigned MEM_isLittleEndian(void)
1106 #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
1107 return 1;
1108 #elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
1109 return 0;
1110 #elif defined(__clang__) && __LITTLE_ENDIAN__
1111 return 1;
1112 #elif defined(__clang__) && __BIG_ENDIAN__
1113 return 0;
1114 #elif defined(_MSC_VER) && (_M_AMD64 || _M_IX86)
1115 return 1;
1116 #elif defined(__DMC__) && defined(_M_IX86)
1117 return 1;
1118 #else
1119 const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
1120 return one.c[0];
1121 #endif
1124 #if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
1126 /* violates the C standard by lying about structure alignment.
1127 Only use if no other choice to achieve best performance on target platform */
1128 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
1129 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
1130 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
1131 MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
1133 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
1134 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
1135 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
1137 #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
1139 typedef __attribute__((aligned(1))) U16 unalign16;
1140 typedef __attribute__((aligned(1))) U32 unalign32;
1141 typedef __attribute__((aligned(1))) U64 unalign64;
1142 typedef __attribute__((aligned(1))) size_t unalignArch;
1144 MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; }
1145 MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; }
1146 MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; }
1147 MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*)ptr; }
1149 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; }
1150 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; }
1151 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; }
1153 #else
1155 /* default method, safe and standard.
1156 can sometimes prove slower */
1158 MEM_STATIC U16 MEM_read16(const void* memPtr)
1160 U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
1163 MEM_STATIC U32 MEM_read32(const void* memPtr)
1165 U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
1168 MEM_STATIC U64 MEM_read64(const void* memPtr)
1170 U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
1173 MEM_STATIC size_t MEM_readST(const void* memPtr)
1175 size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
1178 MEM_STATIC void MEM_write16(void* memPtr, U16 value)
1180 ZSTD_memcpy(memPtr, &value, sizeof(value));
1183 MEM_STATIC void MEM_write32(void* memPtr, U32 value)
1185 ZSTD_memcpy(memPtr, &value, sizeof(value));
1188 MEM_STATIC void MEM_write64(void* memPtr, U64 value)
1190 ZSTD_memcpy(memPtr, &value, sizeof(value));
1193 #endif /* MEM_FORCE_MEMORY_ACCESS */
1195 MEM_STATIC U32 MEM_swap32_fallback(U32 in)
1197 return ((in << 24) & 0xff000000 ) |
1198 ((in << 8) & 0x00ff0000 ) |
1199 ((in >> 8) & 0x0000ff00 ) |
1200 ((in >> 24) & 0x000000ff );
1203 MEM_STATIC U32 MEM_swap32(U32 in)
1205 #if defined(_MSC_VER) /* Visual Studio */
1206 return _byteswap_ulong(in);
1207 #elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
1208 || (defined(__clang__) && __has_builtin(__builtin_bswap32))
1209 return __builtin_bswap32(in);
1210 #else
1211 return MEM_swap32_fallback(in);
1212 #endif
1215 MEM_STATIC U64 MEM_swap64_fallback(U64 in)
1217 return ((in << 56) & 0xff00000000000000ULL) |
1218 ((in << 40) & 0x00ff000000000000ULL) |
1219 ((in << 24) & 0x0000ff0000000000ULL) |
1220 ((in << 8) & 0x000000ff00000000ULL) |
1221 ((in >> 8) & 0x00000000ff000000ULL) |
1222 ((in >> 24) & 0x0000000000ff0000ULL) |
1223 ((in >> 40) & 0x000000000000ff00ULL) |
1224 ((in >> 56) & 0x00000000000000ffULL);
1227 MEM_STATIC U64 MEM_swap64(U64 in)
1229 #if defined(_MSC_VER) /* Visual Studio */
1230 return _byteswap_uint64(in);
1231 #elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
1232 || (defined(__clang__) && __has_builtin(__builtin_bswap64))
1233 return __builtin_bswap64(in);
1234 #else
1235 return MEM_swap64_fallback(in);
1236 #endif
1239 MEM_STATIC size_t MEM_swapST(size_t in)
1241 if (MEM_32bits())
1242 return (size_t)MEM_swap32((U32)in);
1243 else
1244 return (size_t)MEM_swap64((U64)in);
1247 /*=== Little endian r/w ===*/
1249 MEM_STATIC U16 MEM_readLE16(const void* memPtr)
1251 if (MEM_isLittleEndian())
1252 return MEM_read16(memPtr);
1253 else {
1254 const BYTE* p = (const BYTE*)memPtr;
1255 return (U16)(p[0] + (p[1]<<8));
1259 MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
1261 if (MEM_isLittleEndian()) {
1262 MEM_write16(memPtr, val);
1263 } else {
1264 BYTE* p = (BYTE*)memPtr;
1265 p[0] = (BYTE)val;
1266 p[1] = (BYTE)(val>>8);
1270 MEM_STATIC U32 MEM_readLE24(const void* memPtr)
1272 return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16);
1275 MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
1277 MEM_writeLE16(memPtr, (U16)val);
1278 ((BYTE*)memPtr)[2] = (BYTE)(val>>16);
1281 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
1283 if (MEM_isLittleEndian())
1284 return MEM_read32(memPtr);
1285 else
1286 return MEM_swap32(MEM_read32(memPtr));
1289 MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
1291 if (MEM_isLittleEndian())
1292 MEM_write32(memPtr, val32);
1293 else
1294 MEM_write32(memPtr, MEM_swap32(val32));
1297 MEM_STATIC U64 MEM_readLE64(const void* memPtr)
1299 if (MEM_isLittleEndian())
1300 return MEM_read64(memPtr);
1301 else
1302 return MEM_swap64(MEM_read64(memPtr));
1305 MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
1307 if (MEM_isLittleEndian())
1308 MEM_write64(memPtr, val64);
1309 else
1310 MEM_write64(memPtr, MEM_swap64(val64));
1313 MEM_STATIC size_t MEM_readLEST(const void* memPtr)
1315 if (MEM_32bits())
1316 return (size_t)MEM_readLE32(memPtr);
1317 else
1318 return (size_t)MEM_readLE64(memPtr);
1321 MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
1323 if (MEM_32bits())
1324 MEM_writeLE32(memPtr, (U32)val);
1325 else
1326 MEM_writeLE64(memPtr, (U64)val);
1329 /*=== Big endian r/w ===*/
1331 MEM_STATIC U32 MEM_readBE32(const void* memPtr)
1333 if (MEM_isLittleEndian())
1334 return MEM_swap32(MEM_read32(memPtr));
1335 else
1336 return MEM_read32(memPtr);
1339 MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
1341 if (MEM_isLittleEndian())
1342 MEM_write32(memPtr, MEM_swap32(val32));
1343 else
1344 MEM_write32(memPtr, val32);
1347 MEM_STATIC U64 MEM_readBE64(const void* memPtr)
1349 if (MEM_isLittleEndian())
1350 return MEM_swap64(MEM_read64(memPtr));
1351 else
1352 return MEM_read64(memPtr);
1355 MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
1357 if (MEM_isLittleEndian())
1358 MEM_write64(memPtr, MEM_swap64(val64));
1359 else
1360 MEM_write64(memPtr, val64);
1363 MEM_STATIC size_t MEM_readBEST(const void* memPtr)
1365 if (MEM_32bits())
1366 return (size_t)MEM_readBE32(memPtr);
1367 else
1368 return (size_t)MEM_readBE64(memPtr);
1371 MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
1373 if (MEM_32bits())
1374 MEM_writeBE32(memPtr, (U32)val);
1375 else
1376 MEM_writeBE64(memPtr, (U64)val);
1379 /* code only tested on 32 and 64 bits systems */
1380 MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
1383 #if defined (__cplusplus)
1385 #endif
1387 #endif /* MEM_H_MODULE */
1388 /**** ended inlining mem.h ****/
1389 /**** start inlining error_private.h ****/
1391 * Copyright (c) Meta Platforms, Inc. and affiliates.
1392 * All rights reserved.
1394 * This source code is licensed under both the BSD-style license (found in the
1395 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1396 * in the COPYING file in the root directory of this source tree).
1397 * You may select, at your option, one of the above-listed licenses.
1400 /* Note : this module is expected to remain private, do not expose it */
1402 #ifndef ERROR_H_MODULE
1403 #define ERROR_H_MODULE
1405 #if defined (__cplusplus)
1406 extern "C" {
1407 #endif
1410 /* ****************************************
1411 * Dependencies
1412 ******************************************/
1413 /**** start inlining ../zstd_errors.h ****/
1415 * Copyright (c) Meta Platforms, Inc. and affiliates.
1416 * All rights reserved.
1418 * This source code is licensed under both the BSD-style license (found in the
1419 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1420 * in the COPYING file in the root directory of this source tree).
1421 * You may select, at your option, one of the above-listed licenses.
1424 #ifndef ZSTD_ERRORS_H_398273423
1425 #define ZSTD_ERRORS_H_398273423
1427 #if defined (__cplusplus)
1428 extern "C" {
1429 #endif
1431 /*===== dependency =====*/
1432 #include <stddef.h> /* size_t */
1435 /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
1436 #ifndef ZSTDERRORLIB_VISIBLE
1437 /* Backwards compatibility with old macro name */
1438 # ifdef ZSTDERRORLIB_VISIBILITY
1439 # define ZSTDERRORLIB_VISIBLE ZSTDERRORLIB_VISIBILITY
1440 # elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
1441 # define ZSTDERRORLIB_VISIBLE __attribute__ ((visibility ("default")))
1442 # else
1443 # define ZSTDERRORLIB_VISIBLE
1444 # endif
1445 #endif
1447 #ifndef ZSTDERRORLIB_HIDDEN
1448 # if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
1449 # define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden")))
1450 # else
1451 # define ZSTDERRORLIB_HIDDEN
1452 # endif
1453 #endif
1455 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
1456 # define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBLE
1457 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
1458 # define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBLE /* It isn't required, but allows the compiler to generate better code, saving a function-pointer load from the IAT and an indirect jump. */
1459 #else
1460 # define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE
1461 #endif
1463 /*-*********************************************
1464 * Error codes list
1465 *-*********************************************
1466 * Error codes _values_ are pinned down since v1.3.1 only.
1467 * Therefore, don't rely on values if you may link to any version < v1.3.1.
1469 * Only values < 100 are considered stable.
1471 * note 1 : this API shall be used with static linking only.
1472 * dynamic linking is not yet officially supported.
1473 * note 2 : Prefer relying on the enum than on its value whenever possible
1474 * This is the only supported way to use the error list < v1.3.1
1475 * note 3 : ZSTD_isError() is always correct, whatever the library version.
1476 **********************************************/
1477 typedef enum {
1478 ZSTD_error_no_error = 0,
1479 ZSTD_error_GENERIC = 1,
1480 ZSTD_error_prefix_unknown = 10,
1481 ZSTD_error_version_unsupported = 12,
1482 ZSTD_error_frameParameter_unsupported = 14,
1483 ZSTD_error_frameParameter_windowTooLarge = 16,
1484 ZSTD_error_corruption_detected = 20,
1485 ZSTD_error_checksum_wrong = 22,
1486 ZSTD_error_literals_headerWrong = 24,
1487 ZSTD_error_dictionary_corrupted = 30,
1488 ZSTD_error_dictionary_wrong = 32,
1489 ZSTD_error_dictionaryCreation_failed = 34,
1490 ZSTD_error_parameter_unsupported = 40,
1491 ZSTD_error_parameter_combination_unsupported = 41,
1492 ZSTD_error_parameter_outOfBound = 42,
1493 ZSTD_error_tableLog_tooLarge = 44,
1494 ZSTD_error_maxSymbolValue_tooLarge = 46,
1495 ZSTD_error_maxSymbolValue_tooSmall = 48,
1496 ZSTD_error_stabilityCondition_notRespected = 50,
1497 ZSTD_error_stage_wrong = 60,
1498 ZSTD_error_init_missing = 62,
1499 ZSTD_error_memory_allocation = 64,
1500 ZSTD_error_workSpace_tooSmall= 66,
1501 ZSTD_error_dstSize_tooSmall = 70,
1502 ZSTD_error_srcSize_wrong = 72,
1503 ZSTD_error_dstBuffer_null = 74,
1504 ZSTD_error_noForwardProgress_destFull = 80,
1505 ZSTD_error_noForwardProgress_inputEmpty = 82,
1506 /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
1507 ZSTD_error_frameIndex_tooLarge = 100,
1508 ZSTD_error_seekableIO = 102,
1509 ZSTD_error_dstBuffer_wrong = 104,
1510 ZSTD_error_srcBuffer_wrong = 105,
1511 ZSTD_error_sequenceProducer_failed = 106,
1512 ZSTD_error_externalSequences_invalid = 107,
1513 ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
1514 } ZSTD_ErrorCode;
1516 /*! ZSTD_getErrorCode() :
1517 convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
1518 which can be used to compare with enum list published above */
1519 ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
1520 ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
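/* Usage sketch (illustrative): zstd functions return a size_t that either
 * carries a result or encodes an error; the enum makes a specific failure
 * testable in a version-independent way:
 *
 *    size_t const r = someZstdCall();   // hypothetical call returning a zstd error code
 *    if (ZSTD_getErrorCode(r) == ZSTD_error_dstSize_tooSmall) {
 *        // grow the destination buffer and retry
 *    }
 */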
1523 #if defined (__cplusplus)
1525 #endif
1527 #endif /* ZSTD_ERRORS_H_398273423 */
1528 /**** ended inlining ../zstd_errors.h ****/
1529 /**** skipping file: compiler.h ****/
1530 /**** skipping file: debug.h ****/
1531 /**** skipping file: zstd_deps.h ****/
1534 /* ****************************************
1535 * Compiler-specific
1536 ******************************************/
1537 #if defined(__GNUC__)
1538 # define ERR_STATIC static __attribute__((unused))
1539 #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
1540 # define ERR_STATIC static inline
1541 #elif defined(_MSC_VER)
1542 # define ERR_STATIC static __inline
1543 #else
1544 # define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
1545 #endif
1548 /*-****************************************
1549 * Customization (error_public.h)
1550 ******************************************/
1551 typedef ZSTD_ErrorCode ERR_enum;
1552 #define PREFIX(name) ZSTD_error_##name
1555 /*-****************************************
1556 * Error codes handling
1557 ******************************************/
1558 #undef ERROR /* already defined on Visual Studio */
1559 #define ERROR(name) ZSTD_ERROR(name)
1560 #define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
1562 ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
1564 ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
1566 /* check and forward error code */
1567 #define CHECK_V_F(e, f) \
1568 size_t const e = f; \
1569 do { \
1570 if (ERR_isError(e)) \
1571 return e; \
1572 } while (0)
1573 #define CHECK_F(f) do { CHECK_V_F(_var_err__, f); } while (0)
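/* Usage sketch (illustrative): inside a function that itself returns a size_t
 * error code, CHECK_F() forwards any failure from a sub-step immediately:
 *
 *    CHECK_F( someDecodingStep(args) );   // hypothetical sub-call; its error code is returned on failure
 */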
1576 /*-****************************************
1577 * Error Strings
1578 ******************************************/
1580 const char* ERR_getErrorString(ERR_enum code); /* error_private.c */
1582 ERR_STATIC const char* ERR_getErrorName(size_t code)
1584 return ERR_getErrorString(ERR_getErrorCode(code));
1588 * Ignore: this is an internal helper.
1590 * This is a helper function to help force C99-correctness during compilation.
1591 * Under strict compilation modes, variadic macro arguments can't be empty.
1592 * However, variadic function arguments can be. Using a function therefore lets
1593 * us statically check that at least one (string) argument was passed,
1594 * independent of the compilation flags.
1596 static INLINE_KEYWORD UNUSED_ATTR
1597 void _force_has_format_string(const char *format, ...) {
1598 (void)format;
1602 * Ignore: this is an internal helper.
1604 * We want to force this function invocation to be syntactically correct, but
1605 * we don't want to force runtime evaluation of its arguments.
1607 #define _FORCE_HAS_FORMAT_STRING(...) \
1608 do { \
1609 if (0) { \
1610 _force_has_format_string(__VA_ARGS__); \
1612 } while (0)
1614 #define ERR_QUOTE(str) #str
1617 * Return the specified error if the condition evaluates to true.
1619 * In debug modes, prints additional information.
1620 * In order to do that (particularly, printing the conditional that failed),
1621 * this can't just wrap RETURN_ERROR().
1623 #define RETURN_ERROR_IF(cond, err, ...) \
1624 do { \
1625 if (cond) { \
1626 RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
1627 __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
1628 _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
1629 RAWLOG(3, ": " __VA_ARGS__); \
1630 RAWLOG(3, "\n"); \
1631 return ERROR(err); \
1633 } while (0)
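/* Usage sketch (illustrative): guard a precondition, returning the named error
 * (and, in debug builds, logging the failed condition):
 *
 *    RETURN_ERROR_IF(dstCapacity < contentSize, dstSize_tooSmall,
 *                    "need %u bytes of room", (unsigned)contentSize);
 */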
1636 * Unconditionally return the specified error.
1638 * In debug modes, prints additional information.
1640 #define RETURN_ERROR(err, ...) \
1641 do { \
1642 RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
1643 __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
1644 _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
1645 RAWLOG(3, ": " __VA_ARGS__); \
1646 RAWLOG(3, "\n"); \
1647 return ERROR(err); \
1648 } while(0)
1651 * If the provided expression evaluates to an error code, returns that error code.
1653 * In debug modes, prints additional information.
1655 #define FORWARD_IF_ERROR(err, ...) \
1656 do { \
1657 size_t const err_code = (err); \
1658 if (ERR_isError(err_code)) { \
1659 RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
1660 __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
1661 _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
1662 RAWLOG(3, ": " __VA_ARGS__); \
1663 RAWLOG(3, "\n"); \
1664 return err_code; \
1666 } while(0)
1668 #if defined (__cplusplus)
1670 #endif
1672 #endif /* ERROR_H_MODULE */
1673 /**** ended inlining error_private.h ****/
1674 #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
1675 /**** start inlining fse.h ****/
1676 /* ******************************************************************
1677 * FSE : Finite State Entropy codec
1678 * Public Prototypes declaration
1679 * Copyright (c) Meta Platforms, Inc. and affiliates.
1681 * You can contact the author at :
1682 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
1684 * This source code is licensed under both the BSD-style license (found in the
1685 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1686 * in the COPYING file in the root directory of this source tree).
1687 * You may select, at your option, one of the above-listed licenses.
1688 ****************************************************************** */
1690 #if defined (__cplusplus)
1691 extern "C" {
1692 #endif
1694 #ifndef FSE_H
1695 #define FSE_H
1698 /*-*****************************************
1699 * Dependencies
1700 ******************************************/
1701 /**** skipping file: zstd_deps.h ****/
1704 /*-*****************************************
1705 * FSE_PUBLIC_API : control library symbols visibility
1706 ******************************************/
1707 #if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
1708 # define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
1709 #elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
1710 # define FSE_PUBLIC_API __declspec(dllexport)
1711 #elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
1712 # define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows the compiler to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
1713 #else
1714 # define FSE_PUBLIC_API
1715 #endif
1717 /*------ Version ------*/
1718 #define FSE_VERSION_MAJOR 0
1719 #define FSE_VERSION_MINOR 9
1720 #define FSE_VERSION_RELEASE 0
1722 #define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
1723 #define FSE_QUOTE(str) #str
1724 #define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
1725 #define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
1727 #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
1728 FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
1731 /*-*****************************************
1732 * Tool functions
1733 ******************************************/
1734 FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */
1736 /* Error Management */
1737 FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
1738 FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
1741 /*-*****************************************
1742 * FSE detailed API
1743 ******************************************/
1745 FSE_compress() does the following:
1746 1. count symbol occurrence from source[] into table count[] (see hist.h)
1747 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
1748 3. save normalized counters to memory buffer using writeNCount()
1749 4. build encoding table 'CTable' from normalized counters
1750 5. encode the data stream using encoding table 'CTable'
1752 FSE_decompress() does the following:
1753 1. read normalized counters with readNCount()
1754 2. build decoding table 'DTable' from normalized counters
1755 3. decode the data stream using decoding table 'DTable'
1757 The following API allows targeting specific sub-functions for advanced tasks.
1758 For example, it's possible to compress several blocks using the same 'CTable',
1759 or to save and provide the normalized distribution using an external method.
1762 /* *** COMPRESSION *** */
1764 /*! FSE_optimalTableLog():
1765 dynamically downsize 'tableLog' when conditions are met.
1766 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
1767 @return : recommended tableLog (necessarily <= 'maxTableLog') */
1768 FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
1770 /*! FSE_normalizeCount():
1771 normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
1772 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
1773 useLowProbCount is a boolean parameter which trades off compressed size for
1774 faster header decoding. When it is set to 1, the compressed data will be slightly
1775 smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
1776 faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
1777 is a good default, since header deserialization makes a big speed difference.
1778 Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
1779 @return : tableLog,
1780 or an errorCode, which can be tested using FSE_isError() */
1781 FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
1782 const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
1784 /*! FSE_NCountWriteBound():
1785 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
1786 Typically useful for allocation purpose. */
1787 FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
1789 /*! FSE_writeNCount():
1790 Compactly save 'normalizedCounter' into 'buffer'.
1791 @return : size of the compressed table,
1792 or an errorCode, which can be tested using FSE_isError(). */
1793 FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
1794 const short* normalizedCounter,
1795 unsigned maxSymbolValue, unsigned tableLog);
1797 /*! Constructor and Destructor of FSE_CTable.
1798 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
1799 typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
1801 /*! FSE_buildCTable():
1802 Builds `ct`, which must be already allocated, using FSE_createCTable().
1803 @return : 0, or an errorCode, which can be tested using FSE_isError() */
1804 FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
1806 /*! FSE_compress_usingCTable():
1807 Compress `src` using `ct` into `dst` which must be already allocated.
1808 @return : size of compressed data (<= `dstCapacity`),
1809 or 0 if compressed data could not fit into `dst`,
1810 or an errorCode, which can be tested using FSE_isError() */
1811 FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
1814 Tutorial :
1815 ----------
1816 The first step is to count all symbols. FSE_count() does this job very fast.
1817 Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
1818 'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
1819 maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
1820 FSE_count() will return the number of occurrences of the most frequent symbol.
1821 This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
1822 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
1824 The next step is to normalize the frequencies.
1825 FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
1826 It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
1827 You can use 'tableLog'==0 to mean "use default tableLog value".
1828 If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
1829 which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
1831 The result of FSE_normalizeCount() will be saved into a table,
1832 called 'normalizedCounter', which is a table of signed short.
1833 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
1834 The return value is tableLog if everything proceeded as expected.
1835 It is 0 if there is a single symbol within distribution.
1836 If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
1838 'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
1839 'buffer' must be already allocated.
1840 For guaranteed success, buffer size must be at least FSE_headerBound().
1841 The result of the function is the number of bytes written into 'buffer'.
1842 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
1844 'normalizedCounter' can then be used to create the compression table 'CTable'.
1845 The space required by 'CTable' must be already allocated, using FSE_createCTable().
1846 You can then use FSE_buildCTable() to fill 'CTable'.
1847 If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
1849 'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
1850 Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
1851 The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
1852 If it returns '0', compressed data could not fit into 'dst'.
1853 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
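/* Editorial sketch of the five compression steps described above, under the assumption
 * that the compression-side entry points are linked in (they are not part of this
 * decompressor-only amalgamation). Counting is done with a plain loop instead of
 * FSE_count(), and the sizing macros FSE_MAX_TABLELOG / FSE_CTABLE_SIZE_U32 come from
 * the static-linking section further down. 'example_fse_compress' is a hypothetical name. */
#if 0
static size_t example_fse_compress(void* dst, size_t dstCapacity,
                                   const BYTE* src, size_t srcSize)
{
    unsigned count[256] = {0};
    short norm[256];
    unsigned maxSymbolValue = 255;
    unsigned tableLog;
    FSE_CTable ct[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, 255)];
    BYTE* op = (BYTE*)dst;
    size_t i, headerSize, cSize;

    /* 1. count symbol occurrences (stand-in for FSE_count()) */
    for (i = 0; i < srcSize; i++) count[src[i]]++;

    /* 2. normalize counters so that sum(count[]) == 2^tableLog */
    tableLog = FSE_optimalTableLog(0 /* default */, srcSize, maxSymbolValue);
    { CHECK_V_F(usedLog, FSE_normalizeCount(norm, tableLog, count, srcSize,
                                            maxSymbolValue, /* useLowProbCount */ srcSize >= 2048));
      tableLog = (unsigned)usedLog; }

    /* 3. save the normalized counters; the decoder reads them back with FSE_readNCount() */
    { CHECK_V_F(hSize, FSE_writeNCount(op, dstCapacity, norm, maxSymbolValue, tableLog));
      headerSize = hSize; }
    op += headerSize;

    /* 4. build the encoding table, then 5. encode the payload */
    CHECK_F( FSE_buildCTable(ct, norm, maxSymbolValue, tableLog) );
    cSize = FSE_compress_usingCTable(op, dstCapacity - headerSize, src, srcSize, ct);
    if (FSE_isError(cSize)) return cSize;
    if (cSize == 0) return 0;   /* not compressible into dstCapacity */
    return headerSize + cSize;
}
#endif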
1857 /* *** DECOMPRESSION *** */
1859 /*! FSE_readNCount():
1860 Read compactly saved 'normalizedCounter' from 'rBuffer'.
1861 @return : size read from 'rBuffer',
1862 or an errorCode, which can be tested using FSE_isError().
1863 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
1864 FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
1865 unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
1866 const void* rBuffer, size_t rBuffSize);
1868 /*! FSE_readNCount_bmi2():
1869 * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
1871 FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
1872 unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
1873 const void* rBuffer, size_t rBuffSize, int bmi2);
1875 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
1878 Tutorial :
1879 ----------
1880 (Note : these functions only decompress FSE-compressed blocks.
1881 If block is uncompressed, use memcpy() instead
1882 If block is a single repeated byte, use memset() instead )
1884 The first step is to obtain the normalized frequencies of symbols.
1885 This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
1886 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
1887 In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
1888 or size the table to handle worst case situations (typically 256).
1889 FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
1890 The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
1891 Note that 'rBuffSize' must be at least 4 bytes, even if useful information is less than that.
1892 If there is an error, the function will return an error code, which can be tested using FSE_isError().
1894 The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
1895 This is performed by the function FSE_buildDTable().
1896 The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
1897 If there is an error, the function will return an error code, which can be tested using FSE_isError().
1899 `FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
1900 `cSrcSize` must be strictly correct, otherwise decompression will fail.
1901 FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
1902 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
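/* Editorial sketch of the first decompression step above: recovering the normalized
 * counters with FSE_readNCount(). The table build and the decode itself are exposed in
 * this build through the _wksp variants declared in the static-linking section further
 * down. 'example_fse_read_ncount' is a hypothetical name, kept behind #if 0. */
#if 0
static size_t example_fse_read_ncount(const void* cSrc, size_t cSrcSize)
{
    short norm[256];                 /* worst case : 256 symbols */
    unsigned maxSymbolValue = 255;   /* in : limit ; out : real max symbol value */
    unsigned tableLog = 0;           /* out : filled by FSE_readNCount() */
    /* Read the compact header written by FSE_writeNCount(); needs >= 4 readable bytes. */
    CHECK_V_F(hSize, FSE_readNCount(norm, &maxSymbolValue, &tableLog, cSrc, cSrcSize));
    /* 'norm', 'maxSymbolValue' and 'tableLog' can now feed FSE_buildDTable_wksp();
     * the remaining (cSrcSize - hSize) bytes are the FSE-encoded payload. */
    return hSize;
}
#endif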
1905 #endif /* FSE_H */
1908 #if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
1909 #define FSE_H_FSE_STATIC_LINKING_ONLY
1911 /* *** Dependency *** */
1912 /**** start inlining bitstream.h ****/
1913 /* ******************************************************************
1914 * bitstream
1915 * Part of FSE library
1916 * Copyright (c) Meta Platforms, Inc. and affiliates.
1918 * You can contact the author at :
1919 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
1921 * This source code is licensed under both the BSD-style license (found in the
1922 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1923 * in the COPYING file in the root directory of this source tree).
1924 * You may select, at your option, one of the above-listed licenses.
1925 ****************************************************************** */
1926 #ifndef BITSTREAM_H_MODULE
1927 #define BITSTREAM_H_MODULE
1929 #if defined (__cplusplus)
1930 extern "C" {
1931 #endif
1933 * This API consists of small unitary functions, which must be inlined for best performance.
1934 * Since link-time-optimization is not available for all compilers,
1935 * these functions are defined in a .h to be included.
1938 /*-****************************************
1939 * Dependencies
1940 ******************************************/
1941 /**** skipping file: mem.h ****/
1942 /**** skipping file: compiler.h ****/
1943 /**** skipping file: debug.h ****/
1944 /**** skipping file: error_private.h ****/
1945 /**** start inlining bits.h ****/
1947 * Copyright (c) Meta Platforms, Inc. and affiliates.
1948 * All rights reserved.
1950 * This source code is licensed under both the BSD-style license (found in the
1951 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1952 * in the COPYING file in the root directory of this source tree).
1953 * You may select, at your option, one of the above-listed licenses.
1956 #ifndef ZSTD_BITS_H
1957 #define ZSTD_BITS_H
1959 /**** skipping file: mem.h ****/
1961 MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
1963 assert(val != 0);
1965 static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
1966 30, 22, 20, 15, 25, 17, 4, 8,
1967 31, 27, 13, 23, 21, 19, 16, 7,
1968 26, 12, 18, 6, 11, 5, 10, 9};
1969 return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
1973 MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
1975 assert(val != 0);
1976 # if defined(_MSC_VER)
1977 # if STATIC_BMI2 == 1
1978 return (unsigned)_tzcnt_u32(val);
1979 # else
1980 if (val != 0) {
1981 unsigned long r;
1982 _BitScanForward(&r, val);
1983 return (unsigned)r;
1984 } else {
1985 /* Should not reach this code path */
1986 __assume(0);
1988 # endif
1989 # elif defined(__GNUC__) && (__GNUC__ >= 4)
1990 return (unsigned)__builtin_ctz(val);
1991 # else
1992 return ZSTD_countTrailingZeros32_fallback(val);
1993 # endif
1996 MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
1997 assert(val != 0);
1999 static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
2000 11, 14, 16, 18, 22, 25, 3, 30,
2001 8, 12, 20, 28, 15, 17, 24, 7,
2002 19, 27, 23, 6, 26, 5, 4, 31};
2003 val |= val >> 1;
2004 val |= val >> 2;
2005 val |= val >> 4;
2006 val |= val >> 8;
2007 val |= val >> 16;
2008 return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
2012 MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
2014 assert(val != 0);
2015 # if defined(_MSC_VER)
2016 # if STATIC_BMI2 == 1
2017 return (unsigned)_lzcnt_u32(val);
2018 # else
2019 if (val != 0) {
2020 unsigned long r;
2021 _BitScanReverse(&r, val);
2022 return (unsigned)(31 - r);
2023 } else {
2024 /* Should not reach this code path */
2025 __assume(0);
2027 # endif
2028 # elif defined(__GNUC__) && (__GNUC__ >= 4)
2029 return (unsigned)__builtin_clz(val);
2030 # else
2031 return ZSTD_countLeadingZeros32_fallback(val);
2032 # endif
2035 MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
2037 assert(val != 0);
2038 # if defined(_MSC_VER) && defined(_WIN64)
2039 # if STATIC_BMI2 == 1
2040 return (unsigned)_tzcnt_u64(val);
2041 # else
2042 if (val != 0) {
2043 unsigned long r;
2044 _BitScanForward64(&r, val);
2045 return (unsigned)r;
2046 } else {
2047 /* Should not reach this code path */
2048 __assume(0);
2050 # endif
2051 # elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
2052 return (unsigned)__builtin_ctzll(val);
2053 # else
2055 U32 mostSignificantWord = (U32)(val >> 32);
2056 U32 leastSignificantWord = (U32)val;
2057 if (leastSignificantWord == 0) {
2058 return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
2059 } else {
2060 return ZSTD_countTrailingZeros32(leastSignificantWord);
2063 # endif
2066 MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
2068 assert(val != 0);
2069 # if defined(_MSC_VER) && defined(_WIN64)
2070 # if STATIC_BMI2 == 1
2071 return (unsigned)_lzcnt_u64(val);
2072 # else
2073 if (val != 0) {
2074 unsigned long r;
2075 _BitScanReverse64(&r, val);
2076 return (unsigned)(63 - r);
2077 } else {
2078 /* Should not reach this code path */
2079 __assume(0);
2081 # endif
2082 # elif defined(__GNUC__) && (__GNUC__ >= 4)
2083 return (unsigned)(__builtin_clzll(val));
2084 # else
2086 U32 mostSignificantWord = (U32)(val >> 32);
2087 U32 leastSignificantWord = (U32)val;
2088 if (mostSignificantWord == 0) {
2089 return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
2090 } else {
2091 return ZSTD_countLeadingZeros32(mostSignificantWord);
2094 # endif
2097 MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
2099 if (MEM_isLittleEndian()) {
2100 if (MEM_64bits()) {
2101 return ZSTD_countTrailingZeros64((U64)val) >> 3;
2102 } else {
2103 return ZSTD_countTrailingZeros32((U32)val) >> 3;
2105 } else { /* Big Endian CPU */
2106 if (MEM_64bits()) {
2107 return ZSTD_countLeadingZeros64((U64)val) >> 3;
2108 } else {
2109 return ZSTD_countLeadingZeros32((U32)val) >> 3;
2114 MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
2116 assert(val != 0);
2117 return 31 - ZSTD_countLeadingZeros32(val);
2120 /* ZSTD_rotateRight_*():
2121 * Rotates a bitfield to the right by "count" bits.
2122 * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
2124 MEM_STATIC
2125 U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
2126 assert(count < 64);
2127 count &= 0x3F; /* for fickle pattern recognition */
2128 return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
2131 MEM_STATIC
2132 U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
2133 assert(count < 32);
2134 count &= 0x1F; /* for fickle pattern recognition */
2135 return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
2138 MEM_STATIC
2139 U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
2140 assert(count < 16);
2141 count &= 0x0F; /* for fickle pattern recognition */
2142 return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
2145 #endif /* ZSTD_BITS_H */
2146 /**** ended inlining bits.h ****/
2149 /*=========================================
2150 * Target specific
2151 =========================================*/
2152 #ifndef ZSTD_NO_INTRINSICS
2153 # if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
2154 # include <immintrin.h> /* support for bextr (experimental)/bzhi */
2155 # elif defined(__ICCARM__)
2156 # include <intrinsics.h>
2157 # endif
2158 #endif
2160 #define STREAM_ACCUMULATOR_MIN_32 25
2161 #define STREAM_ACCUMULATOR_MIN_64 57
2162 #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
2165 /*-******************************************
2166 * bitStream encoding API (write forward)
2167 ********************************************/
2168 /* bitStream can mix input from multiple sources.
2169 * A critical property of these streams is that they encode and decode in **reverse** direction.
2170 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
2172 typedef struct {
2173 size_t bitContainer;
2174 unsigned bitPos;
2175 char* startPtr;
2176 char* ptr;
2177 char* endPtr;
2178 } BIT_CStream_t;
2180 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
2181 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
2182 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
2183 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
2185 /* Start with initCStream, providing the size of buffer to write into.
2186 * bitStream will never write outside of this buffer.
2187 * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
2189 * bits are first added to a local register.
2190 * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
2191 * Writing data into memory is an explicit operation, performed by the flushBits function.
2192 * Hence keep track of how many bits are potentially stored in the local register, to avoid register overflow.
2193 * After a flushBits, a maximum of 7 bits might still be stored into local register.
2195 * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
2197 * Last operation is to close the bitStream.
2198 * The function returns the final size of CStream in bytes.
2199 * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
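/* Editorial sketch of the write-side sequence described above; the 3-bit field width and
 * the helper name are arbitrary choices for illustration, kept behind #if 0. */
#if 0
static size_t example_bit_writer(void* dst, size_t dstCapacity,
                                 const BYTE* values, size_t nbValues)
{
    BIT_CStream_t bitC;
    size_t n;
    /* requires dstCapacity > sizeof(size_t), otherwise an error code is returned */
    CHECK_F( BIT_initCStream(&bitC, dst, dstCapacity) );
    for (n = 0; n < nbValues; n++) {
        BIT_addBits(&bitC, values[n], 3);   /* queue the low 3 bits in the local register */
        BIT_flushBits(&bitC);               /* commit whole bytes to memory (safe version) */
    }
    /* writes the end mark; returns the stream size in bytes, or 0 if dst was too small */
    return BIT_closeCStream(&bitC);
}
#endif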
2203 /*-********************************************
2204 * bitStream decoding API (read backward)
2205 **********************************************/
2206 typedef size_t BitContainerType;
2207 typedef struct {
2208 BitContainerType bitContainer;
2209 unsigned bitsConsumed;
2210 const char* ptr;
2211 const char* start;
2212 const char* limitPtr;
2213 } BIT_DStream_t;
2215 typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
2216 BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
2217 BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
2218 BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
2219 } BIT_DStream_status; /* result of BIT_reloadDStream() */
2221 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
2222 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
2223 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
2224 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
2227 /* Start by invoking BIT_initDStream().
2228 * A chunk of the bitStream is then stored into a local register.
2229 * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
2230 * You can then retrieve bitFields stored into the local register, **in reverse order**.
2231 * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
2232 * A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
2233 * Otherwise, it can be less than that, so proceed accordingly.
2234 * Checking if DStream has reached its end can be performed with BIT_endOfDStream().
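/* Editorial sketch of the read-side sequence described above: fields come back in the
 * reverse order of the writes. The field width and helper name mirror the writer sketch
 * earlier and are illustrative only, kept behind #if 0. */
#if 0
static size_t example_bit_reader(const void* src, size_t srcSize,
                                 unsigned* out, size_t nbFields)
{
    BIT_DStream_t bitD;
    size_t n;
    CHECK_F( BIT_initDStream(&bitD, src, srcSize) );   /* srcSize = exact stream size */
    for (n = 0; n < nbFields; n++) {
        out[n] = (unsigned)BIT_readBits(&bitD, 3);     /* consume a 3-bit field */
        BIT_reloadDStream(&bitD);                      /* refill the local register */
    }
    /* 1 only if every bit, including the end mark, has been consumed */
    return BIT_endOfDStream(&bitD);
}
#endif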
2238 /*-****************************************
2239 * unsafe API
2240 ******************************************/
2241 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
2242 /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
2244 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
2245 /* unsafe version; does not check buffer overflow */
2247 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
2248 /* faster, but works only if nbBits >= 1 */
2250 /*===== Local Constants =====*/
2251 static const unsigned BIT_mask[] = {
2252 0, 1, 3, 7, 0xF, 0x1F,
2253 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
2254 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,
2255 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
2256 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
2257 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
2258 #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
2260 /*-**************************************************************
2261 * bitStream encoding
2262 ****************************************************************/
2263 /*! BIT_initCStream() :
2264 * `dstCapacity` must be > sizeof(size_t)
2265 * @return : 0 if success,
2266 * otherwise an error code (can be tested using ERR_isError()) */
2267 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
2268 void* startPtr, size_t dstCapacity)
2270 bitC->bitContainer = 0;
2271 bitC->bitPos = 0;
2272 bitC->startPtr = (char*)startPtr;
2273 bitC->ptr = bitC->startPtr;
2274 bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
2275 if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
2276 return 0;
2279 FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
2281 #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
2282 return _bzhi_u64(bitContainer, nbBits);
2283 #else
2284 assert(nbBits < BIT_MASK_SIZE);
2285 return bitContainer & BIT_mask[nbBits];
2286 #endif
2289 /*! BIT_addBits() :
2290 * can add up to 31 bits into `bitC`.
2291 * Note : does not check for register overflow ! */
2292 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
2293 size_t value, unsigned nbBits)
2295 DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
2296 assert(nbBits < BIT_MASK_SIZE);
2297 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2298 bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
2299 bitC->bitPos += nbBits;
2302 /*! BIT_addBitsFast() :
2303 * works only if `value` is _clean_,
2304 * meaning all high bits above nbBits are 0 */
2305 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
2306 size_t value, unsigned nbBits)
2308 assert((value>>nbBits) == 0);
2309 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2310 bitC->bitContainer |= value << bitC->bitPos;
2311 bitC->bitPos += nbBits;
2314 /*! BIT_flushBitsFast() :
2315 * assumption : bitContainer has not overflowed
2316 * unsafe version; does not check buffer overflow */
2317 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
2319 size_t const nbBytes = bitC->bitPos >> 3;
2320 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2321 assert(bitC->ptr <= bitC->endPtr);
2322 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
2323 bitC->ptr += nbBytes;
2324 bitC->bitPos &= 7;
2325 bitC->bitContainer >>= nbBytes*8;
2328 /*! BIT_flushBits() :
2329 * assumption : bitContainer has not overflowed
2330 * safe version; checks for buffer overflow, and prevents it.
2331 * note : does not signal buffer overflow.
2332 * overflow will be revealed later on using BIT_closeCStream() */
2333 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
2335 size_t const nbBytes = bitC->bitPos >> 3;
2336 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
2337 assert(bitC->ptr <= bitC->endPtr);
2338 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
2339 bitC->ptr += nbBytes;
2340 if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
2341 bitC->bitPos &= 7;
2342 bitC->bitContainer >>= nbBytes*8;
2345 /*! BIT_closeCStream() :
2346 * @return : size of CStream, in bytes,
2347 * or 0 if it could not fit into dstBuffer */
2348 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
2350 BIT_addBitsFast(bitC, 1, 1); /* endMark */
2351 BIT_flushBits(bitC);
2352 if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
2353 return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
2357 /*-********************************************************
2358 * bitStream decoding
2359 **********************************************************/
2360 /*! BIT_initDStream() :
2361 * Initialize a BIT_DStream_t.
2362 * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
2363 * `srcSize` must be the *exact* size of the bitStream, in bytes.
2364 * @return : size of stream (== srcSize), or an errorCode if a problem is detected
2366 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
2368 if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
2370 bitD->start = (const char*)srcBuffer;
2371 bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
2373 if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
2374 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
2375 bitD->bitContainer = MEM_readLEST(bitD->ptr);
2376 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
2377 bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
2378 if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
2379 } else {
2380 bitD->ptr = bitD->start;
2381 bitD->bitContainer = *(const BYTE*)(bitD->start);
2382 switch(srcSize)
2384 case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
2385 ZSTD_FALLTHROUGH;
2387 case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
2388 ZSTD_FALLTHROUGH;
2390 case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
2391 ZSTD_FALLTHROUGH;
2393 case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
2394 ZSTD_FALLTHROUGH;
2396 case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
2397 ZSTD_FALLTHROUGH;
2399 case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
2400 ZSTD_FALLTHROUGH;
2402 default: break;
2404 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
2405 bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
2406 if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
2408 bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
2411 return srcSize;
2414 FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
2416 return bitContainer >> start;
2419 FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
2421 U32 const regMask = sizeof(bitContainer)*8 - 1;
2422 /* if start > regMask, bitstream is corrupted, and result is undefined */
2423 assert(nbBits < BIT_MASK_SIZE);
2424 /* On x86, the compiler turns `& ((1 << nbBits) - 1)` into a bzhi instruction, which is
2425 * better than accessing memory. When the bmi2 instruction is not present, we consider
2426 * such CPUs old (pre-Haswell, 2013) and their performance is not of that
2427 * importance.
2429 #if defined(__x86_64__) || defined(_M_X86)
2430 return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
2431 #else
2432 return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
2433 #endif
2436 /*! BIT_lookBits() :
2437 * Provides next n bits from local register.
2438 * local register is not modified.
2439 * On 32-bits, maxNbBits==24.
2440 * On 64-bits, maxNbBits==56.
2441 * @return : value extracted */
2442 FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
2444 /* arbitrate between double-shift and shift+mask */
2445 #if 1
2446 /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
2447 * bitstream is likely corrupted, and result is undefined */
2448 return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
2449 #else
2450 /* this code path is slower on my os-x laptop */
2451 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
2452 return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
2453 #endif
2456 /*! BIT_lookBitsFast() :
2457 * unsafe version; only works if nbBits >= 1 */
2458 MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
2460 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
2461 assert(nbBits >= 1);
2462 return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
2465 FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
2467 bitD->bitsConsumed += nbBits;
2470 /*! BIT_readBits() :
2471 * Read (consume) next n bits from local register and update.
2472 * Pay attention not to read more bits than the local register contains.
2473 * @return : extracted value. */
2474 FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
2476 size_t const value = BIT_lookBits(bitD, nbBits);
2477 BIT_skipBits(bitD, nbBits);
2478 return value;
2481 /*! BIT_readBitsFast() :
2482 * unsafe version; only works if nbBits >= 1 */
2483 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
2485 size_t const value = BIT_lookBitsFast(bitD, nbBits);
2486 assert(nbBits >= 1);
2487 BIT_skipBits(bitD, nbBits);
2488 return value;
2491 /*! BIT_reloadDStream_internal() :
2492 * Simple variant of BIT_reloadDStream(), with two conditions:
2493 * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
2494 * 2. look window is valid after shifted down : bitD->ptr >= bitD->start
2496 MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
2498 assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
2499 bitD->ptr -= bitD->bitsConsumed >> 3;
2500 assert(bitD->ptr >= bitD->start);
2501 bitD->bitsConsumed &= 7;
2502 bitD->bitContainer = MEM_readLEST(bitD->ptr);
2503 return BIT_DStream_unfinished;
2506 /*! BIT_reloadDStreamFast() :
2507 * Similar to BIT_reloadDStream(), but with two differences:
2508 * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
2509 * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
2510 * point you must use BIT_reloadDStream() to reload.
2512 MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
2514 if (UNLIKELY(bitD->ptr < bitD->limitPtr))
2515 return BIT_DStream_overflow;
2516 return BIT_reloadDStream_internal(bitD);
2519 /*! BIT_reloadDStream() :
2520 * Refill `bitD` from buffer previously set in BIT_initDStream() .
2521 * This function is safe : it guarantees it will never read beyond the src buffer.
2522 * @return : status of `BIT_DStream_t` internal register.
2523 * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
2524 FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
2526 /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
2527 if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
2528 static const BitContainerType zeroFilled = 0;
2529 bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
2530 /* overflow detected, erroneous scenario or end of stream: no update */
2531 return BIT_DStream_overflow;
2534 assert(bitD->ptr >= bitD->start);
2536 if (bitD->ptr >= bitD->limitPtr) {
2537 return BIT_reloadDStream_internal(bitD);
2539 if (bitD->ptr == bitD->start) {
2540 /* reached end of bitStream => no update */
2541 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
2542 return BIT_DStream_completed;
2544 /* start < ptr < limitPtr => cautious update */
2545 { U32 nbBytes = bitD->bitsConsumed >> 3;
2546 BIT_DStream_status result = BIT_DStream_unfinished;
2547 if (bitD->ptr - nbBytes < bitD->start) {
2548 nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
2549 result = BIT_DStream_endOfBuffer;
2551 bitD->ptr -= nbBytes;
2552 bitD->bitsConsumed -= nbBytes*8;
2553 bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
2554 return result;
2558 /*! BIT_endOfDStream() :
2559 * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
2561 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
2563 return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
2566 #if defined (__cplusplus)
2568 #endif
2570 #endif /* BITSTREAM_H_MODULE */
2571 /**** ended inlining bitstream.h ****/
2574 /* *****************************************
2575 * Static allocation
2576 *******************************************/
2577 /* FSE buffer bounds */
2578 #define FSE_NCOUNTBOUND 512
2579 #define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
2580 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
2582 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
2583 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
2584 #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
2586 /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
2587 #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
2588 #define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
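/* Editorial sketch of the static-allocation macros above; the tableLog of 10 and the
 * variable names are illustrative only, kept behind #if 0. */
#if 0
static FSE_DTable example_dtable[FSE_DTABLE_SIZE_U32(10)];        /* decoding table, tableLog <= 10 */
static FSE_CTable example_ctable[FSE_CTABLE_SIZE_U32(10, 255)];   /* encoding table, maxSymbolValue 255 */
/* Byte-size equivalents, e.g. for malloc(); mind the alignment note above:
 *   size_t dtSize = FSE_DTABLE_SIZE(10);
 *   size_t ctSize = FSE_CTABLE_SIZE(10, 255);
 */
#endif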
2591 /* *****************************************
2592 * FSE advanced API
2593 ***************************************** */
2595 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
2596 /**< same as FSE_optimalTableLog(), which uses `minus==2` */
2598 size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
2599 /**< build a fake FSE_CTable, designed to always compress the same symbolValue */
2601 /* FSE_buildCTable_wksp() :
2602 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
2603 * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
2604 * See FSE_buildCTable_wksp() for breakdown of workspace usage.
2606 #define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
2607 #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
2608 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
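/* Editorial sketch: sizing the scratch buffer with the macro above before calling
 * FSE_buildCTable_wksp(). FSE_MAX_TABLELOG comes from the constants further down;
 * 'example_build_ctable' is a hypothetical name, kept behind #if 0. */
#if 0
static size_t example_build_ctable(FSE_CTable* ct, const short* norm,
                                   unsigned maxSymbolValue, unsigned tableLog)
{
    /* worst-case workspace for maxSymbolValue == 255 and tableLog == FSE_MAX_TABLELOG */
    unsigned wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(255, FSE_MAX_TABLELOG)];
    return FSE_buildCTable_wksp(ct, norm, maxSymbolValue, tableLog, wksp, sizeof(wksp));
}
#endif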
2610 #define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
2611 #define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
2612 FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
2613 /**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
2615 #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
2616 #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
2617 size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
2618 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
2619 * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
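/* Editorial sketch of a one-shot decode through FSE_decompress_wksp_bmi2(), with the
 * workspace sized by the macro above. FSE_MAX_TABLELOG and FSE_MAX_SYMBOL_VALUE come from
 * the constants further down; passing bmi2 == 0 selects the portable path.
 * 'example_fse_decompress' is a hypothetical name, kept behind #if 0. */
#if 0
static size_t example_fse_decompress(void* dst, size_t dstCapacity,
                                     const void* cSrc, size_t cSrcSize)
{
    /* workspace sized for the largest table this build accepts */
    unsigned wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
    return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize,
                                    FSE_MAX_TABLELOG, wksp, sizeof(wksp), /* bmi2 */ 0);
}
#endif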
2621 typedef enum {
2622 FSE_repeat_none, /**< Cannot use the previous table */
2623 FSE_repeat_check, /**< Can use the previous table but it must be checked */
2624 FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */
2625 } FSE_repeat;
2627 /* *****************************************
2628 * FSE symbol compression API
2629 *******************************************/
2631 This API consists of small unitary functions, which highly benefit from being inlined.
2632 Hence their bodies are included in the next section.
2634 typedef struct {
2635 ptrdiff_t value;
2636 const void* stateTable;
2637 const void* symbolTT;
2638 unsigned stateLog;
2639 } FSE_CState_t;
2641 static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
2643 static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
2645 static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
2647 /**<
2648 These functions are inner components of FSE_compress_usingCTable().
2649 They allow the creation of custom streams, mixing multiple tables and bit sources.
2651 A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
2652 So the first symbol you will encode is the last you will decode, like a LIFO stack.
2654 You will need a few variables to track your CStream. They are :
2656 FSE_CTable ct; // Provided by FSE_buildCTable()
2657 BIT_CStream_t bitStream; // bitStream tracking structure
2658 FSE_CState_t state; // State tracking structure (can have several)
2661 The first thing to do is to init bitStream and state.
2662 size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
2663 FSE_initCState(&state, ct);
2665 Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
2666 You can then encode your input data, byte after byte.
2667 FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
2668 Remember decoding will be done in reverse direction.
2669 FSE_encodeSymbol(&bitStream, &state, symbol);
2671 At any time, you can also add any bit sequence.
2672 Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
2673 BIT_addBits(&bitStream, bitField, nbBits);
2675 The above methods don't commit data to memory, they just store it into local register, for speed.
2676 Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
2677 Writing data to memory is a manual operation, performed by the flushBits function.
2678 BIT_flushBits(&bitStream);
2680 Your last FSE encoding operation shall be to flush your last state value(s).
2681 FSE_flushCState(&bitStream, &state);
2683 Finally, you must close the bitStream.
2684 The function returns the size of CStream in bytes.
2685 If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
2686 If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
2687 size_t size = BIT_closeCStream(&bitStream);
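/* Editorial sketch folding the tutorial above into a single-state encoder. It assumes a
 * valid FSE_CTable built on the compression side (not part of this decompressor-only
 * amalgamation); 'example_fse_encode_block' is a hypothetical name, kept behind #if 0. */
#if 0
static size_t example_fse_encode_block(void* dst, size_t dstCapacity,
                                       const BYTE* src, size_t srcSize,
                                       const FSE_CTable* ct)
{
    BIT_CStream_t bitStream;
    FSE_CState_t state;
    size_t n = srcSize;
    if (srcSize == 0) return 0;
    CHECK_F( BIT_initCStream(&bitStream, dst, dstCapacity) );
    FSE_initCState(&state, ct);
    /* encode in reverse : the first symbol encoded is the last one decoded (LIFO) */
    while (n > 0) {
        FSE_encodeSymbol(&bitStream, &state, src[--n]);
        BIT_flushBits(&bitStream);          /* commit at most 'tableLog' bits per symbol */
    }
    FSE_flushCState(&bitStream, &state);    /* flush the final state value */
    return BIT_closeCStream(&bitStream);    /* 0 means the output did not fit */
}
#endif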
2691 /* *****************************************
2692 * FSE symbol decompression API
2693 *******************************************/
2694 typedef struct {
2695 size_t state;
2696 const void* table; /* precise table may vary, depending on U16 */
2697 } FSE_DState_t;
2700 static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
2702 static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
2704 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
2706 /**<
2707 Let's now decompose FSE_decompress_usingDTable() into its unitary components.
2708 You will decode FSE-encoded symbols from the bitStream,
2709 and also any other bitFields you put in, **in reverse order**.
2711 You will need a few variables to track your bitStream. They are :
2713 BIT_DStream_t DStream; // Stream context
2714 FSE_DState_t DState; // State context. Multiple ones are possible
2715 FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
2717 The first thing to do is to init the bitStream.
2718 errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
2720 You should then retrieve your initial state(s)
2721 (in reverse flushing order if you have several ones) :
2722 errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
2724 You can then decode your data, symbol after symbol.
2725 For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
2726 Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
2727 unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
2729 You can retrieve any bitfield you may have stored into the bitStream (in reverse order)
2730 Note : maximum allowed nbBits is 25, for 32-bits compatibility
2731 size_t bitField = BIT_readBits(&DStream, nbBits);
2733 All above operations only read from local register (which size depends on size_t).
2734 Refueling the register from memory is manually performed by the reload method.
2735 endSignal = BIT_reloadDStream(&DStream);
2737 BIT_reloadDStream() result tells if there is still some more data to read from DStream.
2738 BIT_DStream_unfinished : there is still some data left into the DStream.
2739 BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
2740 BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
2741 BIT_DStream_overflow : Dstream went too far. Decompression result is corrupted.
2743 When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
2744 to properly detect the exact end of stream.
2745 After each decoded symbol, check if DStream is fully consumed using this simple test :
2746 BIT_reloadDStream(&DStream) >= BIT_DStream_completed
2748 When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
2749 Checking if DStream has reached its end is performed by :
2750 BIT_endOfDStream(&DStream);
2751 Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
2752 FSE_endOfDState(&DState);
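/* Editorial sketch folding the tutorial above into a single-state decoder for a block
 * whose regenerated size is known in advance (the usual case in zstd).
 * 'example_fse_decode_block' is a hypothetical name, kept behind #if 0. */
#if 0
static size_t example_fse_decode_block(BYTE* dst, size_t dstSize,
                                       const void* cSrc, size_t cSrcSize,
                                       const FSE_DTable* dt)
{
    BIT_DStream_t DStream;
    FSE_DState_t DState;
    size_t n;
    CHECK_F( BIT_initDStream(&DStream, cSrc, cSrcSize) );
    FSE_initDState(&DState, &DStream, dt);
    /* symbols are regenerated in forward order because they were encoded in reverse */
    for (n = 0; n < dstSize; n++) {
        dst[n] = FSE_decodeSymbol(&DState, &DStream);   /* reads at most 'tableLog' bits */
        BIT_reloadDStream(&DStream);                    /* refill the local register */
    }
    /* a well-formed stream ends with both the bitstream and the state fully consumed */
    if (!BIT_endOfDStream(&DStream) || !FSE_endOfDState(&DState))
        return ERROR(corruption_detected);
    return dstSize;
}
#endif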
2756 /* *****************************************
2757 * FSE unsafe API
2758 *******************************************/
2759 static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
2760 /* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
2763 /* *****************************************
2764 * Implementation of inlined functions
2765 *******************************************/
2766 typedef struct {
2767 int deltaFindState;
2768 U32 deltaNbBits;
2769 } FSE_symbolCompressionTransform; /* total 8 bytes */
2771 MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
2773 const void* ptr = ct;
2774 const U16* u16ptr = (const U16*) ptr;
2775 const U32 tableLog = MEM_read16(ptr);
2776 statePtr->value = (ptrdiff_t)1<<tableLog;
2777 statePtr->stateTable = u16ptr+2;
2778 statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
2779 statePtr->stateLog = tableLog;
2783 /*! FSE_initCState2() :
2784 * Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
2785 * uses the smallest state value possible, saving the cost of this symbol */
2786 MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
2788 FSE_initCState(statePtr, ct);
2789 { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
2790 const U16* stateTable = (const U16*)(statePtr->stateTable);
2791 U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
2792 statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
2793 statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
2797 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
2799 FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
2800 const U16* const stateTable = (const U16*)(statePtr->stateTable);
2801 U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
2802 BIT_addBits(bitC, (size_t)statePtr->value, nbBitsOut);
2803 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
2806 MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
2808 BIT_addBits(bitC, (size_t)statePtr->value, statePtr->stateLog);
2809 BIT_flushBits(bitC);
2813 /* FSE_getMaxNbBits() :
2814 * Approximate maximum cost of a symbol, in bits.
2815 * Fractional costs are rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
2816 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
2817 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
2818 MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
2820 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
2821 return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
2824 /* FSE_bitCost() :
2825 * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
2826 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
2827 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
2828 MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
2830 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
2831 U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
2832 U32 const threshold = (minNbBits+1) << 16;
2833 assert(tableLog < 16);
2834 assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */
2835 { U32 const tableSize = 1 << tableLog;
2836 U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
2837 U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */
2838 U32 const bitMultiplier = 1 << accuracyLog;
2839 assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
2840 assert(normalizedDeltaFromThreshold <= bitMultiplier);
2841 return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
2846 /* ====== Decompression ====== */
2848 typedef struct {
2849 U16 tableLog;
2850 U16 fastMode;
2851 } FSE_DTableHeader; /* sizeof U32 */
2853 typedef struct
2855 unsigned short newState;
2856 unsigned char symbol;
2857 unsigned char nbBits;
2858 } FSE_decode_t; /* size == U32 */
2860 MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
2862 const void* ptr = dt;
2863 const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
2864 DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
2865 BIT_reloadDStream(bitD);
2866 DStatePtr->table = dt + 1;
2869 MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
2871 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
2872 return DInfo.symbol;
2875 MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
2877 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
2878 U32 const nbBits = DInfo.nbBits;
2879 size_t const lowBits = BIT_readBits(bitD, nbBits);
2880 DStatePtr->state = DInfo.newState + lowBits;
2883 MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
2885 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
2886 U32 const nbBits = DInfo.nbBits;
2887 BYTE const symbol = DInfo.symbol;
2888 size_t const lowBits = BIT_readBits(bitD, nbBits);
2890 DStatePtr->state = DInfo.newState + lowBits;
2891 return symbol;
2894 /*! FSE_decodeSymbolFast() :
2895 unsafe, only works if no symbol has a probability > 50% */
2896 MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
2898 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
2899 U32 const nbBits = DInfo.nbBits;
2900 BYTE const symbol = DInfo.symbol;
2901 size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
2903 DStatePtr->state = DInfo.newState + lowBits;
2904 return symbol;
2907 MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
2909 return DStatePtr->state == 0;
2914 #ifndef FSE_COMMONDEFS_ONLY
2916 /* **************************************************************
2917 * Tuning parameters
2918 ****************************************************************/
2919 /*!MEMORY_USAGE :
2920 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
2921 * Increasing memory usage improves compression ratio
2922 * Reduced memory usage can improve speed, due to cache effect
2923 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
2924 #ifndef FSE_MAX_MEMORY_USAGE
2925 # define FSE_MAX_MEMORY_USAGE 14
2926 #endif
2927 #ifndef FSE_DEFAULT_MEMORY_USAGE
2928 # define FSE_DEFAULT_MEMORY_USAGE 13
2929 #endif
2930 #if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
2931 # error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
2932 #endif
2934 /*!FSE_MAX_SYMBOL_VALUE :
2935 * Maximum symbol value authorized.
2936 * Required for proper stack allocation */
2937 #ifndef FSE_MAX_SYMBOL_VALUE
2938 # define FSE_MAX_SYMBOL_VALUE 255
2939 #endif
2941 /* **************************************************************
2942 * template functions type & suffix
2943 ****************************************************************/
2944 #define FSE_FUNCTION_TYPE BYTE
2945 #define FSE_FUNCTION_EXTENSION
2946 #define FSE_DECODE_TYPE FSE_decode_t
2949 #endif /* !FSE_COMMONDEFS_ONLY */
2952 /* ***************************************************************
2953 * Constants
2954 *****************************************************************/
2955 #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
2956 #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
2957 #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
2958 #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
2959 #define FSE_MIN_TABLELOG 5
2961 #define FSE_TABLELOG_ABSOLUTE_MAX 15
2962 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
2963 # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
2964 #endif
2966 #define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
2969 #endif /* FSE_STATIC_LINKING_ONLY */
2972 #if defined (__cplusplus)
2974 #endif
2975 /**** ended inlining fse.h ****/
2976 /**** start inlining huf.h ****/
2977 /* ******************************************************************
2978 * huff0 huffman codec,
2979 * part of Finite State Entropy library
2980 * Copyright (c) Meta Platforms, Inc. and affiliates.
2982 * You can contact the author at :
2983 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
2985 * This source code is licensed under both the BSD-style license (found in the
2986 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
2987 * in the COPYING file in the root directory of this source tree).
2988 * You may select, at your option, one of the above-listed licenses.
2989 ****************************************************************** */
2991 #if defined (__cplusplus)
2992 extern "C" {
2993 #endif
2995 #ifndef HUF_H_298734234
2996 #define HUF_H_298734234
2998 /* *** Dependencies *** */
2999 /**** skipping file: zstd_deps.h ****/
3000 /**** skipping file: mem.h ****/
3001 #define FSE_STATIC_LINKING_ONLY
3002 /**** skipping file: fse.h ****/
3005 /* *** Tool functions *** */
3006 #define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
3007 size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
3009 /* Error Management */
3010 unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
3011 const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
3014 #define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
3015 #define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
3017 /* *** Constants *** */
3018 #define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
3019 #define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
3020 #define HUF_SYMBOLVALUE_MAX 255
3022 #define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_TABLELOG_MAX. Beyond that value, code does not work */
3023 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
3024 # error "HUF_TABLELOG_MAX is too large !"
3025 #endif
3028 /* ****************************************
3029 * Static allocation
3030 ******************************************/
3031 /* HUF buffer bounds */
3032 #define HUF_CTABLEBOUND 129
3033 #define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */
3034 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
3036 /* static allocation of HUF's Compression Table */
3037 /* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
3038 typedef size_t HUF_CElt; /* consider it an incomplete type */
3039 #define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */
3040 #define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
3041 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
3042 HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */
3044 /* static allocation of HUF's DTable */
3045 typedef U32 HUF_DTable;
3046 #define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog)))
3047 #define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
3048 HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
3049 #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
3050 HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
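/* A minimal allocation sketch using the macros above (illustrative only; the
 * chosen limits are simply the library maxima defined in this header):
 * \code
 *   HUF_CREATE_STATIC_CTABLE(hufCTable, HUF_SYMBOLVALUE_MAX);
 *   HUF_CREATE_STATIC_DTABLEX2(hufDTable, HUF_TABLELOG_MAX);
 * \endcode
 * Note that the DTable initializer stores the table log in its first cell,
 * which acts as the DTable header.
 */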
3053 /* ****************************************
3054 * Advanced decompression functions
3055 ******************************************/
3058 * Huffman flags bitset.
3059 * For all flags, 0 is the default value.
3061 typedef enum {
3063 * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime.
3064 * Otherwise: Ignored.
3066 HUF_flags_bmi2 = (1 << 0),
3068 * If set: Test possible table depths to find the one that produces the smallest header + encoded size.
3069 * If unset: Use heuristic to find the table depth.
3071 HUF_flags_optimalDepth = (1 << 1),
3073 * If set: If the previous table can encode the input, always reuse the previous table.
3074 * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output.
3076 HUF_flags_preferRepeat = (1 << 2),
3078 * If set: Sample the input and check if the sample is uncompressible; if it is, don't attempt to compress.
3079 * If unset: Always histogram the entire input.
3081 HUF_flags_suspectUncompressible = (1 << 3),
3083 * If set: Don't use assembly implementations
3084 * If unset: Allow using assembly implementations
3086 HUF_flags_disableAsm = (1 << 4),
3088 * If set: Don't use the fast decoding loop, always use the fallback decoding loop.
3089 * If unset: Use the fast decoding loop when possible.
3091 HUF_flags_disableFast = (1 << 5)
3092 } HUF_flags_e;
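/* The flags are bit values meant to be OR-ed together. For example, a caller
 * that has detected BMI2 support at runtime but wants to avoid the assembly
 * loops could pass (illustrative combination only):
 * \code
 *   int const hufFlags = HUF_flags_bmi2 | HUF_flags_disableAsm;
 * \endcode
 */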
3095 /* ****************************************
3096 * HUF detailed API
3097 * ****************************************/
3098 #define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra
3100 /*! HUF_compress() does the following:
3101 * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
3102 * 2. (optional) refine tableLog using HUF_optimalTableLog()
3103 * 3. build Huffman table from count using HUF_buildCTable()
3104 * 4. save Huffman table to memory buffer using HUF_writeCTable()
3105 * 5. encode the data stream using HUF_compress4X_usingCTable()
3107 * The following API allows targeting specific sub-functions for advanced tasks.
3108 * For example, it's possible to compress several blocks using the same 'CTable',
3109 * or to save and regenerate 'CTable' using external methods.
3111 unsigned HUF_minTableLog(unsigned symbolCardinality);
3112 unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue);
3113 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace,
3114 size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */
3115 size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
3116 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
3117 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
3118 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
3120 typedef enum {
3121 HUF_repeat_none, /**< Cannot use the previous table */
3122 HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
3123 HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
3124 } HUF_repeat;
3126 /** HUF_compress4X_repeat() :
3127 * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
3128 * If it uses hufTable it does not modify hufTable or repeat.
3129 * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
3130 * If preferRepeat then the old table will always be used if valid.
3131 * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
3132 size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
3133 const void* src, size_t srcSize,
3134 unsigned maxSymbolValue, unsigned tableLog,
3135 void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
3136 HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
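/* A minimal call sketch for the repeat-table pattern described above. This is
 * an illustration, not upstream usage: 'dst', 'dstCapacity', 'src', 'srcSize'
 * are assumed caller-provided buffers and handleError() is a placeholder.
 * \code
 *   HUF_CREATE_STATIC_CTABLE(ctable, HUF_SYMBOLVALUE_MAX);
 *   U64 wksp[HUF_WORKSPACE_SIZE_U64];
 *   HUF_repeat repeat = HUF_repeat_none;
 *   size_t const cSize = HUF_compress4X_repeat(dst, dstCapacity, src, srcSize,
 *                                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
 *                                wksp, sizeof(wksp),
 *                                ctable, &repeat, 0);
 *   if (HUF_isError(cSize)) { handleError(); }
 * \endcode
 * Subsequent blocks can pass the same 'ctable'/'repeat' pair, so the previous
 * table can be considered for reuse as documented above.
 */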
3138 /** HUF_buildCTable_wksp() :
3139 * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
3140 * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
3142 #define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)
3143 #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
3144 size_t HUF_buildCTable_wksp (HUF_CElt* tree,
3145 const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
3146 void* workSpace, size_t wkspSize);
3148 /*! HUF_readStats() :
3149 * Read compact Huffman tree, saved by HUF_writeCTable().
3150 * `huffWeight` is destination buffer.
3151 * @return : size read from `src`, or an error code.
3152 * Note : Needed by HUF_readCTable() and HUF_readDTableXn(). */
3153 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
3154 U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
3155 const void* src, size_t srcSize);
3157 /*! HUF_readStats_wksp() :
3158 * Same as HUF_readStats() but takes an external workspace which must be
3159 * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
3160 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
3162 #define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
3163 #define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
3164 size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
3165 U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
3166 const void* src, size_t srcSize,
3167 void* workspace, size_t wkspSize,
3168 int flags);
3170 /** HUF_readCTable() :
3171 * Loading a CTable saved with HUF_writeCTable() */
3172 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
3174 /** HUF_getNbBitsFromCTable() :
3175 * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
3176 * Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0
3177 * Note 2 : is not inlined, as HUF_CElt definition is private
3179 U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
3181 typedef struct {
3182 BYTE tableLog;
3183 BYTE maxSymbolValue;
3184 BYTE unused[sizeof(size_t) - 2];
3185 } HUF_CTableHeader;
3187 /** HUF_readCTableHeader() :
3188 * @returns The header from the CTable specifying the tableLog and the maxSymbolValue.
3190 HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable);
3193 * HUF_decompress() does the following:
3194 * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
3195 * 2. build Huffman table from save, using HUF_readDTableX?()
3196 * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
3199 /** HUF_selectDecoder() :
3200 * Tells which decoder is likely to decode faster,
3201 * based on a set of pre-computed metrics.
3202 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
3203 * Assumption : 0 < dstSize <= 128 KB */
3204 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
3207 * The minimum workspace size for the `workSpace` used in
3208 * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
3210 * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
3211 * HUF_TABLELOG_MAX=12 to ~1850 bytes when HUF_TABLELOG_MAX=15.
3212 * Buffer overflow errors may potentially occur if code modifications result in
3213 * a required workspace size greater than that specified in the following
3214 * macro.
3216 #define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
3217 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
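/* Illustrative arithmetic: (2 << 10) + (1 << 9) = 2048 + 512 = 2560 bytes,
 * i.e. 640 U32 cells, comfortably above the ~1500..~1850 bytes quoted above. */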
3220 /* ====================== */
3221 /* single stream variants */
3222 /* ====================== */
3224 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
3225 /** HUF_compress1X_repeat() :
3226 * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
3227 * If it uses hufTable it does not modify hufTable or repeat.
3228 * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
3229 * If preferRepeat then the old table will always be used if valid.
3230 * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
3231 size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
3232 const void* src, size_t srcSize,
3233 unsigned maxSymbolValue, unsigned tableLog,
3234 void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
3235 HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
3237 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
3238 #ifndef HUF_FORCE_DECOMPRESS_X1
3239 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /**< double-symbols decoder */
3240 #endif
3242 /* BMI2 variants.
3243 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
3245 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
3246 #ifndef HUF_FORCE_DECOMPRESS_X2
3247 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
3248 #endif
3249 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
3250 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
3251 #ifndef HUF_FORCE_DECOMPRESS_X2
3252 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
3253 #endif
3254 #ifndef HUF_FORCE_DECOMPRESS_X1
3255 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
3256 #endif
3258 #endif /* HUF_H_298734234 */
3260 #if defined (__cplusplus)
3262 #endif
3263 /**** ended inlining huf.h ****/
3264 /**** skipping file: bits.h ****/
3267 /*=== Version ===*/
3268 unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
3271 /*=== Error Management ===*/
3272 unsigned FSE_isError(size_t code) { return ERR_isError(code); }
3273 const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
3275 unsigned HUF_isError(size_t code) { return ERR_isError(code); }
3276 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
3279 /*-**************************************************************
3280 * FSE NCount encoding-decoding
3281 ****************************************************************/
3282 FORCE_INLINE_TEMPLATE
3283 size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
3284 const void* headerBuffer, size_t hbSize)
3286 const BYTE* const istart = (const BYTE*) headerBuffer;
3287 const BYTE* const iend = istart + hbSize;
3288 const BYTE* ip = istart;
3289 int nbBits;
3290 int remaining;
3291 int threshold;
3292 U32 bitStream;
3293 int bitCount;
3294 unsigned charnum = 0;
3295 unsigned const maxSV1 = *maxSVPtr + 1;
3296 int previous0 = 0;
3298 if (hbSize < 8) {
3299 /* This function only works when hbSize >= 8 */
3300 char buffer[8] = {0};
3301 ZSTD_memcpy(buffer, headerBuffer, hbSize);
3302 { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
3303 buffer, sizeof(buffer));
3304 if (FSE_isError(countSize)) return countSize;
3305 if (countSize > hbSize) return ERROR(corruption_detected);
3306 return countSize;
3308 assert(hbSize >= 8);
3310 /* init */
3311 ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
3312 bitStream = MEM_readLE32(ip);
3313 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
3314 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
3315 bitStream >>= 4;
3316 bitCount = 4;
3317 *tableLogPtr = nbBits;
3318 remaining = (1<<nbBits)+1;
3319 threshold = 1<<nbBits;
3320 nbBits++;
3322 for (;;) {
3323 if (previous0) {
3324 /* Count the number of repeats. Each time the
3325 * 2-bit repeat code is 0b11 there is another
3326 * repeat.
3327 * Avoid UB by setting the high bit to 1.
3329 int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
3330 while (repeats >= 12) {
3331 charnum += 3 * 12;
3332 if (LIKELY(ip <= iend-7)) {
3333 ip += 3;
3334 } else {
3335 bitCount -= (int)(8 * (iend - 7 - ip));
3336 bitCount &= 31;
3337 ip = iend - 4;
3339 bitStream = MEM_readLE32(ip) >> bitCount;
3340 repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
3342 charnum += 3 * repeats;
3343 bitStream >>= 2 * repeats;
3344 bitCount += 2 * repeats;
3346 /* Add the final repeat which isn't 0b11. */
3347 assert((bitStream & 3) < 3);
3348 charnum += bitStream & 3;
3349 bitCount += 2;
3351 /* This is an error, but break and return an error
3352 * at the end, because returning out of a loop makes
3353 * it harder for the compiler to optimize.
3355 if (charnum >= maxSV1) break;
3357 /* We don't need to set the normalized count to 0
3358 * because we already memset the whole buffer to 0.
3361 if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
3362 assert((bitCount >> 3) <= 3); /* For first condition to work */
3363 ip += bitCount>>3;
3364 bitCount &= 7;
3365 } else {
3366 bitCount -= (int)(8 * (iend - 4 - ip));
3367 bitCount &= 31;
3368 ip = iend - 4;
3370 bitStream = MEM_readLE32(ip) >> bitCount;
3373 int const max = (2*threshold-1) - remaining;
3374 int count;
3376 if ((bitStream & (threshold-1)) < (U32)max) {
3377 count = bitStream & (threshold-1);
3378 bitCount += nbBits-1;
3379 } else {
3380 count = bitStream & (2*threshold-1);
3381 if (count >= threshold) count -= max;
3382 bitCount += nbBits;
3385 count--; /* extra accuracy */
3386 /* When it matters (small blocks), this is a
3387 * predictable branch, because we don't use -1.
3389 if (count >= 0) {
3390 remaining -= count;
3391 } else {
3392 assert(count == -1);
3393 remaining += count;
3395 normalizedCounter[charnum++] = (short)count;
3396 previous0 = !count;
3398 assert(threshold > 1);
3399 if (remaining < threshold) {
3400 /* This branch can be folded into the
3401 * threshold update condition because we
3402 * know that threshold > 1.
3404 if (remaining <= 1) break;
3405 nbBits = ZSTD_highbit32(remaining) + 1;
3406 threshold = 1 << (nbBits - 1);
3408 if (charnum >= maxSV1) break;
3410 if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
3411 ip += bitCount>>3;
3412 bitCount &= 7;
3413 } else {
3414 bitCount -= (int)(8 * (iend - 4 - ip));
3415 bitCount &= 31;
3416 ip = iend - 4;
3418 bitStream = MEM_readLE32(ip) >> bitCount;
3420 if (remaining != 1) return ERROR(corruption_detected);
3421 /* Only possible when there are too many zeros. */
3422 if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
3423 if (bitCount > 32) return ERROR(corruption_detected);
3424 *maxSVPtr = charnum-1;
3426 ip += (bitCount+7)>>3;
3427 return ip-istart;
3430 /* Avoids the FORCE_INLINE of the _body() function. */
3431 static size_t FSE_readNCount_body_default(
3432 short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
3433 const void* headerBuffer, size_t hbSize)
3435 return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
3438 #if DYNAMIC_BMI2
3439 BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
3440 short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
3441 const void* headerBuffer, size_t hbSize)
3443 return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
3445 #endif
3447 size_t FSE_readNCount_bmi2(
3448 short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
3449 const void* headerBuffer, size_t hbSize, int bmi2)
3451 #if DYNAMIC_BMI2
3452 if (bmi2) {
3453 return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
3455 #endif
3456 (void)bmi2;
3457 return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
3460 size_t FSE_readNCount(
3461 short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
3462 const void* headerBuffer, size_t hbSize)
3464 return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
3468 /*! HUF_readStats() :
3469 Read compact Huffman tree, saved by HUF_writeCTable().
3470 `huffWeight` is destination buffer.
3471 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
3472 @return : size read from `src`, or an error code.
3473 Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
3475 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
3476 U32* nbSymbolsPtr, U32* tableLogPtr,
3477 const void* src, size_t srcSize)
3479 U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
3480 return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
3483 FORCE_INLINE_TEMPLATE size_t
3484 HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
3485 U32* nbSymbolsPtr, U32* tableLogPtr,
3486 const void* src, size_t srcSize,
3487 void* workSpace, size_t wkspSize,
3488 int bmi2)
3490 U32 weightTotal;
3491 const BYTE* ip = (const BYTE*) src;
3492 size_t iSize;
3493 size_t oSize;
3495 if (!srcSize) return ERROR(srcSize_wrong);
3496 iSize = ip[0];
3497 /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzers complain ... */
3499 if (iSize >= 128) { /* special header */
3500 oSize = iSize - 127;
3501 iSize = ((oSize+1)/2);
3502 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
3503 if (oSize >= hwSize) return ERROR(corruption_detected);
3504 ip += 1;
3505 { U32 n;
3506 for (n=0; n<oSize; n+=2) {
3507 huffWeight[n] = ip[n/2] >> 4;
3508 huffWeight[n+1] = ip[n/2] & 15;
3509 } } }
3510 else { /* header compressed with FSE (normal case) */
3511 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
3512 /* max (hwSize-1) values decoded, as last one is implied */
3513 oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
3514 if (FSE_isError(oSize)) return oSize;
3517 /* collect weight stats */
3518 ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
3519 weightTotal = 0;
3520 { U32 n; for (n=0; n<oSize; n++) {
3521 if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
3522 rankStats[huffWeight[n]]++;
3523 weightTotal += (1 << huffWeight[n]) >> 1;
3525 if (weightTotal == 0) return ERROR(corruption_detected);
3527 /* get last non-null symbol weight (implied, total must be 2^n) */
3528 { U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
3529 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
3530 *tableLogPtr = tableLog;
3531 /* determine last weight */
3532 { U32 const total = 1 << tableLog;
3533 U32 const rest = total - weightTotal;
3534 U32 const verif = 1 << ZSTD_highbit32(rest);
3535 U32 const lastWeight = ZSTD_highbit32(rest) + 1;
3536 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
3537 huffWeight[oSize] = (BYTE)lastWeight;
3538 rankStats[lastWeight]++;
3541 /* check tree construction validity */
3542 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
3544 /* results */
3545 *nbSymbolsPtr = (U32)(oSize+1);
3546 return iSize+1;
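/* Two worked examples for the logic above (illustrative values only):
 * - "special header" branch: a first byte of 130 is >= 128, so
 *   oSize = 130 - 127 = 3 weights follow, packed two per byte
 *   (high nibble first) in iSize = (3+1)/2 = 2 bytes.
 * - implied last weight: if the decoded weights are {1,1,2}, the
 *   contributions (1<<w)>>1 give weightTotal = 1+1+2 = 4, so
 *   tableLog = ZSTD_highbit32(4)+1 = 3, rest = 8-4 = 4, which is a clean
 *   power of 2; the implied last weight is ZSTD_highbit32(4)+1 = 3 and the
 *   stream describes oSize+1 = 4 symbols in total.
 */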
3549 /* Avoids the FORCE_INLINE of the _body() function. */
3550 static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
3551 U32* nbSymbolsPtr, U32* tableLogPtr,
3552 const void* src, size_t srcSize,
3553 void* workSpace, size_t wkspSize)
3555 return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
3558 #if DYNAMIC_BMI2
3559 static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
3560 U32* nbSymbolsPtr, U32* tableLogPtr,
3561 const void* src, size_t srcSize,
3562 void* workSpace, size_t wkspSize)
3564 return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
3566 #endif
3568 size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
3569 U32* nbSymbolsPtr, U32* tableLogPtr,
3570 const void* src, size_t srcSize,
3571 void* workSpace, size_t wkspSize,
3572 int flags)
3574 #if DYNAMIC_BMI2
3575 if (flags & HUF_flags_bmi2) {
3576 return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
3578 #endif
3579 (void)flags;
3580 return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
3582 /**** ended inlining common/entropy_common.c ****/
3583 /**** start inlining common/error_private.c ****/
3585 * Copyright (c) Meta Platforms, Inc. and affiliates.
3586 * All rights reserved.
3588 * This source code is licensed under both the BSD-style license (found in the
3589 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
3590 * in the COPYING file in the root directory of this source tree).
3591 * You may select, at your option, one of the above-listed licenses.
3594 /* The purpose of this file is to have a single list of error strings embedded in binary */
3596 /**** skipping file: error_private.h ****/
3598 const char* ERR_getErrorString(ERR_enum code)
3600 #ifdef ZSTD_STRIP_ERROR_STRINGS
3601 (void)code;
3602 return "Error strings stripped";
3603 #else
3604 static const char* const notErrorCode = "Unspecified error code";
3605 switch( code )
3607 case PREFIX(no_error): return "No error detected";
3608 case PREFIX(GENERIC): return "Error (generic)";
3609 case PREFIX(prefix_unknown): return "Unknown frame descriptor";
3610 case PREFIX(version_unsupported): return "Version not supported";
3611 case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
3612 case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
3613 case PREFIX(corruption_detected): return "Data corruption detected";
3614 case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
3615 case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
3616 case PREFIX(parameter_unsupported): return "Unsupported parameter";
3617 case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
3618 case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
3619 case PREFIX(init_missing): return "Context should be init first";
3620 case PREFIX(memory_allocation): return "Allocation error : not enough memory";
3621 case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
3622 case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
3623 case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
3624 case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
3625 case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
3626 case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
3627 case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
3628 case PREFIX(dictionary_wrong): return "Dictionary mismatch";
3629 case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
3630 case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
3631 case PREFIX(srcSize_wrong): return "Src size is incorrect";
3632 case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
3633 case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full";
3634 case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty";
3635 /* following error codes are not stable and may be removed or changed in a future version */
3636 case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
3637 case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
3638 case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
3639 case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
3640 case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
3641 case PREFIX(externalSequences_invalid): return "External sequences are not valid";
3642 case PREFIX(maxCode):
3643 default: return notErrorCode;
3645 #endif
3647 /**** ended inlining common/error_private.c ****/
3648 /**** start inlining common/fse_decompress.c ****/
3649 /* ******************************************************************
3650 * FSE : Finite State Entropy decoder
3651 * Copyright (c) Meta Platforms, Inc. and affiliates.
3653 * You can contact the author at :
3654 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
3655 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
3657 * This source code is licensed under both the BSD-style license (found in the
3658 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
3659 * in the COPYING file in the root directory of this source tree).
3660 * You may select, at your option, one of the above-listed licenses.
3661 ****************************************************************** */
3664 /* **************************************************************
3665 * Includes
3666 ****************************************************************/
3667 /**** skipping file: debug.h ****/
3668 /**** skipping file: bitstream.h ****/
3669 /**** skipping file: compiler.h ****/
3670 #define FSE_STATIC_LINKING_ONLY
3671 /**** skipping file: fse.h ****/
3672 /**** skipping file: error_private.h ****/
3673 /**** skipping file: zstd_deps.h ****/
3674 /**** skipping file: bits.h ****/
3677 /* **************************************************************
3678 * Error Management
3679 ****************************************************************/
3680 #define FSE_isError ERR_isError
3681 #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
3684 /* **************************************************************
3685 * Templates
3686 ****************************************************************/
3688 designed to be included
3689 for type-specific functions (template emulation in C)
3690 Objective is to write these functions only once, for improved maintenance
3693 /* safety checks */
3694 #ifndef FSE_FUNCTION_EXTENSION
3695 # error "FSE_FUNCTION_EXTENSION must be defined"
3696 #endif
3697 #ifndef FSE_FUNCTION_TYPE
3698 # error "FSE_FUNCTION_TYPE must be defined"
3699 #endif
3701 /* Function names */
3702 #define FSE_CAT(X,Y) X##Y
3703 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
3704 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
3706 static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
3708 void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
3709 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
3710 U16* symbolNext = (U16*)workSpace;
3711 BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
3713 U32 const maxSV1 = maxSymbolValue + 1;
3714 U32 const tableSize = 1 << tableLog;
3715 U32 highThreshold = tableSize-1;
3717 /* Sanity Checks */
3718 if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
3719 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
3720 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
3722 /* Init, lay down lowprob symbols */
3723 { FSE_DTableHeader DTableH;
3724 DTableH.tableLog = (U16)tableLog;
3725 DTableH.fastMode = 1;
3726 { S16 const largeLimit= (S16)(1 << (tableLog-1));
3727 U32 s;
3728 for (s=0; s<maxSV1; s++) {
3729 if (normalizedCounter[s]==-1) {
3730 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
3731 symbolNext[s] = 1;
3732 } else {
3733 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
3734 symbolNext[s] = (U16)normalizedCounter[s];
3735 } } }
3736 ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
3739 /* Spread symbols */
3740 if (highThreshold == tableSize - 1) {
3741 size_t const tableMask = tableSize-1;
3742 size_t const step = FSE_TABLESTEP(tableSize);
3743 /* First lay down the symbols in order.
3744 * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
3745 * misses since small blocks generally have small table logs, so nearly
3746 * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
3747 * our buffer to handle the over-write.
3749 { U64 const add = 0x0101010101010101ull;
3750 size_t pos = 0;
3751 U64 sv = 0;
3752 U32 s;
3753 for (s=0; s<maxSV1; ++s, sv += add) {
3754 int i;
3755 int const n = normalizedCounter[s];
3756 MEM_write64(spread + pos, sv);
3757 for (i = 8; i < n; i += 8) {
3758 MEM_write64(spread + pos + i, sv);
3760 pos += (size_t)n;
3762 /* Now we spread those positions across the table.
3763 * The benefit of doing it in two stages is that we avoid the
3764 * variable size inner loop, which caused lots of branch misses.
3765 * Now we can run through all the positions without any branch misses.
3766 * We unroll the loop twice, since that is what empirically worked best.
3769 size_t position = 0;
3770 size_t s;
3771 size_t const unroll = 2;
3772 assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
3773 for (s = 0; s < (size_t)tableSize; s += unroll) {
3774 size_t u;
3775 for (u = 0; u < unroll; ++u) {
3776 size_t const uPosition = (position + (u * step)) & tableMask;
3777 tableDecode[uPosition].symbol = spread[s + u];
3779 position = (position + (unroll * step)) & tableMask;
3781 assert(position == 0);
3783 } else {
3784 U32 const tableMask = tableSize-1;
3785 U32 const step = FSE_TABLESTEP(tableSize);
3786 U32 s, position = 0;
3787 for (s=0; s<maxSV1; s++) {
3788 int i;
3789 for (i=0; i<normalizedCounter[s]; i++) {
3790 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
3791 position = (position + step) & tableMask;
3792 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
3794 if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
3797 /* Build Decoding table */
3798 { U32 u;
3799 for (u=0; u<tableSize; u++) {
3800 FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
3801 U32 const nextState = symbolNext[symbol]++;
3802 tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
3803 tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
3806 return 0;
3809 size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
3811 return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
3815 #ifndef FSE_COMMONDEFS_ONLY
3817 /*-*******************************************************
3818 * Decompression (Byte symbols)
3819 *********************************************************/
3821 FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
3822 void* dst, size_t maxDstSize,
3823 const void* cSrc, size_t cSrcSize,
3824 const FSE_DTable* dt, const unsigned fast)
3826 BYTE* const ostart = (BYTE*) dst;
3827 BYTE* op = ostart;
3828 BYTE* const omax = op + maxDstSize;
3829 BYTE* const olimit = omax-3;
3831 BIT_DStream_t bitD;
3832 FSE_DState_t state1;
3833 FSE_DState_t state2;
3835 /* Init */
3836 CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
3838 FSE_initDState(&state1, &bitD, dt);
3839 FSE_initDState(&state2, &bitD, dt);
3841 #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
3843 /* 4 symbols per loop */
3844 for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
3845 op[0] = FSE_GETSYMBOL(&state1);
3847 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
3848 BIT_reloadDStream(&bitD);
3850 op[1] = FSE_GETSYMBOL(&state2);
3852 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
3853 { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
3855 op[2] = FSE_GETSYMBOL(&state1);
3857 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
3858 BIT_reloadDStream(&bitD);
3860 op[3] = FSE_GETSYMBOL(&state2);
3863 /* tail */
3864 /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
3865 while (1) {
3866 if (op>(omax-2)) return ERROR(dstSize_tooSmall);
3867 *op++ = FSE_GETSYMBOL(&state1);
3868 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
3869 *op++ = FSE_GETSYMBOL(&state2);
3870 break;
3873 if (op>(omax-2)) return ERROR(dstSize_tooSmall);
3874 *op++ = FSE_GETSYMBOL(&state2);
3875 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
3876 *op++ = FSE_GETSYMBOL(&state1);
3877 break;
3880 assert(op >= ostart);
3881 return (size_t)(op-ostart);
3884 typedef struct {
3885 short ncount[FSE_MAX_SYMBOL_VALUE + 1];
3886 } FSE_DecompressWksp;
3889 FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
3890 void* dst, size_t dstCapacity,
3891 const void* cSrc, size_t cSrcSize,
3892 unsigned maxLog, void* workSpace, size_t wkspSize,
3893 int bmi2)
3895 const BYTE* const istart = (const BYTE*)cSrc;
3896 const BYTE* ip = istart;
3897 unsigned tableLog;
3898 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
3899 FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
3900 size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable);
3901 FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos;
3903 FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
3904 if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
3906 /* correct offset to dtable depends on this property */
3907 FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0);
3909 /* normal FSE decoding mode */
3910 { size_t const NCountLength =
3911 FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
3912 if (FSE_isError(NCountLength)) return NCountLength;
3913 if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
3914 assert(NCountLength <= cSrcSize);
3915 ip += NCountLength;
3916 cSrcSize -= NCountLength;
3919 if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
3920 assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
3921 workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
3922 wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
3924 CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
3927 const void* ptr = dtable;
3928 const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
3929 const U32 fastMode = DTableH->fastMode;
3931 /* select fast mode (static) */
3932 if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
3933 return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
3937 /* Avoids the FORCE_INLINE of the _body() function. */
3938 static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
3940 return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
3943 #if DYNAMIC_BMI2
3944 BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
3946 return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
3948 #endif
3950 size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
3952 #if DYNAMIC_BMI2
3953 if (bmi2) {
3954 return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
3956 #endif
3957 (void)bmi2;
3958 return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
3961 #endif /* FSE_COMMONDEFS_ONLY */
3962 /**** ended inlining common/fse_decompress.c ****/
3963 /**** start inlining common/zstd_common.c ****/
3965 * Copyright (c) Meta Platforms, Inc. and affiliates.
3966 * All rights reserved.
3968 * This source code is licensed under both the BSD-style license (found in the
3969 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
3970 * in the COPYING file in the root directory of this source tree).
3971 * You may select, at your option, one of the above-listed licenses.
3976 /*-*************************************
3977 * Dependencies
3978 ***************************************/
3979 #define ZSTD_DEPS_NEED_MALLOC
3980 /**** skipping file: error_private.h ****/
3981 /**** start inlining zstd_internal.h ****/
3983 * Copyright (c) Meta Platforms, Inc. and affiliates.
3984 * All rights reserved.
3986 * This source code is licensed under both the BSD-style license (found in the
3987 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
3988 * in the COPYING file in the root directory of this source tree).
3989 * You may select, at your option, one of the above-listed licenses.
3992 #ifndef ZSTD_CCOMMON_H_MODULE
3993 #define ZSTD_CCOMMON_H_MODULE
3995 /* this module contains definitions which must be identical
3996 * across compression, decompression and dictBuilder.
3997 * It also contains a few functions useful to at least 2 of them
3998 * and which benefit from being inlined */
4000 /*-*************************************
4001 * Dependencies
4002 ***************************************/
4003 /**** skipping file: compiler.h ****/
4004 /**** start inlining cpu.h ****/
4006 * Copyright (c) Meta Platforms, Inc. and affiliates.
4007 * All rights reserved.
4009 * This source code is licensed under both the BSD-style license (found in the
4010 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
4011 * in the COPYING file in the root directory of this source tree).
4012 * You may select, at your option, one of the above-listed licenses.
4015 #ifndef ZSTD_COMMON_CPU_H
4016 #define ZSTD_COMMON_CPU_H
4019 * Implementation taken from folly/CpuId.h
4020 * https://github.com/facebook/folly/blob/master/folly/CpuId.h
4023 /**** skipping file: mem.h ****/
4025 #ifdef _MSC_VER
4026 #include <intrin.h>
4027 #endif
4029 typedef struct {
4030 U32 f1c;
4031 U32 f1d;
4032 U32 f7b;
4033 U32 f7c;
4034 } ZSTD_cpuid_t;
4036 MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
4037 U32 f1c = 0;
4038 U32 f1d = 0;
4039 U32 f7b = 0;
4040 U32 f7c = 0;
4041 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
4042 #if !defined(__clang__)
4043 int reg[4];
4044 __cpuid((int*)reg, 0);
4046 int const n = reg[0];
4047 if (n >= 1) {
4048 __cpuid((int*)reg, 1);
4049 f1c = (U32)reg[2];
4050 f1d = (U32)reg[3];
4052 if (n >= 7) {
4053 __cpuidex((int*)reg, 7, 0);
4054 f7b = (U32)reg[1];
4055 f7c = (U32)reg[2];
4058 #else
4059 /* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
4060 * which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
4061 * to due to being a reserved register. So in that case, do the `cpuid`
4062 * ourselves. Clang supports inline assembly anyway.
4064 U32 n;
4065 __asm__(
4066 "pushq %%rbx\n\t"
4067 "cpuid\n\t"
4068 "popq %%rbx\n\t"
4069 : "=a"(n)
4070 : "a"(0)
4071 : "rcx", "rdx");
4072 if (n >= 1) {
4073 U32 f1a;
4074 __asm__(
4075 "pushq %%rbx\n\t"
4076 "cpuid\n\t"
4077 "popq %%rbx\n\t"
4078 : "=a"(f1a), "=c"(f1c), "=d"(f1d)
4079 : "a"(1)
4082 if (n >= 7) {
4083 __asm__(
4084 "pushq %%rbx\n\t"
4085 "cpuid\n\t"
4086 "movq %%rbx, %%rax\n\t"
4087 "popq %%rbx"
4088 : "=a"(f7b), "=c"(f7c)
4089 : "a"(7), "c"(0)
4090 : "rdx");
4092 #endif
4093 #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
4094 /* The following block is like the normal cpuid branch below, but gcc
4095 * reserves ebx for use of its pic register so we must specially
4096 * handle the save and restore to avoid clobbering the register
4098 U32 n;
4099 __asm__(
4100 "pushl %%ebx\n\t"
4101 "cpuid\n\t"
4102 "popl %%ebx\n\t"
4103 : "=a"(n)
4104 : "a"(0)
4105 : "ecx", "edx");
4106 if (n >= 1) {
4107 U32 f1a;
4108 __asm__(
4109 "pushl %%ebx\n\t"
4110 "cpuid\n\t"
4111 "popl %%ebx\n\t"
4112 : "=a"(f1a), "=c"(f1c), "=d"(f1d)
4113 : "a"(1));
4115 if (n >= 7) {
4116 __asm__(
4117 "pushl %%ebx\n\t"
4118 "cpuid\n\t"
4119 "movl %%ebx, %%eax\n\t"
4120 "popl %%ebx"
4121 : "=a"(f7b), "=c"(f7c)
4122 : "a"(7), "c"(0)
4123 : "edx");
4125 #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
4126 U32 n;
4127 __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
4128 if (n >= 1) {
4129 U32 f1a;
4130 __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
4132 if (n >= 7) {
4133 U32 f7a;
4134 __asm__("cpuid"
4135 : "=a"(f7a), "=b"(f7b), "=c"(f7c)
4136 : "a"(7), "c"(0)
4137 : "edx");
4139 #endif
4141 ZSTD_cpuid_t cpuid;
4142 cpuid.f1c = f1c;
4143 cpuid.f1d = f1d;
4144 cpuid.f7b = f7b;
4145 cpuid.f7c = f7c;
4146 return cpuid;
4150 #define X(name, r, bit) \
4151 MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
4152 return ((cpuid.r) & (1U << bit)) != 0; \
4155 /* cpuid(1): Processor Info and Feature Bits. */
4156 #define C(name, bit) X(name, f1c, bit)
4157 C(sse3, 0)
4158 C(pclmuldq, 1)
4159 C(dtes64, 2)
4160 C(monitor, 3)
4161 C(dscpl, 4)
4162 C(vmx, 5)
4163 C(smx, 6)
4164 C(eist, 7)
4165 C(tm2, 8)
4166 C(ssse3, 9)
4167 C(cnxtid, 10)
4168 C(fma, 12)
4169 C(cx16, 13)
4170 C(xtpr, 14)
4171 C(pdcm, 15)
4172 C(pcid, 17)
4173 C(dca, 18)
4174 C(sse41, 19)
4175 C(sse42, 20)
4176 C(x2apic, 21)
4177 C(movbe, 22)
4178 C(popcnt, 23)
4179 C(tscdeadline, 24)
4180 C(aes, 25)
4181 C(xsave, 26)
4182 C(osxsave, 27)
4183 C(avx, 28)
4184 C(f16c, 29)
4185 C(rdrand, 30)
4186 #undef C
4187 #define D(name, bit) X(name, f1d, bit)
4188 D(fpu, 0)
4189 D(vme, 1)
4190 D(de, 2)
4191 D(pse, 3)
4192 D(tsc, 4)
4193 D(msr, 5)
4194 D(pae, 6)
4195 D(mce, 7)
4196 D(cx8, 8)
4197 D(apic, 9)
4198 D(sep, 11)
4199 D(mtrr, 12)
4200 D(pge, 13)
4201 D(mca, 14)
4202 D(cmov, 15)
4203 D(pat, 16)
4204 D(pse36, 17)
4205 D(psn, 18)
4206 D(clfsh, 19)
4207 D(ds, 21)
4208 D(acpi, 22)
4209 D(mmx, 23)
4210 D(fxsr, 24)
4211 D(sse, 25)
4212 D(sse2, 26)
4213 D(ss, 27)
4214 D(htt, 28)
4215 D(tm, 29)
4216 D(pbe, 31)
4217 #undef D
4219 /* cpuid(7): Extended Features. */
4220 #define B(name, bit) X(name, f7b, bit)
4221 B(bmi1, 3)
4222 B(hle, 4)
4223 B(avx2, 5)
4224 B(smep, 7)
4225 B(bmi2, 8)
4226 B(erms, 9)
4227 B(invpcid, 10)
4228 B(rtm, 11)
4229 B(mpx, 14)
4230 B(avx512f, 16)
4231 B(avx512dq, 17)
4232 B(rdseed, 18)
4233 B(adx, 19)
4234 B(smap, 20)
4235 B(avx512ifma, 21)
4236 B(pcommit, 22)
4237 B(clflushopt, 23)
4238 B(clwb, 24)
4239 B(avx512pf, 26)
4240 B(avx512er, 27)
4241 B(avx512cd, 28)
4242 B(sha, 29)
4243 B(avx512bw, 30)
4244 B(avx512vl, 31)
4245 #undef B
4246 #define C(name, bit) X(name, f7c, bit)
4247 C(prefetchwt1, 0)
4248 C(avx512vbmi, 1)
4249 #undef C
4251 #undef X
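/* A minimal usage sketch for the predicates generated above (illustrative;
 * these helpers are internal, MEM_STATIC functions):
 * \code
 *   ZSTD_cpuid_t const cpuid = ZSTD_cpuid();
 *   int const hasBmi2 = ZSTD_cpuid_bmi2(cpuid);
 *   int const hasAvx2 = ZSTD_cpuid_avx2(cpuid);
 * \endcode
 */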
4253 #endif /* ZSTD_COMMON_CPU_H */
4254 /**** ended inlining cpu.h ****/
4255 /**** skipping file: mem.h ****/
4256 /**** skipping file: debug.h ****/
4257 /**** skipping file: error_private.h ****/
4258 #define ZSTD_STATIC_LINKING_ONLY
4259 /**** start inlining ../zstd.h ****/
4261 * Copyright (c) Meta Platforms, Inc. and affiliates.
4262 * All rights reserved.
4264 * This source code is licensed under both the BSD-style license (found in the
4265 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
4266 * in the COPYING file in the root directory of this source tree).
4267 * You may select, at your option, one of the above-listed licenses.
4269 #if defined (__cplusplus)
4270 extern "C" {
4271 #endif
4273 #ifndef ZSTD_H_235446
4274 #define ZSTD_H_235446
4276 /* ====== Dependencies ======*/
4277 #include <limits.h> /* INT_MAX */
4278 #include <stddef.h> /* size_t */
4281 /* ===== ZSTDLIB_API : control library symbols visibility ===== */
4282 #ifndef ZSTDLIB_VISIBLE
4283 /* Backwards compatibility with old macro name */
4284 # ifdef ZSTDLIB_VISIBILITY
4285 # define ZSTDLIB_VISIBLE ZSTDLIB_VISIBILITY
4286 # elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
4287 # define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default")))
4288 # else
4289 # define ZSTDLIB_VISIBLE
4290 # endif
4291 #endif
4293 #ifndef ZSTDLIB_HIDDEN
4294 # if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
4295 # define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden")))
4296 # else
4297 # define ZSTDLIB_HIDDEN
4298 # endif
4299 #endif
4301 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
4302 # define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBLE
4303 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
4304 # define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
4305 #else
4306 # define ZSTDLIB_API ZSTDLIB_VISIBLE
4307 #endif
4309 /* Deprecation warnings :
4310 * Should these warnings be a problem, it is generally possible to disable them,
4311 * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
4312 * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
4314 #ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
4315 # define ZSTD_DEPRECATED(message) /* disable deprecation warnings */
4316 #else
4317 # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
4318 # define ZSTD_DEPRECATED(message) [[deprecated(message)]]
4319 # elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
4320 # define ZSTD_DEPRECATED(message) __attribute__((deprecated(message)))
4321 # elif defined(__GNUC__) && (__GNUC__ >= 3)
4322 # define ZSTD_DEPRECATED(message) __attribute__((deprecated))
4323 # elif defined(_MSC_VER)
4324 # define ZSTD_DEPRECATED(message) __declspec(deprecated(message))
4325 # else
4326 # pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
4327 # define ZSTD_DEPRECATED(message)
4328 # endif
4329 #endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
4332 /*******************************************************************************
4333 Introduction
4335 zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
4336 real-time compression scenarios at zlib-level and better compression ratios.
4337 The zstd compression library provides in-memory compression and decompression
4338 functions.
4340 The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
4341 which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
4342 caution, as they require more memory. The library also offers negative
4343 compression levels, which extend the range of speed vs. ratio preferences.
4344 The lower the level, the faster the speed (at the cost of compression).
4346 Compression can be done in:
4347 - a single step (described as Simple API)
4348 - a single step, reusing a context (described as Explicit context)
4349 - unbounded multiple steps (described as Streaming compression)
4351 The compression ratio achievable on small data can be highly improved using
4352 a dictionary. Dictionary compression can be performed in:
4353 - a single step (described as Simple dictionary API)
4354 - a single step, reusing a dictionary (described as Bulk-processing
4355 dictionary API)
4357 Advanced experimental functions can be accessed using
4358 `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
4360 Advanced experimental APIs should never be used with a dynamically-linked
4361 library. They are not "stable"; their definitions or signatures may change in
4362 the future. Only static linking is allowed.
4363 *******************************************************************************/
4365 /*------ Version ------*/
4366 #define ZSTD_VERSION_MAJOR 1
4367 #define ZSTD_VERSION_MINOR 5
4368 #define ZSTD_VERSION_RELEASE 6
4369 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
4371 /*! ZSTD_versionNumber() :
4372 * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */
4373 ZSTDLIB_API unsigned ZSTD_versionNumber(void);
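/* Illustrative arithmetic: for the 1.5.6 headers above,
 * ZSTD_VERSION_NUMBER = 1*100*100 + 5*100 + 6 = 10506. */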
4375 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
4376 #define ZSTD_QUOTE(str) #str
4377 #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
4378 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
4380 /*! ZSTD_versionString() :
4381 * Return runtime library version, like "1.4.5". Requires v1.3.0+. */
4382 ZSTDLIB_API const char* ZSTD_versionString(void);
4384 /* *************************************
4385 * Default constant
4386 ***************************************/
4387 #ifndef ZSTD_CLEVEL_DEFAULT
4388 # define ZSTD_CLEVEL_DEFAULT 3
4389 #endif
4391 /* *************************************
4392 * Constants
4393 ***************************************/
4395 /* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
4396 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */
4397 #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */
4398 #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */
4399 #define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0
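/* A sketch of how the skippable-frame mask is meant to be used. MEM_readLE32
 * is the library-internal little-endian reader used elsewhere in this file;
 * any 32-bit little-endian load of the frame's first bytes works the same way:
 * \code
 *   U32 const magic = MEM_readLE32(src);
 *   int const isSkippable =
 *       (magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START;
 * \endcode
 */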
4401 #define ZSTD_BLOCKSIZELOG_MAX 17
4402 #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX)
4405 /***************************************
4406 * Simple API
4407 ***************************************/
4408 /*! ZSTD_compress() :
4409 * Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
4410 * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have
4411 * enough space to successfully compress the data.
4412 * @return : compressed size written into `dst` (<= `dstCapacity`),
4413 * or an error code if it fails (which can be tested using ZSTD_isError()). */
4414 ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
4415 const void* src, size_t srcSize,
4416 int compressionLevel);
4418 /*! ZSTD_decompress() :
4419 * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
4420 * `dstCapacity` is an upper bound of originalSize to regenerate.
4421 * If the user cannot infer a maximum upper bound, it's better to use streaming mode to decompress data.
4422 * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
4423 * or an errorCode if it fails (which can be tested using ZSTD_isError()). */
4424 ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
4425 const void* src, size_t compressedSize);
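/* A minimal single-step round-trip sketch using the two calls above
 * (illustrative only; 'src', 'srcSize', 'dst' and 'dstCapacity' are assumed
 * caller-provided, and error handling is reduced to the essentials):
 * \code
 *   size_t const bound = ZSTD_compressBound(srcSize);
 *   void* const cBuf   = malloc(bound);
 *   size_t const cSize = ZSTD_compress(cBuf, bound, src, srcSize, ZSTD_CLEVEL_DEFAULT);
 *   if (!ZSTD_isError(cSize)) {
 *       size_t const dSize = ZSTD_decompress(dst, dstCapacity, cBuf, cSize);
 *       (void)dSize;
 *   }
 *   free(cBuf);
 * \endcode
 * On success, the decompressed size equals the original srcSize.
 */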
4427 /*! ZSTD_getFrameContentSize() : requires v1.3.0+
4428 * `src` should point to the start of a ZSTD encoded frame.
4429 * `srcSize` must be at least as large as the frame header.
4430 * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
4431 * @return : - decompressed size of `src` frame content, if known
4432 * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
4433 * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
4434 * note 1 : a 0 return value means the frame is valid but "empty".
4435 * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
4436 * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
4437 * In which case, it's necessary to use streaming mode to decompress data.
4438 * Optionally, application can rely on some implicit limit,
4439 * as ZSTD_decompress() only needs an upper bound of decompressed size.
4440 * (For example, data could be necessarily cut into blocks <= 16 KB).
4441 * note 3 : decompressed size is always present when compression is completed using single-pass functions,
4442 * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
4443 * note 4 : decompressed size can be very large (64-bits value),
4444 * potentially larger than what local system can handle as a single memory segment.
4445 * In which case, it's necessary to use streaming mode to decompress data.
4446 * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
4447 * Always ensure return value fits within application's authorized limits.
4448 * Each application can set its own limits.
4449 * note 6 : This function replaces ZSTD_getDecompressedSize() */
4450 #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
4451 #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
4452 ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
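/* One way to act on the notes above, as a sketch: fail(),
 * decompressStreaming() and MY_MAX_OUTPUT are hypothetical placeholders for
 * the application's error path, streaming fallback and authorized limit.
 * \code
 *   unsigned long long const rSize = ZSTD_getFrameContentSize(src, srcSize);
 *   if (rSize == ZSTD_CONTENTSIZE_ERROR)   return fail();
 *   if (rSize == ZSTD_CONTENTSIZE_UNKNOWN) return decompressStreaming(src, srcSize);
 *   if (rSize > MY_MAX_OUTPUT)             return fail();
 *   dst = malloc((size_t)rSize);
 * \endcode
 */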
4454 /*! ZSTD_getDecompressedSize() :
4455 * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
4456 * Both functions work the same way, but ZSTD_getDecompressedSize() blends
4457 * "empty", "unknown" and "error" results to the same return value (0),
4458 * while ZSTD_getFrameContentSize() gives them separate return values.
4459 * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
4460 ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize")
4461 ZSTDLIB_API
4462 unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
4464 /*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+
4465 * `src` should point to the start of a ZSTD frame or skippable frame.
4466 * `srcSize` must be >= first frame size
4467 * @return : the compressed size of the first frame starting at `src`,
4468 * suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
4469 * or an error code if input is invalid */
4470 ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
4473 /*====== Helper functions ======*/
4474 /* ZSTD_compressBound() :
4475 * maximum compressed size in worst case single-pass scenario.
4476 * When invoking `ZSTD_compress()` or any other one-pass compression function,
4477 * it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize)
4478 * as it eliminates one potential failure scenario,
4479 * aka not enough room in dst buffer to write the compressed frame.
4480 * Note : ZSTD_compressBound() itself can fail, if @srcSize > ZSTD_MAX_INPUT_SIZE .
4481 * In which case, ZSTD_compressBound() will return an error code
4482 * which can be tested using ZSTD_isError().
4484 * ZSTD_COMPRESSBOUND() :
4485 * same as ZSTD_compressBound(), but as a macro.
4486 * It can be used to produce constants, which can be useful for static allocation,
4487 * for example to size a static array on stack.
4488 * Will produce constant value 0 if srcSize too large.
4490 #define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U)
4491 #define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
4492 ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
4493 /* ZSTD_isError() :
4494 * Most ZSTD_* functions returning a size_t value can be tested for error,
4495 * using ZSTD_isError().
4496 * @return 1 if error, 0 otherwise
4498 ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
4499 ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */
4500 ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */
4501 ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */
4502 ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */
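/* Illustrative usage sketch : one-shot compression sized with ZSTD_compressBound()
 * and checked with ZSTD_isError(). The helper name and buffer handling are
 * illustrative; note that this decode-only amalgamation declares but does not
 * implement the compression entry points, so this assumes a full libzstd build.
 *
 *     #include <stdlib.h>
 *
 *     static void* encode_blob(const void* src, size_t srcSize, size_t* cSizeOut)
 *     {
 *         size_t const cBound = ZSTD_compressBound(srcSize);
 *         if (ZSTD_isError(cBound)) return NULL;               // srcSize too large
 *         void* const cBuf = malloc(cBound);
 *         if (cBuf == NULL) return NULL;
 *         size_t const cSize = ZSTD_compress(cBuf, cBound, src, srcSize, ZSTD_CLEVEL_DEFAULT);
 *         if (ZSTD_isError(cSize)) { free(cBuf); return NULL; }
 *         *cSizeOut = cSize;
 *         return cBuf;
 *     }
 */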
4505 /***************************************
4506 * Explicit context
4507 ***************************************/
4508 /*= Compression context
4509 * When compressing many times,
4510 * it is recommended to allocate a context just once,
4511 * and reuse it for each successive compression operation.
4512 * This will make workload friendlier for system's memory.
4513 * Note : re-using context is just a speed / resource optimization.
4514 * It doesn't change the compression ratio, which remains identical.
4515 * Note 2 : In multi-threaded environments,
4516 * use one different context per thread for parallel execution.
4518 typedef struct ZSTD_CCtx_s ZSTD_CCtx;
4519 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
4520 ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */
4522 /*! ZSTD_compressCCtx() :
4523 * Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
4524 * Important : in order to mirror `ZSTD_compress()` behavior,
4525 * this function compresses at the requested compression level,
4526 * __ignoring any other advanced parameter__ .
4527 * If any advanced parameter was set using the advanced API,
4528 * they will all be reset. Only `compressionLevel` remains.
4530 ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
4531 void* dst, size_t dstCapacity,
4532 const void* src, size_t srcSize,
4533 int compressionLevel);
4535 /*= Decompression context
4536 * When decompressing many times,
4537 * it is recommended to allocate a context only once,
4538 * and reuse it for each successive decompression operation.
4539 * This will make workload friendlier for system's memory.
4540 * Use one context per thread for parallel execution. */
4541 typedef struct ZSTD_DCtx_s ZSTD_DCtx;
4542 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
4543 ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */
4545 /*! ZSTD_decompressDCtx() :
4546 * Same as ZSTD_decompress(),
4547 * requires an allocated ZSTD_DCtx.
4548 * Compatible with sticky parameters (see below).
4550 ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
4551 void* dst, size_t dstCapacity,
4552 const void* src, size_t srcSize);
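/* Illustrative usage sketch : reusing a single ZSTD_DCtx across many independent
 * frames, as recommended above. `struct frame` and its fields (`src`, `size`,
 * `dst`, `dstCapacity`, `dstSize`) are assumed caller-defined, named here for
 * illustration only.
 *
 *     static size_t decode_many(ZSTD_DCtx* dctx, struct frame* frames, size_t n)
 *     {
 *         size_t i;
 *         for (i = 0; i < n; i++) {
 *             size_t const r = ZSTD_decompressDCtx(dctx,
 *                                     frames[i].dst, frames[i].dstCapacity,
 *                                     frames[i].src, frames[i].size);
 *             if (ZSTD_isError(r)) return r;                   // propagate the error code
 *             frames[i].dstSize = r;
 *         }
 *         return 0;
 *     }
 */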
4555 /*********************************************
4556 * Advanced compression API (Requires v1.4.0+)
4557 **********************************************/
4559 /* API design :
4560 * Parameters are pushed one by one into an existing context,
4561 * using ZSTD_CCtx_set*() functions.
4562 * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
4563 * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
4564 * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ .
4566 * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
4568 * This API supersedes all other "advanced" API entry points in the experimental section.
4569 * In the future, we expect to remove API entry points from experimental which are redundant with this API.
4573 /* Compression strategies, listed from fastest to strongest */
4574 typedef enum { ZSTD_fast=1,
4575 ZSTD_dfast=2,
4576 ZSTD_greedy=3,
4577 ZSTD_lazy=4,
4578 ZSTD_lazy2=5,
4579 ZSTD_btlazy2=6,
4580 ZSTD_btopt=7,
4581 ZSTD_btultra=8,
4582 ZSTD_btultra2=9
4583 /* note : new strategies _might_ be added in the future.
4584 Only the order (from fast to strong) is guaranteed */
4585 } ZSTD_strategy;
4587 typedef enum {
4589 /* compression parameters
4590 * Note: When compressing with a ZSTD_CDict these parameters are superseded
4591 * by the parameters used to construct the ZSTD_CDict.
4592 * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
4593 ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
4594 * Note that exact compression parameters are dynamically determined,
4595 * depending on both compression level and srcSize (when known).
4596 * Default level is ZSTD_CLEVEL_DEFAULT==3.
4597 * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
4598 * Note 1 : it's possible to pass a negative compression level.
4599 * Note 2 : setting a level does not automatically set all other compression parameters
4600 * to default. Setting this will however eventually dynamically impact the compression
4601 * parameters which have not been manually set. The manually set
4602 * ones will 'stick'. */
4603 /* Advanced compression parameters :
4604 * It's possible to pin down compression parameters to some specific values.
4605 * In which case, these values are no longer dynamically selected by the compressor */
4606 ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2.
4607 * This will set a memory budget for streaming decompression,
4608 * with larger values requiring more memory
4609 * and typically compressing more.
4610 * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
4611 * Special: value 0 means "use default windowLog".
4612 * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
4613 * requires explicitly allowing such size at streaming decompression stage. */
4614 ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2.
4615 * Resulting memory usage is (1 << (hashLog+2)).
4616 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
4617 * Larger tables improve compression ratio of strategies <= dFast,
4618 * and improve speed of strategies > dFast.
4619 * Special: value 0 means "use default hashLog". */
4620 ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2.
4621 * Resulting memory usage is (1 << (chainLog+2)).
4622 * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
4623 * Larger tables result in better and slower compression.
4624 * This parameter is useless for "fast" strategy.
4625 * It's still useful when using "dfast" strategy,
4626 * in which case it defines a secondary probe table.
4627 * Special: value 0 means "use default chainLog". */
4628 ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2.
4629 * More attempts result in better and slower compression.
4630 * This parameter is useless for "fast" and "dFast" strategies.
4631 * Special: value 0 means "use default searchLog". */
4632 ZSTD_c_minMatch=105, /* Minimum size of searched matches.
4633 * Note that Zstandard can still find matches of smaller size,
4634 * it just tweaks its search algorithm to look for this size and larger.
4635 * Larger values increase compression and decompression speed, but decrease ratio.
4636 * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX.
4637 * Note that currently, for all strategies < btopt, the effective minimum is 4,
4638 * and for all strategies > fast, the effective maximum is 6.
4639 * Special: value 0 means "use default minMatchLength". */
4640 ZSTD_c_targetLength=106, /* Impact of this field depends on strategy.
4641 * For strategies btopt, btultra & btultra2:
4642 * Length of Match considered "good enough" to stop search.
4643 * Larger values make compression stronger, and slower.
4644 * For strategy fast:
4645 * Distance between match sampling.
4646 * Larger values make compression faster, and weaker.
4647 * Special: value 0 means "use default targetLength". */
4648 ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition.
4649 * The higher the value of selected strategy, the more complex it is,
4650 * resulting in stronger and slower compression.
4651 * Special: value 0 means "use default strategy". */
4653 ZSTD_c_targetCBlockSize=130, /* v1.5.6+
4654 * Attempts to fit compressed block size into approximately targetCBlockSize.
4655 * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
4656 * Note that it's not a guarantee, just a convergence target (default:0).
4657 * No target when targetCBlockSize == 0.
4658 * This is helpful in low bandwidth streaming environments to improve end-to-end latency,
4659 * when a client can make use of partial documents (a prominent example being Chrome).
4660 * Note: this parameter is stable since v1.5.6.
4661 * It was present as an experimental parameter in earlier versions,
4662 * but using it with earlier library versions is not recommended
4663 * due to massive performance regressions.
4665 /* LDM mode parameters */
4666 ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
4667 * This parameter is designed to improve compression ratio
4668 * for large inputs, by finding large matches at long distance.
4669 * It increases memory usage and window size.
4670 * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
4671 * except when expressly set to a different value.
4672 * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and
4673 * compression strategy >= ZSTD_btopt (== compression level 16+) */
4674 ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2.
4675 * Larger values increase memory usage and compression ratio,
4676 * but decrease compression speed.
4677 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
4678 * default: windowlog - 7.
4679 * Special: value 0 means "automatically determine hashlog". */
4680 ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher.
4681 * Values that are too large or too small usually decrease compression ratio.
4682 * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
4683 * Special: value 0 means "use default value" (default: 64). */
4684 ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution.
4685 * Larger values improve collision resolution but decrease compression speed.
4686 * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX.
4687 * Special: value 0 means "use default value" (default: 3). */
4688 ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table.
4689 * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
4690 * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
4691 * Larger values improve compression speed.
4692 * Deviating far from default value will likely result in a compression ratio decrease.
4693 * Special: value 0 means "automatically determine hashRateLog". */
4695 /* frame parameters */
4696 ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
4697 * Content size must be known at the beginning of compression.
4698 * This is automatically the case when using ZSTD_compress2(),
4699 * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
4700 ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
4701 ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */
4703 /* multi-threading parameters */
4704 /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
4705 * Otherwise, trying to set any other value than default (0) will be a no-op and return an error.
4706 * In a situation where it's unknown if the linked library supports multi-threading or not,
4707 * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property.
4709 ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel.
4710 * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() :
4711 * ZSTD_compressStream*() consumes input and flushes output if possible, but immediately gives back control to the caller,
4712 * while compression is performed in parallel, within worker thread(s).
4713 * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
4714 * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
4715 * More workers improve speed, but also increase memory usage.
4716 * Default value is `0`, aka "single-threaded mode" : no worker is spawned,
4717 * compression is performed inside the caller's thread, and all invocations are blocking */
4718 ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
4719 * Each compression job is completed in parallel, so this value can indirectly impact the number of active threads.
4720 * 0 means default, which is dynamically determined based on compression parameters.
4721 * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest.
4722 * The minimum size is automatically and transparently enforced. */
4723 ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size.
4724 * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
4725 * It helps preserve compression ratio, while each job is compressed in parallel.
4726 * This value is enforced only when nbWorkers >= 1.
4727 * Larger values increase compression ratio, but decrease speed.
4728 * Possible values range from 0 to 9 :
4729 * - 0 means "default" : value will be determined by the library, depending on strategy
4730 * - 1 means "no overlap"
4731 * - 9 means "full overlap", using a full window size.
4732 * Each intermediate rank increases/decreases load size by a factor 2 :
4733 * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default
4734 * default value varies between 6 and 9, depending on strategy */
4736 /* note : additional experimental parameters are also available
4737 * within the experimental section of the API.
4738 * At the time of this writing, they include :
4739 * ZSTD_c_rsyncable
4740 * ZSTD_c_format
4741 * ZSTD_c_forceMaxWindow
4742 * ZSTD_c_forceAttachDict
4743 * ZSTD_c_literalCompressionMode
4744 * ZSTD_c_srcSizeHint
4745 * ZSTD_c_enableDedicatedDictSearch
4746 * ZSTD_c_stableInBuffer
4747 * ZSTD_c_stableOutBuffer
4748 * ZSTD_c_blockDelimiters
4749 * ZSTD_c_validateSequences
4750 * ZSTD_c_useBlockSplitter
4751 * ZSTD_c_useRowMatchFinder
4752 * ZSTD_c_prefetchCDictTables
4753 * ZSTD_c_enableSeqProducerFallback
4754 * ZSTD_c_maxBlockSize
4755 * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
4756 * note : never ever use experimentalParam? names directly;
4757 * also, the enum values themselves are unstable and can still change.
4759 ZSTD_c_experimentalParam1=500,
4760 ZSTD_c_experimentalParam2=10,
4761 ZSTD_c_experimentalParam3=1000,
4762 ZSTD_c_experimentalParam4=1001,
4763 ZSTD_c_experimentalParam5=1002,
4764 /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */
4765 ZSTD_c_experimentalParam7=1004,
4766 ZSTD_c_experimentalParam8=1005,
4767 ZSTD_c_experimentalParam9=1006,
4768 ZSTD_c_experimentalParam10=1007,
4769 ZSTD_c_experimentalParam11=1008,
4770 ZSTD_c_experimentalParam12=1009,
4771 ZSTD_c_experimentalParam13=1010,
4772 ZSTD_c_experimentalParam14=1011,
4773 ZSTD_c_experimentalParam15=1012,
4774 ZSTD_c_experimentalParam16=1013,
4775 ZSTD_c_experimentalParam17=1014,
4776 ZSTD_c_experimentalParam18=1015,
4777 ZSTD_c_experimentalParam19=1016
4778 } ZSTD_cParameter;
4780 typedef struct {
4781 size_t error;
4782 int lowerBound;
4783 int upperBound;
4784 } ZSTD_bounds;
4786 /*! ZSTD_cParam_getBounds() :
4787 * All parameters must belong to an interval with lower and upper bounds,
4788 * otherwise they will either trigger an error or be automatically clamped.
4789 * @return : a structure, ZSTD_bounds, which contains
4790 * - an error status field, which must be tested using ZSTD_isError()
4791 * - lower and upper bounds, both inclusive
4793 ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam);
4795 /*! ZSTD_CCtx_setParameter() :
4796 * Set one compression parameter, selected by enum ZSTD_cParameter.
4797 * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds().
4798 * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
4799 * Setting a parameter is generally only possible during frame initialization (before starting compression).
4800 * Exception : when using multi-threading mode (nbWorkers >= 1),
4801 * the following parameters can be updated _during_ compression (within same frame):
4802 * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
4803 * new parameters will be active for next job only (after a flush()).
4804 * @return : an error code (which can be tested using ZSTD_isError()).
4806 ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
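/* Illustrative usage sketch : clamping a caller-supplied compression level into
 * the advertised bounds before applying it. The helper name is illustrative.
 *
 *     static size_t set_level_checked(ZSTD_CCtx* cctx, int level)
 *     {
 *         ZSTD_bounds const b = ZSTD_cParam_getBounds(ZSTD_c_compressionLevel);
 *         if (ZSTD_isError(b.error)) return b.error;
 *         if (level < b.lowerBound) level = b.lowerBound;
 *         if (level > b.upperBound) level = b.upperBound;
 *         return ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
 *     }
 */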
4808 /*! ZSTD_CCtx_setPledgedSrcSize() :
4809 * Total input data size to be compressed as a single frame.
4810 * Value will be written into the frame header, unless explicitly forbidden using ZSTD_c_contentSizeFlag.
4811 * This value will also be checked at end of frame, and trigger an error if not respected.
4812 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
4813 * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame.
4814 * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
4815 * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame.
4816 * Note 2 : pledgedSrcSize is only valid once, for the next frame.
4817 * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN.
4818 * Note 3 : Whenever all input data is provided and consumed in a single round,
4819 * for example with ZSTD_compress2(),
4820 * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end),
4821 * this value is automatically overridden by srcSize instead.
4823 ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
4825 typedef enum {
4826 ZSTD_reset_session_only = 1,
4827 ZSTD_reset_parameters = 2,
4828 ZSTD_reset_session_and_parameters = 3
4829 } ZSTD_ResetDirective;
4831 /*! ZSTD_CCtx_reset() :
4832 * There are 2 different things that can be reset, independently or jointly :
4833 * - The session : will stop compressing current frame, and make CCtx ready to start a new one.
4834 * Useful after an error, or to interrupt any ongoing compression.
4835 * Any internal data not yet flushed is cancelled.
4836 * Compression parameters and dictionary remain unchanged.
4837 * They will be used to compress next frame.
4838 * Resetting session never fails.
4839 * - The parameters : changes all parameters back to "default".
4840 * This also removes any reference to any dictionary or external sequence producer.
4841 * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
4842 * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
4843 * - Both : similar to resetting the session, followed by resetting parameters.
4845 ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
4847 /*! ZSTD_compress2() :
4848 * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
4849 * (note that this entry point doesn't even expose a compression level parameter).
4850 * ZSTD_compress2() always starts a new frame.
4851 * Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
4852 * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
4853 * - The function is always blocking, returns when compression is completed.
4854 * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have
4855 * enough space to successfully compress the data, though it is possible it fails for other reasons.
4856 * @return : compressed size written into `dst` (<= `dstCapacity`),
4857 * or an error code if it fails (which can be tested using ZSTD_isError()).
4859 ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx,
4860 void* dst, size_t dstCapacity,
4861 const void* src, size_t srcSize);
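/* Illustrative usage sketch : one-shot compression through the advanced API, with
 * two sticky parameters set beforehand. Parameter choices (level 19, checksum on)
 * are examples only, and the compression side assumes a full libzstd build.
 *
 *     static size_t encode_with_checksum(ZSTD_CCtx* cctx,
 *                                        void* dst, size_t dstCapacity,
 *                                        const void* src, size_t srcSize)
 *     {
 *         size_t r = ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
 *         if (ZSTD_isError(r)) return r;
 *         r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
 *         if (ZSTD_isError(r)) return r;
 *         r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
 *         if (ZSTD_isError(r)) return r;
 *         return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
 *     }
 */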
4864 /***********************************************
4865 * Advanced decompression API (Requires v1.4.0+)
4866 ************************************************/
4868 /* The advanced API pushes parameters one by one into an existing DCtx context.
4869 * Parameters are sticky, and remain valid for all following frames
4870 * using the same DCtx context.
4871 * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
4872 * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream().
4873 * Therefore, no new decompression function is necessary.
4876 typedef enum {
4878 ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which
4879 * the streaming API will refuse to allocate memory buffer
4880 * in order to protect the host from unreasonable memory requirements.
4881 * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
4882 * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
4883 * Special: value 0 means "use default maximum windowLog". */
4885 /* note : additional experimental parameters are also available
4886 * within the experimental section of the API.
4887 * At the time of this writing, they include :
4888 * ZSTD_d_format
4889 * ZSTD_d_stableOutBuffer
4890 * ZSTD_d_forceIgnoreChecksum
4891 * ZSTD_d_refMultipleDDicts
4892 * ZSTD_d_disableHuffmanAssembly
4893 * ZSTD_d_maxBlockSize
4894 * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
4895 * note : never ever use experimentalParam? names directly
4897 ZSTD_d_experimentalParam1=1000,
4898 ZSTD_d_experimentalParam2=1001,
4899 ZSTD_d_experimentalParam3=1002,
4900 ZSTD_d_experimentalParam4=1003,
4901 ZSTD_d_experimentalParam5=1004,
4902 ZSTD_d_experimentalParam6=1005
4904 } ZSTD_dParameter;
4906 /*! ZSTD_dParam_getBounds() :
4907 * All parameters must belong to an interval with lower and upper bounds,
4908 * otherwise they will either trigger an error or be automatically clamped.
4909 * @return : a structure, ZSTD_bounds, which contains
4910 * - an error status field, which must be tested using ZSTD_isError()
4911 * - both lower and upper bounds, inclusive
4913 ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam);
4915 /*! ZSTD_DCtx_setParameter() :
4916 * Set one compression parameter, selected by enum ZSTD_dParameter.
4917 * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds().
4918 * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
4919 * Setting a parameter is only possible during frame initialization (before starting decompression).
4920 * @return : 0, or an error code (which can be tested using ZSTD_isError()).
4922 ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
4924 /*! ZSTD_DCtx_reset() :
4925 * Return a DCtx to clean state.
4926 * Session and parameters can be reset jointly or separately.
4927 * Parameters can only be reset when no active frame is being decompressed.
4928 * @return : 0, or an error code, which can be tested with ZSTD_isError()
4930 ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
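/* Illustrative usage sketch : raising the decoder's window limit so it accepts
 * frames compressed with a large ZSTD_c_windowLog, after checking the advertised
 * bounds. The parameter is sticky and applies to subsequent streaming
 * decompression on this DCtx (see the Streaming section below).
 *
 *     static size_t allow_large_windows(ZSTD_DCtx* dctx, int windowLogMax)
 *     {
 *         ZSTD_bounds const b = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
 *         if (ZSTD_isError(b.error)) return b.error;
 *         if (windowLogMax > b.upperBound) windowLogMax = b.upperBound;
 *         return ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, windowLogMax);
 *     }
 */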
4933 /****************************
4934 * Streaming
4935 ****************************/
4937 typedef struct ZSTD_inBuffer_s {
4938 const void* src; /**< start of input buffer */
4939 size_t size; /**< size of input buffer */
4940 size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
4941 } ZSTD_inBuffer;
4943 typedef struct ZSTD_outBuffer_s {
4944 void* dst; /**< start of output buffer */
4945 size_t size; /**< size of output buffer */
4946 size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
4947 } ZSTD_outBuffer;
4951 /*-***********************************************************************
4952 * Streaming compression - HowTo
4954 * A ZSTD_CStream object is required to track streaming operation.
4955 * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
4956 * ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
4957 * It is recommended to reuse a ZSTD_CStream: re-using already allocated memory plays nicer with the system's memory.
4959 * For parallel execution, use one separate ZSTD_CStream per thread.
4961 * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
4963 * Parameters are sticky : when starting a new compression on the same context,
4964 * it will reuse the same sticky parameters as previous compression session.
4965 * When in doubt, it's recommended to fully initialize the context before usage.
4966 * Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
4967 * ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
4968 * set more specific parameters, the pledged source size, or load a dictionary.
4970 * Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to
4971 * consume input stream. The function will automatically update both `pos`
4972 * fields within `input` and `output`.
4973 * Note that the function may not consume the entire input, for example, because
4974 * the output buffer is already full, in which case `input.pos < input.size`.
4975 * The caller must check if input has been entirely consumed.
4976 * If not, the caller must make some room to receive more compressed data,
4977 * and then present again remaining input data.
4978 * note: ZSTD_e_continue is guaranteed to make some forward progress when called,
4979 * but doesn't guarantee maximal forward progress. This is especially relevant
4980 * when compressing with multiple threads. The call won't block if it can
4981 * consume some input, but if it can't it will wait for some, but not all,
4982 * output to be flushed.
4983 * @return : provides a minimum amount of data remaining to be flushed from internal buffers
4984 * or an error code, which can be tested using ZSTD_isError().
4986 * At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
4987 * using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated.
4988 * Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0).
4989 * In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
4990 * You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
4991 * operation.
4992 * note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
4993 * block until the flush is complete or the output buffer is full.
4994 * @return : 0 if internal buffers are entirely flushed,
4995 * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
4996 * or an error code, which can be tested using ZSTD_isError().
4998 * Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame.
4999 * It will perform a flush and write frame epilogue.
5000 * The epilogue is required for decoders to consider a frame completed.
5001 * flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
5002 * You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
5003 * start a new frame.
5004 * note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
5005 * block until the flush is complete or the output buffer is full.
5006 * @return : 0 if frame fully completed and fully flushed,
5007 * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
5008 * or an error code, which can be tested using ZSTD_isError().
5010 * *******************************************************************/
5012 typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
5013 /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
5014 /*===== ZSTD_CStream management functions =====*/
5015 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
5016 ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */
5018 /*===== Streaming compression functions =====*/
5019 typedef enum {
5020 ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
5021 ZSTD_e_flush=1, /* flush any data provided so far,
5022 * it creates (at least) one new block, that can be decoded immediately on reception;
5023 * frame will continue: any future data can still reference previously compressed data, improving compression.
5024 * note : multithreaded compression will block to flush as much output as possible. */
5025 ZSTD_e_end=2 /* flush any remaining data _and_ close current frame.
5026 * note that frame is only closed after compressed data is fully flushed (return value == 0).
5027 * After that point, any additional data starts a new frame.
5028 * note : each frame is independent (does not reference any content from previous frame).
5029 : note : multithreaded compression will block to flush as much output as possible. */
5030 } ZSTD_EndDirective;
5032 /*! ZSTD_compressStream2() : Requires v1.4.0+
5033 * Behaves about the same as ZSTD_compressStream, with additional control on end directive.
5034 * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
5035 * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
5036 * - output->pos must be <= dstCapacity, input->pos must be <= srcSize
5037 * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
5038 * - endOp must be a valid directive
5039 * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
5040 * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available,
5041 * and then immediately returns, just indicating that there is some data remaining to be flushed.
5042 * The function nonetheless guarantees forward progress : it will return only after it reads or writes at least one byte.
5043 * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
5044 * - @return provides a minimum amount of data remaining to be flushed from internal buffers
5045 * or an error code, which can be tested using ZSTD_isError().
5046 * if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
5047 * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
5048 * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
5049 * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
5050 * only ZSTD_e_end or ZSTD_e_flush operations are allowed.
5051 * Before starting a new compression job, or changing compression parameters,
5052 * it is required to fully flush internal buffers.
5053 * - note: if an operation ends with an error, it may leave @cctx in an undefined state.
5054 * Therefore, it's UB to invoke ZSTD_compressStream2() or ZSTD_compressStream() on such a state.
5055 * In order to be re-employed after an error, a state must be reset,
5056 * which can be done explicitly (ZSTD_CCtx_reset()),
5057 * or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
5059 ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
5060 ZSTD_outBuffer* output,
5061 ZSTD_inBuffer* input,
5062 ZSTD_EndDirective endOp);
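/* Illustrative usage sketch : compressing a whole in-memory buffer with
 * ZSTD_compressStream2() and ZSTD_e_end. Assumes dstCapacity >= ZSTD_compressBound(srcSize),
 * so the loop is guaranteed to terminate, and a full libzstd build for the
 * compression side; a file-to-file caller would instead feed chunks with
 * ZSTD_e_continue and finish with ZSTD_e_end.
 *
 *     static size_t stream_compress(ZSTD_CCtx* cctx,
 *                                   void* dst, size_t dstCapacity,
 *                                   const void* src, size_t srcSize)
 *     {
 *         ZSTD_inBuffer  in  = { src, srcSize, 0 };
 *         ZSTD_outBuffer out = { dst, dstCapacity, 0 };
 *         size_t remaining;
 *         do {
 *             remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
 *             if (ZSTD_isError(remaining)) return remaining;
 *         } while (remaining != 0);
 *         return out.pos;                                      // size of the compressed frame
 *     }
 */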
5065 /* These buffer sizes are softly recommended.
5066 * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
5067 * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
5068 * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
5070 * However, note that these recommendations are from the perspective of a C caller program.
5071 * If the streaming interface is invoked from some other language,
5072 * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
5073 * a major performance rule is to reduce crossing such interface to an absolute minimum.
5074 * It's not rare for more time to be spent crossing the interface than on compression itself.
5075 * In such cases, prefer using large buffers, as large as practical,
5076 * for both input and output, to reduce the number of roundtrips.
5078 ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
5079 ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guaranteed to successfully flush at least one complete compressed block. */
5082 /* *****************************************************************************
5083 * This following is a legacy streaming API, available since v1.0+ .
5084 * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
5085 * It is redundant, but remains fully supported.
5086 ******************************************************************************/
5089 * Equivalent to:
5091 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
5092 * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
5093 * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
5095 * Note that ZSTD_initCStream() clears any previously set dictionary. Use the new API
5096 * to compress with a dictionary.
5098 ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
5100 * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
5101 * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
5102 * the next read size (if non-zero and not an error). ZSTD_compressStream2()
5103 * returns the minimum nb of bytes left to flush (if non-zero and not an error).
5105 ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
5106 /*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
5107 ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
5108 /*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
5109 ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
5112 /*-***************************************************************************
5113 * Streaming decompression - HowTo
5115 * A ZSTD_DStream object is required to track streaming operations.
5116 * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
5117 * ZSTD_DStream objects can be reused multiple times.
5119 * Use ZSTD_initDStream() to start a new decompression operation.
5120 * @return : recommended first input size
5121 * Alternatively, use advanced API to set specific properties.
5123 * Use ZSTD_decompressStream() repetitively to consume your input.
5124 * The function will update both `pos` fields.
5125 * If `input.pos < input.size`, some input has not been consumed.
5126 * It's up to the caller to present again remaining data.
5127 * The function tries to flush all data decoded immediately, respecting output buffer size.
5128 * If `output.pos < output.size`, decoder has flushed everything it could.
5129 * But if `output.pos == output.size`, there might be some data left within internal buffers.
5130 * In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
5131 * Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
5132 * @return : 0 when a frame is completely decoded and fully flushed,
5133 * or an error code, which can be tested using ZSTD_isError(),
5134 * or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
5135 * the return value is a suggested next input size (just a hint for better latency)
5136 * that will never request more than the remaining frame size.
5137 * *******************************************************************************/
5139 typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
5140 /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
5141 /*===== ZSTD_DStream management functions =====*/
5142 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
5143 ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */
5145 /*===== Streaming decompression functions =====*/
5147 /*! ZSTD_initDStream() :
5148 * Initialize/reset DStream state for new decompression operation.
5149 * Call before new decompression operation using same DStream.
5151 * Note : This function is redundant with the advanced API and equivalent to:
5152 * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
5153 * ZSTD_DCtx_refDDict(zds, NULL);
5155 ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
5157 /*! ZSTD_decompressStream() :
5158 * Streaming decompression function.
5159 * Call repeatedly to consume the full input, updating it as necessary.
5160 * The function will update both input and output `pos` fields, exposing current state via these fields:
5161 * - `input.pos < input.size`, some input remaining and caller should provide remaining input
5162 * on the next call.
5163 * - `output.pos < output.size`, decoder finished and flushed all remaining buffers.
5164 * - `output.pos == output.size`, potentially unflushed data present in the internal buffers,
5165 * call ZSTD_decompressStream() again to flush remaining data to output.
5166 * Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX.
5168 * @return : 0 when a frame is completely decoded and fully flushed,
5169 * or an error code, which can be tested using ZSTD_isError(),
5170 * or any other value > 0, which means there is some decoding or flushing to do to complete current frame.
5172 * Note: when an operation returns with an error code, the @zds state may be left in undefined state.
5173 * It's UB to invoke `ZSTD_decompressStream()` on such a state.
5174 * In order to re-use such a state, it must be first reset,
5175 * which can be done explicitly (`ZSTD_DCtx_reset()`),
5176 * or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)
5178 ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
5180 ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */
5181 ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guaranteed to successfully flush at least one complete block in all circumstances. */
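/* Illustrative usage sketch : streaming decompression of a single complete frame
 * held in memory, emitting output through a caller-supplied `sink` callback.
 * The callback, its `opaque` argument and the fixed-size stack buffer are
 * assumptions for illustration; a production caller would typically size the
 * output buffer with ZSTD_DStreamOutSize() and read input incrementally.
 *
 *     static size_t stream_decompress(ZSTD_DStream* zds,
 *                                     const void* src, size_t srcSize,
 *                                     void (*sink)(void* opaque, const void* p, size_t n),
 *                                     void* opaque)
 *     {
 *         char outBuf[4096];
 *         ZSTD_inBuffer in = { src, srcSize, 0 };
 *         size_t r = ZSTD_initDStream(zds);
 *         if (ZSTD_isError(r)) return r;
 *         do {
 *             ZSTD_outBuffer out = { outBuf, sizeof(outBuf), 0 };
 *             r = ZSTD_decompressStream(zds, &out, &in);
 *             if (ZSTD_isError(r)) return r;
 *             sink(opaque, outBuf, out.pos);
 *             if (r != 0 && in.pos == in.size && out.pos < out.size)
 *                 return r;                                    // input ended before end of frame
 *         } while (r != 0);
 *         return 0;                                            // frame fully decoded and flushed
 *     }
 */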
5184 /**************************
5185 * Simple dictionary API
5186 ***************************/
5187 /*! ZSTD_compress_usingDict() :
5188 * Compression at an explicit compression level using a Dictionary.
5189 * A dictionary can be any arbitrary data segment (also called a prefix),
5190 * or a buffer with specified information (see zdict.h).
5191 * Note : This function loads the dictionary, resulting in significant startup delay.
5192 * It's intended for a dictionary used only once.
5193 * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
5194 ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
5195 void* dst, size_t dstCapacity,
5196 const void* src, size_t srcSize,
5197 const void* dict,size_t dictSize,
5198 int compressionLevel);
5200 /*! ZSTD_decompress_usingDict() :
5201 * Decompression using a known Dictionary.
5202 * Dictionary must be identical to the one used during compression.
5203 * Note : This function loads the dictionary, resulting in significant startup delay.
5204 * It's intended for a dictionary used only once.
5205 * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
5206 ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
5207 void* dst, size_t dstCapacity,
5208 const void* src, size_t srcSize,
5209 const void* dict,size_t dictSize);
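/* Illustrative usage sketch : a compress/decompress roundtrip with a raw
 * dictionary buffer. The dictionary would typically be produced by the zstd
 * dictionary trainer (see zdict.h); names are illustrative, and the compression
 * half assumes a full libzstd build.
 *
 *     static int roundtrip_with_dict(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
 *                                    const void* dict, size_t dictSize,
 *                                    const void* src, size_t srcSize,
 *                                    void* cBuf, size_t cCapacity,
 *                                    void* dBuf, size_t dCapacity)
 *     {
 *         size_t const cSize = ZSTD_compress_usingDict(cctx, cBuf, cCapacity,
 *                                                      src, srcSize,
 *                                                      dict, dictSize, ZSTD_CLEVEL_DEFAULT);
 *         if (ZSTD_isError(cSize)) return -1;
 *         size_t const dSize = ZSTD_decompress_usingDict(dctx, dBuf, dCapacity,
 *                                                        cBuf, cSize,
 *                                                        dict, dictSize);
 *         return (ZSTD_isError(dSize) || dSize != srcSize) ? -1 : 0;
 *     }
 */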
5212 /***********************************
5213 * Bulk processing dictionary API
5214 **********************************/
5215 typedef struct ZSTD_CDict_s ZSTD_CDict;
5217 /*! ZSTD_createCDict() :
5218 * When compressing multiple messages or blocks using the same dictionary,
5219 * it's recommended to digest the dictionary only once, since it's a costly operation.
5220 * ZSTD_createCDict() will create a state from digesting a dictionary.
5221 * The resulting state can be used for future compression operations with very limited startup cost.
5222 * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
5223 * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
5224 * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
5225 * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
5226 * in which case the only thing that it transports is the @compressionLevel.
5227 * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
5228 * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
5229 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
5230 int compressionLevel);
5232 /*! ZSTD_freeCDict() :
5233 * Function frees memory allocated by ZSTD_createCDict().
5234 * If a NULL pointer is passed, no operation is performed. */
5235 ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
5237 /*! ZSTD_compress_usingCDict() :
5238 * Compression using a digested Dictionary.
5239 * Recommended when same dictionary is used multiple times.
5240 * Note : compression level is _decided at dictionary creation time_,
5241 * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
5242 ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
5243 void* dst, size_t dstCapacity,
5244 const void* src, size_t srcSize,
5245 const ZSTD_CDict* cdict);
5248 typedef struct ZSTD_DDict_s ZSTD_DDict;
5250 /*! ZSTD_createDDict() :
5251 * Create a digested dictionary, ready to start decompression operation without startup delay.
5252 * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
5253 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
5255 /*! ZSTD_freeDDict() :
5256 * Function frees memory allocated with ZSTD_createDDict()
5257 * If a NULL pointer is passed, no operation is performed. */
5258 ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict);
5260 /*! ZSTD_decompress_usingDDict() :
5261 * Decompression using a digested Dictionary.
5262 * Recommended when same dictionary is used multiple times. */
5263 ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
5264 void* dst, size_t dstCapacity,
5265 const void* src, size_t srcSize,
5266 const ZSTD_DDict* ddict);
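/* Illustrative usage sketch : digesting a dictionary once into a DDict, then
 * decoding many frames with it, as recommended above. `struct frame` and its
 * fields are the same assumed caller-defined type as in the earlier DCtx sketch.
 *
 *     static int decode_all(ZSTD_DCtx* dctx,
 *                           const void* dictBuffer, size_t dictSize,
 *                           struct frame* frames, size_t n)
 *     {
 *         ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictSize);
 *         int ok = (ddict != NULL);
 *         size_t i;
 *         for (i = 0; ok && i < n; i++) {
 *             size_t const r = ZSTD_decompress_usingDDict(dctx,
 *                                     frames[i].dst, frames[i].dstCapacity,
 *                                     frames[i].src, frames[i].size, ddict);
 *             if (ZSTD_isError(r)) ok = 0; else frames[i].dstSize = r;
 *         }
 *         ZSTD_freeDDict(ddict);                               // accepts NULL
 *         return ok ? 0 : -1;
 *     }
 */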
5269 /********************************
5270 * Dictionary helper functions
5271 *******************************/
5273 /*! ZSTD_getDictID_fromDict() : Requires v1.4.0+
5274 * Provides the dictID stored within dictionary.
5275 * if @return == 0, the dictionary is not conformant with Zstandard specification.
5276 * It can still be loaded, but as a content-only dictionary. */
5277 ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
5279 /*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+
5280 * Provides the dictID of the dictionary loaded into `cdict`.
5281 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
5282 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
5283 ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
5285 /*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+
5286 * Provides the dictID of the dictionary loaded into `ddict`.
5287 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
5288 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
5289 ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
5291 /*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+
5292 * Provides the dictID required to decompress the frame stored within `src`.
5293 * If @return == 0, the dictID could not be decoded.
5294 * This could be for one of the following reasons :
5295 * - The frame does not require a dictionary to be decoded (most common case).
5296 * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden piece of information.
5297 * Note : this use case also happens when using a non-conformant dictionary.
5298 * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
5299 * - This is not a Zstandard frame.
5300 * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
5301 ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
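/* Illustrative usage sketch : selecting the right digested dictionary for a frame
 * by matching dictIDs. `ddicts` and `nbDDicts` are an assumed caller-managed array.
 *
 *     static const ZSTD_DDict* pick_ddict(const void* src, size_t srcSize,
 *                                         const ZSTD_DDict* const* ddicts, size_t nbDDicts)
 *     {
 *         unsigned const wanted = ZSTD_getDictID_fromFrame(src, srcSize);
 *         size_t i;
 *         if (wanted == 0) return NULL;                        // no dictID in the frame (see above)
 *         for (i = 0; i < nbDDicts; i++) {
 *             if (ZSTD_getDictID_fromDDict(ddicts[i]) == wanted) return ddicts[i];
 *         }
 *         return NULL;                                         // matching dictionary not available
 *     }
 */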
5304 /*******************************************************************************
5305 * Advanced dictionary and prefix API (Requires v1.4.0+)
5307 * This API allows dictionaries to be used with ZSTD_compress2(),
5308 * ZSTD_compressStream2(), and ZSTD_decompressDCtx().
5309 * Dictionaries are sticky, they remain valid when same context is reused,
5310 * they only reset when the context is reset
5311 * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
5312 * In contrast, Prefixes are single-use.
5313 ******************************************************************************/
5316 /*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+
5317 * Create an internal CDict from `dict` buffer.
5318 * Decompression will have to use same dictionary.
5319 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
5320 * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
5321 * meaning "return to no-dictionary mode".
5322 * Note 1 : Dictionary is sticky, it will be used for all future compressed frames,
5323 * until parameters are reset, a new dictionary is loaded, or the dictionary
5324 * is explicitly invalidated by loading a NULL dictionary.
5325 * Note 2 : Loading a dictionary involves building tables.
5326 * It's also a CPU consuming operation, with non-negligible impact on latency.
5327 * Tables are dependent on compression parameters, and for this reason,
5328 * compression parameters can no longer be changed after loading a dictionary.
5329 * Note 3 :`dict` content will be copied internally.
5330 * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
5331 * In such a case, dictionary buffer must outlive its users.
5332 * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
5333 * to precisely select how dictionary content must be interpreted.
5334 * Note 5 : This method does not benefit from LDM (long distance mode).
5335 * If you want to employ LDM on some large dictionary content,
5336 * prefer employing ZSTD_CCtx_refPrefix() described below.
5338 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
5340 /*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
5341 * Reference a prepared dictionary, to be used for all future compressed frames.
5342 * Note that compression parameters are enforced from within CDict,
5343 * and supersede any compression parameter previously set within CCtx.
5344 * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
5345 * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
5346 * The dictionary will remain valid for future compressed frames using same CCtx.
5347 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
5348 * Special : Referencing a NULL CDict means "return to no-dictionary mode".
5349 * Note 1 : Currently, only one dictionary can be managed.
5350 * Referencing a new dictionary effectively "discards" any previous one.
5351 * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
5352 ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
5354 /*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+
5355 * Reference a prefix (single-usage dictionary) for next compressed frame.
5356 * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
5357 * Decompression will need same prefix to properly regenerate data.
5358 * Compressing with a prefix is similar in outcome as performing a diff and compressing it,
5359 * but performs much faster, especially during decompression (compression speed is tunable with compression level).
5360 * This method is compatible with LDM (long distance mode).
5361 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
5362 * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
5363 * Note 1 : Prefix buffer is referenced. It **must** outlive compression.
5364 * Its content must remain unmodified during compression.
5365 * Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
5366 * ensure that the window size is large enough to contain the entire source.
5367 * See ZSTD_c_windowLog.
5368 * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
5369 * It's a CPU consuming operation, with non-negligible impact on latency.
5370 * If there is a need to use the same prefix multiple times, consider loadDictionary instead.
5371 * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
5372 * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
5373 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
5374 const void* prefix, size_t prefixSize);
5376 /*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+
5377 * Create an internal DDict from dict buffer, to be used to decompress all future frames.
5378 * The dictionary remains valid for all future frames, until explicitly invalidated, or
5379 * a new dictionary is loaded.
5380 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
5381 * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
5382 * meaning "return to no-dictionary mode".
5383 * Note 1 : Loading a dictionary involves building tables,
5384 * which has a non-negligible impact on CPU usage and latency.
5385 * It's recommended to "load once, use many times", to amortize the cost.
5386 * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading.
5387 * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
5388 * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
5389 * how dictionary content is loaded and interpreted.
5391 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
5393 /*! ZSTD_DCtx_refDDict() : Requires v1.4.0+
5394 * Reference a prepared dictionary, to be used to decompress next frames.
5395 * The dictionary remains active for decompression of future frames using same DCtx.
5397 * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
5398 * will store the DDict references in a table, and the DDict used for decompression
5399 * will be determined at decompression time, as per the dict ID in the frame.
5400 * The memory for the table is allocated on the first call to refDDict, and can be
5401 * freed with ZSTD_freeDCtx().
5403 * If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary
5404 * will be managed, and referencing a dictionary effectively "discards" any previous one.
5406 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
5407 * Special: referencing a NULL DDict means "return to no-dictionary mode".
5408 * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
5410 ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
5412 /*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+
5413 * Reference a prefix (single-usage dictionary) to decompress next frame.
5414 * This is the reverse operation of ZSTD_CCtx_refPrefix(),
5415 * and must use the same prefix as the one used during compression.
5416 * Prefix is **only used once**. Reference is discarded at end of frame.
5417 * End of frame is reached when ZSTD_decompressStream() returns 0.
5418 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
5419 * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
5420 * Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
5421 * Prefix buffer must remain unmodified up to the end of frame,
5422 * reached when ZSTD_decompressStream() returns 0.
5423 * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
5424 * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
5425 * Note 4 : Referencing a raw content prefix has almost no CPU or memory cost.
5426 * A full dictionary is more costly, as it requires building tables.
5428 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
5429 const void* prefix, size_t prefixSize);
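/* Editor's note, not part of the upstream header: a sketch of prefix decompression,
 * mirroring a compression performed with ZSTD_CCtx_refPrefix() on the same prefixBuf
 * (hypothetical buffer). The reference is dropped at the end of each frame.
 * \code
 *   ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *   ZSTD_DCtx_refPrefix(dctx, prefixBuf, prefixSize);  // valid for the next frame only
 *   size_t const dSize = ZSTD_decompressDCtx(dctx, dstBuf, dstCap, srcBuf, srcSize);
 *   (void)dSize;
 *   // to decompress another frame with the same prefix, call ZSTD_DCtx_refPrefix() again
 *   ZSTD_freeDCtx(dctx);
 * \endcode
 */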
5431 /* === Memory management === */
5433 /*! ZSTD_sizeof_*() : Requires v1.4.0+
5434 * These functions give the _current_ memory usage of selected object.
5435 * Note that object memory usage can evolve (increase or decrease) over time. */
5436 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
5437 ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
5438 ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
5439 ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
5440 ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
5441 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
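/* Editor's note, not part of the upstream header: the ZSTD_sizeof_*() helpers report the
 * current footprint of an object, which may change as frames are processed, e.g.:
 * \code
 *   ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *   size_t const before = ZSTD_sizeof_DCtx(dctx);
 *   // ... decompress some frames ...
 *   size_t const after  = ZSTD_sizeof_DCtx(dctx);  // may differ from `before`
 *   (void)before; (void)after;
 *   ZSTD_freeDCtx(dctx);
 * \endcode
 */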
5443 #endif /* ZSTD_H_235446 */
5446 /* **************************************************************************************
5447 * ADVANCED AND EXPERIMENTAL FUNCTIONS
5448 ****************************************************************************************
5449 * The definitions in the following section are considered experimental.
5450 * They are provided for advanced scenarios.
5451 * They should never be used with a dynamic library, as prototypes may change in the future.
5452 * Use them only in association with static linking.
5453 * ***************************************************************************************/
5455 #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
5456 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
5458 /* This can be overridden externally to hide static symbols. */
5459 #ifndef ZSTDLIB_STATIC_API
5460 # if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
5461 # define ZSTDLIB_STATIC_API __declspec(dllexport) ZSTDLIB_VISIBLE
5462 # elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
5463 # define ZSTDLIB_STATIC_API __declspec(dllimport) ZSTDLIB_VISIBLE
5464 # else
5465 # define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE
5466 # endif
5467 #endif
5469 /****************************************************************************************
5470 * experimental API (static linking only)
5471 ****************************************************************************************
5472 * The following symbols and constants
5473 * are not planned to join "stable API" status in the near future.
5474 * They can still change in future versions.
5475 * Some of them are planned to remain in the static_only section indefinitely.
5476 * Some of them might be removed in the future (especially when redundant with existing stable functions)
5477 * ***************************************************************************************/
5479 #define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */
5480 #define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
5481 #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
5482 #define ZSTD_SKIPPABLEHEADERSIZE 8
5484 /* compression parameter bounds */
5485 #define ZSTD_WINDOWLOG_MAX_32 30
5486 #define ZSTD_WINDOWLOG_MAX_64 31
5487 #define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
5488 #define ZSTD_WINDOWLOG_MIN 10
5489 #define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
5490 #define ZSTD_HASHLOG_MIN 6
5491 #define ZSTD_CHAINLOG_MAX_32 29
5492 #define ZSTD_CHAINLOG_MAX_64 30
5493 #define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
5494 #define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
5495 #define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
5496 #define ZSTD_SEARCHLOG_MIN 1
5497 #define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
5498 #define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */
5499 #define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
5500 #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
5501 #define ZSTD_STRATEGY_MIN ZSTD_fast
5502 #define ZSTD_STRATEGY_MAX ZSTD_btultra2
5503 #define ZSTD_BLOCKSIZE_MAX_MIN (1 << 10) /* The minimum valid max blocksize. Maximum blocksizes smaller than this make compressBound() inaccurate. */
5506 #define ZSTD_OVERLAPLOG_MIN 0
5507 #define ZSTD_OVERLAPLOG_MAX 9
5509 #define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame
5510 * requiring a window size larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT),
5511 * to protect the host's memory from unreasonable requirements.
5512 * This limit can be overridden using ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, value).
5513 * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
5516 /* LDM parameter bounds */
5517 #define ZSTD_LDM_HASHLOG_MIN ZSTD_HASHLOG_MIN
5518 #define ZSTD_LDM_HASHLOG_MAX ZSTD_HASHLOG_MAX
5519 #define ZSTD_LDM_MINMATCH_MIN 4
5520 #define ZSTD_LDM_MINMATCH_MAX 4096
5521 #define ZSTD_LDM_BUCKETSIZELOG_MIN 1
5522 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8
5523 #define ZSTD_LDM_HASHRATELOG_MIN 0
5524 #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
5526 /* Advanced parameter bounds */
5527 #define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */
5528 #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
5529 #define ZSTD_SRCSIZEHINT_MIN 0
5530 #define ZSTD_SRCSIZEHINT_MAX INT_MAX
5533 /* --- Advanced types --- */
5535 typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
5537 typedef struct {
5538 unsigned int offset; /* The offset of the match. (NOT the same as the offset code)
5539 * If offset == 0 and matchLength == 0, this sequence represents the last
5540 * literals in the block of litLength size.
5543 unsigned int litLength; /* Literal length of the sequence. */
5544 unsigned int matchLength; /* Match length of the sequence. */
5546 /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
5547 * In this case, we will treat the sequence as a marker for a block boundary.
5550 unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'.
5551 * Ranges from [0, 3].
5553 * Repeat offsets are essentially previous offsets from previous sequences sorted in
5554 * recency order. For more detail, see doc/zstd_compression_format.md
5556 * If rep == 0, then 'offset' does not contain a repeat offset.
5557 * If rep > 0:
5558 * If litLength != 0:
5559 * rep == 1 --> offset == repeat_offset_1
5560 * rep == 2 --> offset == repeat_offset_2
5561 * rep == 3 --> offset == repeat_offset_3
5562 * If litLength == 0:
5563 * rep == 1 --> offset == repeat_offset_2
5564 * rep == 2 --> offset == repeat_offset_3
5565 * rep == 3 --> offset == repeat_offset_1 - 1
5567 * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
5568 * 'rep', but repeat offsets do not necessarily need to be calculated from an external
5569 * sequence provider's perspective. For example, ZSTD_compressSequences() does not
5570 * use this 'rep' field at all (as of now).
5572 } ZSTD_Sequence;
5574 typedef struct {
5575 unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
5576 unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
5577 unsigned hashLog; /**< dispatch table : larger == faster, more memory */
5578 unsigned searchLog; /**< nb of searches : larger == more compression, slower */
5579 unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */
5580 unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
5581 ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */
5582 } ZSTD_compressionParameters;
5584 typedef struct {
5585 int contentSizeFlag; /**< 1: content size will be in frame header (when known) */
5586 int checksumFlag; /**< 1: generate a 32-bit checksum using the XXH64 algorithm at end of frame, for error detection */
5587 int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
5588 } ZSTD_frameParameters;
5590 typedef struct {
5591 ZSTD_compressionParameters cParams;
5592 ZSTD_frameParameters fParams;
5593 } ZSTD_parameters;
5595 typedef enum {
5596 ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
5597 ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
5598 ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
5599 } ZSTD_dictContentType_e;
5601 typedef enum {
5602 ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
5603 ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
5604 } ZSTD_dictLoadMethod_e;
5606 typedef enum {
5607 ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
5608 ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number.
5609 * Useful to save 4 bytes per generated frame.
5610 * Decoder cannot automatically recognise this format; it must be explicitly instructed to expect it. */
5611 } ZSTD_format_e;
5613 typedef enum {
5614 /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */
5615 ZSTD_d_validateChecksum = 0,
5616 ZSTD_d_ignoreChecksum = 1
5617 } ZSTD_forceIgnoreChecksum_e;
5619 typedef enum {
5620 /* Note: this enum controls ZSTD_d_refMultipleDDicts */
5621 ZSTD_rmd_refSingleDDict = 0,
5622 ZSTD_rmd_refMultipleDDicts = 1
5623 } ZSTD_refMultipleDDicts_e;
5625 typedef enum {
5626 /* Note: this enum and the behavior it controls are effectively internal
5627 * implementation details of the compressor. They are expected to continue
5628 * to evolve and should be considered only in the context of extremely
5629 * advanced performance tuning.
5631 * Zstd currently supports the use of a CDict in three ways:
5633 * - The contents of the CDict can be copied into the working context. This
5634 * means that the compression can search both the dictionary and input
5635 * while operating on a single set of internal tables. This makes
5636 * the compression faster per-byte of input. However, the initial copy of
5637 * the CDict's tables incurs a fixed cost at the beginning of the
5638 * compression. For small compressions (< 8 KB), that copy can dominate
5639 * the cost of the compression.
5641 * - The CDict's tables can be used in-place. In this model, compression is
5642 * slower per input byte, because the compressor has to search two sets of
5643 * tables. However, this model incurs no start-up cost (as long as the
5644 * working context's tables can be reused). For small inputs, this can be
5645 * faster than copying the CDict's tables.
5647 * - The CDict's tables are not used at all, and instead we use the working
5648 * context alone to reload the dictionary and use params based on the source
5649 * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
5650 * This method is effective when the dictionary sizes are very small relative
5651 * to the input size, and the input size is fairly large to begin with.
5653 * Zstd has a simple internal heuristic that selects which strategy to use
5654 * at the beginning of a compression. However, if experimentation shows that
5655 * Zstd is making poor choices, it is possible to override that choice with
5656 * this enum.
5658 ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
5659 ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
5660 ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
5661 ZSTD_dictForceLoad = 3 /* Always reload the dictionary */
5662 } ZSTD_dictAttachPref_e;
5664 typedef enum {
5665 ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level.
5666 * Negative compression levels emit uncompressed literals, while positive
5667 * compression levels compress them. */
5668 ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be
5669 * emitted if Huffman compression is not profitable. */
5670 ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */
5671 } ZSTD_literalCompressionMode_e;
5673 typedef enum {
5674 /* Note: This enum controls features which are conditionally beneficial. Zstd typically will make a final
5675 * decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable
5676 * or ZSTD_ps_disable forces the feature on or off, respectively.
5678 ZSTD_ps_auto = 0, /* Let the library automatically determine whether the feature shall be enabled */
5679 ZSTD_ps_enable = 1, /* Force-enable the feature */
5680 ZSTD_ps_disable = 2 /* Do not use the feature */
5681 } ZSTD_paramSwitch_e;
5683 /***************************************
5684 * Frame header and size functions
5685 ***************************************/
5687 /*! ZSTD_findDecompressedSize() :
5688 * `src` should point to the start of a series of ZSTD encoded and/or skippable frames
5689 * `srcSize` must be the _exact_ size of this series
5690 * (i.e. there should be a frame boundary at `src + srcSize`)
5691 * @return : - decompressed size of all data in all successive frames
5692 * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
5693 * - if an error occurred: ZSTD_CONTENTSIZE_ERROR
5695 * note 1 : decompressed size is an optional field that may not be present, especially in streaming mode.
5696 * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
5697 * In which case, it's necessary to use streaming mode to decompress data.
5698 * note 2 : decompressed size is always present when compression is done with ZSTD_compress()
5699 * note 3 : decompressed size can be very large (64-bit value),
5700 * potentially larger than what local system can handle as a single memory segment.
5701 * In which case, it's necessary to use streaming mode to decompress data.
5702 * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
5703 * Always ensure result fits within application's authorized limits.
5704 * Each application can set its own limits.
5705 * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
5706 * read each contained frame header. This is fast as most of the data is skipped,
5707 * however it does mean that all frame data must be present and valid. */
5708 ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
5710 /*! ZSTD_decompressBound() :
5711 * `src` should point to the start of a series of ZSTD encoded and/or skippable frames
5712 * `srcSize` must be the _exact_ size of this series
5713 * (i.e. there should be a frame boundary at `src + srcSize`)
5714 * @return : - upper-bound for the decompressed size of all data in all successive frames
5715 * - if an error occurred: ZSTD_CONTENTSIZE_ERROR
5717 * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame.
5718 * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
5719 * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
5720 * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
5721 * upper-bound = # blocks * min(128 KB, Window_Size)
5723 ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
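/* Editor's note, not part of the upstream header: a sketch of sizing a destination buffer
 * for untrusted input. Even when ZSTD_findDecompressedSize() reports
 * ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_decompressBound() still yields a safe upper bound.
 * MY_APP_MAX_OUTPUT is a hypothetical application-defined limit; includes and error
 * handling are abbreviated.
 * \code
 *   unsigned long long const bound = ZSTD_decompressBound(srcBuf, srcSize);
 *   if (bound != ZSTD_CONTENTSIZE_ERROR && bound <= MY_APP_MAX_OUTPUT) {
 *       void* const dstBuf = malloc((size_t)bound);
 *       size_t const dSize = ZSTD_decompress(dstBuf, (size_t)bound, srcBuf, srcSize);
 *       (void)dSize;  // check ZSTD_isError(dSize) before trusting the output
 *       free(dstBuf);
 *   }
 * \endcode
 */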
5725 /*! ZSTD_frameHeaderSize() :
5726 * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
5727 * @return : size of the Frame Header,
5728 * or an error code (if srcSize is too small) */
5729 ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
5731 typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
5732 typedef struct {
5733 unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
5734 unsigned long long windowSize; /* can be very large, up to <= frameContentSize */
5735 unsigned blockSizeMax;
5736 ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
5737 unsigned headerSize;
5738 unsigned dictID;
5739 unsigned checksumFlag;
5740 unsigned _reserved1;
5741 unsigned _reserved2;
5742 } ZSTD_frameHeader;
5744 /*! ZSTD_getFrameHeader() :
5745 * decode Frame Header, or requires larger `srcSize`.
5746 * @return : 0, `zfhPtr` is correctly filled,
5747 * >0, `srcSize` is too small; the return value is the wanted `srcSize` amount,
5748 * or an error code, which can be tested using ZSTD_isError() */
5749 ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
5750 /*! ZSTD_getFrameHeader_advanced() :
5751 * same as ZSTD_getFrameHeader(),
5752 * with added capability to select a format (like ZSTD_f_zstd1_magicless) */
5753 ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
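/* Editor's note, not part of the upstream header: a sketch of inspecting a frame header
 * without consuming input, e.g. to learn window size or dictionary ID up front.
 * \code
 *   ZSTD_frameHeader zfh;
 *   size_t const ret = ZSTD_getFrameHeader(&zfh, srcBuf, srcSize);  // srcBuf/srcSize: hypothetical
 *   if (ret == 0) {
 *       // zfh.windowSize, zfh.dictID, zfh.frameContentSize are now filled in
 *   } else if (!ZSTD_isError(ret)) {
 *       // not enough input yet: `ret` is the srcSize needed to decode the header
 *   }
 * \endcode
 */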
5755 /*! ZSTD_decompressionMargin() :
5756 * Zstd supports in-place decompression, where the input and output buffers overlap.
5757 * In this case, the output buffer must be at least (Margin + Output_Size) bytes large,
5758 * and the input buffer must be at the end of the output buffer.
5760 *  _______________________ Output Buffer ________________________
5761 * |                                                               |
5762 * |                                        ____ Input Buffer ____|
5763 * |                                       |                      |
5764 * v                                       v                      v
5765 * |---------------------------------------|-----------|----------|
5766 * ^                                                   ^          ^
5767 * |___________________ Output_Size ___________________|_ Margin _|
5769 * NOTE: See also ZSTD_DECOMPRESSION_MARGIN().
5770 * NOTE: This applies only to single-pass decompression through ZSTD_decompress() or
5771 * ZSTD_decompressDCtx().
5772 * NOTE: This function supports multi-frame input.
5774 * @param src The compressed frame(s)
5775 * @param srcSize The size of the compressed frame(s)
5776 * @returns The decompression margin or an error that can be checked with ZSTD_isError().
5778 ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSize);
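/* Editor's note, not part of the upstream header: a sketch of in-place decompression using
 * the margin. The compressed data is copied to the tail of an oversized output buffer and
 * then decompressed over itself. `originalSize` is hypothetical and must be known or
 * bounded by the caller; ZSTD_isError() checks and includes are abbreviated.
 * \code
 *   size_t const margin  = ZSTD_decompressionMargin(srcBuf, srcSize);
 *   size_t const bufSize = originalSize + margin;
 *   unsigned char* const buf = malloc(bufSize);
 *   memcpy(buf + bufSize - srcSize, srcBuf, srcSize);  // place input at the end of the buffer
 *   size_t const dSize = ZSTD_decompress(buf, bufSize, buf + bufSize - srcSize, srcSize);
 *   // on success, the first dSize bytes of buf hold the decompressed data
 *   free(buf);
 * \endcode
 */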
5780 /*! ZSTD_DECOMPRESS_MARGIN() :
5781 * Similar to ZSTD_decompressionMargin(), but instead of computing the margin from
5782 * the compressed frame, compute it from the original size and the blockSizeLog.
5783 * See ZSTD_decompressionMargin() for details.
5785 * WARNING: This macro does not support multi-frame input, the input must be a single
5786 * zstd frame. If you need that support use the function, or implement it yourself.
5788 * @param originalSize The original uncompressed size of the data.
5789 * @param blockSize The block size == MIN(windowSize, ZSTD_BLOCKSIZE_MAX).
5790 * Unless you explicitly set the windowLog smaller than
5791 * ZSTD_BLOCKSIZELOG_MAX you can just use ZSTD_BLOCKSIZE_MAX.
5793 #define ZSTD_DECOMPRESSION_MARGIN(originalSize, blockSize) ((size_t)( \
5794 ZSTD_FRAMEHEADERSIZE_MAX /* Frame header */ + \
5795 4 /* checksum */ + \
5796 ((originalSize) == 0 ? 0 : 3 * (((originalSize) + (blockSize) - 1) / blockSize)) /* 3 bytes per block */ + \
5797 (blockSize) /* One block of margin */ \
5800 typedef enum {
5801 ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
5802 ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */
5803 } ZSTD_sequenceFormat_e;
5805 /*! ZSTD_sequenceBound() :
5806 * `srcSize` : size of the input buffer
5807 * @return : upper-bound for the number of sequences that can be generated
5808 * from a buffer of srcSize bytes
5810 * note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence).
5812 ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
5814 /*! ZSTD_generateSequences() :
5815 * WARNING: This function is meant for debugging and informational purposes ONLY!
5816 * Its implementation is flawed, and it will be deleted in a future version.
5817 * It is not guaranteed to succeed, as there are several cases where it will give
5818 * up and fail. You should NOT use this function in production code.
5820 * This function is deprecated, and will be removed in a future version.
5822 * Generate sequences using ZSTD_compress2(), given a source buffer.
5824 * @param zc The compression context to be used for ZSTD_compress2(). Set any
5825 * compression parameters you need on this context.
5826 * @param outSeqs The output sequences buffer of size @p outSeqsSize
5827 * @param outSeqsSize The size of the output sequences buffer.
5828 * ZSTD_sequenceBound(srcSize) is an upper bound on the number
5829 * of sequences that can be generated.
5830 * @param src The source buffer to generate sequences from of size @p srcSize.
5831 * @param srcSize The size of the source buffer.
5833 * Each block will end with a dummy sequence
5834 * with offset == 0, matchLength == 0, and litLength == length of last literals.
5835 * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
5836 * simply acts as a block delimiter.
5838 * @returns The number of sequences generated, necessarily less than
5839 * ZSTD_sequenceBound(srcSize), or an error code that can be checked
5840 * with ZSTD_isError().
5842 ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()")
5843 ZSTDLIB_STATIC_API size_t
5844 ZSTD_generateSequences(ZSTD_CCtx* zc,
5845 ZSTD_Sequence* outSeqs, size_t outSeqsSize,
5846 const void* src, size_t srcSize);
5848 /*! ZSTD_mergeBlockDelimiters() :
5849 * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
5850 * by merging them into the literals of the next sequence.
5852 * As such, the final generated result has no explicit representation of block boundaries,
5853 * and the final last literals segment is not represented in the sequences.
5855 * The output of this function can be fed into ZSTD_compressSequences() with CCtx
5856 * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
5857 * @return : number of sequences left after merging
5859 ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
5861 /*! ZSTD_compressSequences() :
5862 * Compress an array of ZSTD_Sequence, associated with @src buffer, into dst.
5863 * @src contains the entire input (not just the literals).
5864 * If @srcSize > sum(sequence.length), the remaining bytes are considered all literals
5865 * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
5866 * The entire source is compressed into a single frame.
5868 * The compression behavior changes based on cctx params. In particular:
5869 * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
5870 * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
5871 * the block size derived from the cctx, and sequences may be split. This is the default setting.
5873 * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
5874 * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
5876 * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
5877 * behavior. If ZSTD_c_validateSequences == 1 and a sequence is invalid (see doc/zstd_compression_format.md for
5878 * specifics regarding offset/matchlength requirements), the function will bail out and return an error.
5880 * In addition to the two adjustable experimental params, there are other important cctx params.
5881 * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
5882 * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
5883 * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
5884 * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
5886 * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
5887 * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
5888 * and cannot emit an RLE block that disagrees with the repcode history
5889 * @return : final compressed size, or a ZSTD error code.
5891 ZSTDLIB_STATIC_API size_t
5892 ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize,
5893 const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
5894 const void* src, size_t srcSize);
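/* Editor's note, not part of the upstream header: a sketch of the typical pipeline when
 * sequences come from ZSTD_generateSequences() (debugging/informational use only, and
 * deprecated upstream) and are re-compressed without explicit block delimiters. Buffers,
 * sizes and error checks are abbreviated.
 * \code
 *   size_t const maxSeqs = ZSTD_sequenceBound(srcSize);
 *   ZSTD_Sequence* const seqs = malloc(maxSeqs * sizeof(ZSTD_Sequence));
 *   size_t nbSeqs = ZSTD_generateSequences(cctx, seqs, maxSeqs, srcBuf, srcSize);
 *   nbSeqs = ZSTD_mergeBlockDelimiters(seqs, nbSeqs);           // drop block-delimiter entries
 *
 *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
 *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);  // fail cleanly on bad sequences
 *   size_t const cSize = ZSTD_compressSequences(cctx, dstBuf, dstCap, seqs, nbSeqs, srcBuf, srcSize);
 *   (void)cSize;
 *   free(seqs);
 * \endcode
 */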
5897 /*! ZSTD_writeSkippableFrame() :
5898 * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
5900 * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number,
5901 * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
5902 * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
5903 * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
5905 * Returns an error if destination buffer is not large enough, if the source size is not representable
5906 * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
5908 * @return : number of bytes written or a ZSTD error.
5910 ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
5911 const void* src, size_t srcSize, unsigned magicVariant);
5913 /*! ZSTD_readSkippableFrame() :
5914 * Retrieves the content of a zstd skippable frame starting at src, and writes it into the dst buffer.
5916 * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
5917 * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
5918 * in the magicVariant.
5920 * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
5922 * @return : number of bytes written or a ZSTD error.
5924 ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
5925 const void* src, size_t srcSize);
5927 /*! ZSTD_isSkippableFrame() :
5928 * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
5930 ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
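/* Editor's note, not part of the upstream header: a sketch round-tripping application
 * metadata through a skippable frame; decoders that don't understand it will skip over it.
 * \code
 *   unsigned char frame[64];
 *   char const meta[] = "app-metadata";
 *   size_t const written = ZSTD_writeSkippableFrame(frame, sizeof(frame),
 *                                                   meta, sizeof(meta), 3);  // variant 3 of 0..15
 *   if (!ZSTD_isError(written) && ZSTD_isSkippableFrame(frame, written)) {
 *       char out[64]; unsigned variant;
 *       size_t const got = ZSTD_readSkippableFrame(out, sizeof(out), &variant, frame, written);
 *       (void)got;  // on success: got == sizeof(meta) and variant == 3
 *   }
 * \endcode
 */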
5934 /***************************************
5935 * Memory management
5936 ***************************************/
5938 /*! ZSTD_estimate*() :
5939 * These functions make it possible to estimate memory usage
5940 * of a future {D,C}Ctx, before its creation.
5941 * This is useful in combination with ZSTD_initStatic(),
5942 * which makes it possible to employ a static buffer for ZSTD_CCtx* state.
5944 * ZSTD_estimateCCtxSize() will provide a memory budget large enough
5945 * to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2()
5946 * associated with any compression level up to max specified one.
5947 * The estimate will assume the input may be arbitrarily large,
5948 * which is the worst case.
5950 * Note that the size estimation is specific for one-shot compression,
5951 * it is not valid for streaming (see ZSTD_estimateCStreamSize*())
5952 * nor other potential ways of using a ZSTD_CCtx* state.
5954 * When srcSize can be bound by a known and rather "small" value,
5955 * this knowledge can be used to provide a tighter budget estimation
5956 * because the ZSTD_CCtx* state will need less memory for small inputs.
5957 * This tighter estimation can be provided by employing more advanced functions
5958 * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
5959 * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
5960 * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
5962 * Note : only single-threaded compression is supported.
5963 * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
5965 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel);
5966 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
5967 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
5968 ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
5970 /*! ZSTD_estimateCStreamSize() :
5971 * ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression
5972 * using any compression level up to the max specified one.
5973 * It will also consider src size to be arbitrarily "large", which is a worst case scenario.
5974 * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
5975 * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
5976 * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
5977 * Note : CStream size estimation is only correct for single-threaded compression.
5978 * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
5979 * Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
5980 * Size estimates assume that no external sequence producer is registered.
5982 * ZSTD_DStream memory budget depends on the frame's window size.
5983 * This information can be passed manually, using ZSTD_estimateDStreamSize,
5984 * or deduced from a valid frame header, using ZSTD_estimateDStreamSize_fromFrame().
5985 * Any frame requesting a window size larger than the max specified one will be rejected.
5986 * Note : if streaming is initialized with a ZSTD_init?Stream_usingDict() function,
5987 * an internal ?Dict will be created, whose additional size is not estimated here.
5988 * In this case, get the total size by adding the corresponding ZSTD_estimate?DictSize() result.
5990 ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel);
5991 ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
5992 ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
5993 ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize);
5994 ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
5996 /*! ZSTD_estimate?DictSize() :
5997 * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
5998 * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
5999 * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
6001 ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
6002 ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
6003 ZSTDLIB_STATIC_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
6005 /*! ZSTD_initStatic*() :
6006 * Initialize an object using a pre-allocated fixed-size buffer.
6007 * workspace: The memory area to emplace the object into.
6008 * Provided pointer *must be 8-byte aligned*.
6009 * Buffer must outlive object.
6010 * workspaceSize: Use ZSTD_estimate*Size() to determine
6011 * how large workspace must be to support target scenario.
6012 * @return : pointer to object (same address as workspace, just different type),
6013 * or NULL if error (size too small, incorrect alignment, etc.)
6014 * Note : zstd will never resize nor malloc() when using a static buffer.
6015 * If the object requires more memory than available,
6016 * zstd will just error out (typically ZSTD_error_memory_allocation).
6017 * Note 2 : there is no corresponding "free" function.
6018 * Since workspace is allocated externally, it must be freed externally too.
6019 * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
6020 * into its associated cParams.
6021 * Limitation 1 : currently not compatible with internal dictionary creation, triggered by
6022 * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
6023 * Limitation 2 : static cctx currently not compatible with multi-threading.
6024 * Limitation 3 : static dctx is incompatible with legacy support.
6026 ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
6027 ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */
6029 ZSTDLIB_STATIC_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
6030 ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */
6032 ZSTDLIB_STATIC_API const ZSTD_CDict* ZSTD_initStaticCDict(
6033 void* workspace, size_t workspaceSize,
6034 const void* dict, size_t dictSize,
6035 ZSTD_dictLoadMethod_e dictLoadMethod,
6036 ZSTD_dictContentType_e dictContentType,
6037 ZSTD_compressionParameters cParams);
6039 ZSTDLIB_STATIC_API const ZSTD_DDict* ZSTD_initStaticDDict(
6040 void* workspace, size_t workspaceSize,
6041 const void* dict, size_t dictSize,
6042 ZSTD_dictLoadMethod_e dictLoadMethod,
6043 ZSTD_dictContentType_e dictContentType);
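/* Editor's note, not part of the upstream header: a sketch of allocation-free setup with a
 * static workspace. Using unsigned long long elements keeps the buffer 8-byte aligned on
 * typical ABIs; the 200 KB size is purely illustrative.
 * \code
 *   static unsigned long long workspace[200 * 1024 / sizeof(unsigned long long)];
 *   size_t const need = ZSTD_estimateDCtxSize();
 *   ZSTD_DCtx* const dctx = (need <= sizeof(workspace))
 *                         ? ZSTD_initStaticDCtx(workspace, sizeof(workspace))
 *                         : NULL;  // NULL also signals bad size or alignment
 *   // use dctx with ZSTD_decompressDCtx(); zstd will never malloc() nor resize it
 * \endcode
 */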
6046 /*! Custom memory allocation :
6047 * These prototypes make it possible to pass your own allocation/free functions.
6048 * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
6049 * All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones.
6051 typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
6052 typedef void (*ZSTD_freeFunction) (void* opaque, void* address);
6053 typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
6054 static
6055 #ifdef __GNUC__
6056 __attribute__((__unused__))
6057 #endif
6058 ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
6060 ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
6061 ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
6062 ZSTDLIB_STATIC_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
6063 ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
6065 ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
6066 ZSTD_dictLoadMethod_e dictLoadMethod,
6067 ZSTD_dictContentType_e dictContentType,
6068 ZSTD_compressionParameters cParams,
6069 ZSTD_customMem customMem);
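/* Editor's note, not part of the upstream header: a sketch of routing zstd's allocations
 * through custom hooks. myAlloc, myFree, myPool, pool_alloc and pool_free are hypothetical
 * application helpers.
 * \code
 *   static void* myAlloc(void* opaque, size_t size) { return pool_alloc(opaque, size); }
 *   static void  myFree (void* opaque, void* addr)  { pool_free(opaque, addr); }
 *
 *   ZSTD_customMem const cmem = { myAlloc, myFree, myPool };
 *   ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(cmem);
 *   // every internal allocation and free of this DCtx now goes through myAlloc/myFree
 *   ZSTD_freeDCtx(dctx);
 * \endcode
 */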
6071 /*! Thread pool :
6072 * These prototypes make it possible to share a thread pool among multiple compression contexts.
6073 * This can limit resources for applications with multiple threads where each one uses
6074 * a threaded compression mode (via ZSTD_c_nbWorkers parameter).
6075 * ZSTD_createThreadPool creates a new thread pool with a given number of threads.
6076 * Note that such a pool must remain alive for as long as it is being used.
6077 * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
6078 * to use an internal thread pool).
6079 * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
6081 typedef struct POOL_ctx_s ZSTD_threadPool;
6082 ZSTDLIB_STATIC_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
6083 ZSTDLIB_STATIC_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */
6084 ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
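/* Editor's note, not part of the upstream header: a sketch of sharing one pool between two
 * compression contexts, assuming a multithreaded build of the library.
 * \code
 *   ZSTD_threadPool* const pool = ZSTD_createThreadPool(4);  // 4 shared worker threads
 *   ZSTD_CCtx* const cctxA = ZSTD_createCCtx();
 *   ZSTD_CCtx* const cctxB = ZSTD_createCCtx();
 *   ZSTD_CCtx_refThreadPool(cctxA, pool);
 *   ZSTD_CCtx_refThreadPool(cctxB, pool);
 *   ZSTD_CCtx_setParameter(cctxA, ZSTD_c_nbWorkers, 2);      // both contexts draw from `pool`
 *   ZSTD_CCtx_setParameter(cctxB, ZSTD_c_nbWorkers, 2);
 *   // ... compress with cctxA / cctxB ...
 *   ZSTD_freeCCtx(cctxA); ZSTD_freeCCtx(cctxB);
 *   ZSTD_freeThreadPool(pool);                               // only after the contexts are freed
 * \endcode
 */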
6088 * This API is temporary and is expected to change or disappear in the future!
6090 ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced2(
6091 const void* dict, size_t dictSize,
6092 ZSTD_dictLoadMethod_e dictLoadMethod,
6093 ZSTD_dictContentType_e dictContentType,
6094 const ZSTD_CCtx_params* cctxParams,
6095 ZSTD_customMem customMem);
6097 ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_advanced(
6098 const void* dict, size_t dictSize,
6099 ZSTD_dictLoadMethod_e dictLoadMethod,
6100 ZSTD_dictContentType_e dictContentType,
6101 ZSTD_customMem customMem);
6104 /***************************************
6105 * Advanced compression functions
6106 ***************************************/
6108 /*! ZSTD_createCDict_byReference() :
6109 * Create a digested dictionary for compression
6110 * Dictionary content is just referenced, not duplicated.
6111 * As a consequence, `dictBuffer` **must** outlive CDict,
6112 * and its content must remain unmodified throughout the lifetime of CDict.
6113 * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
6114 ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
6116 /*! ZSTD_getCParams() :
6117 * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
6118 * `estimatedSrcSize` value is optional, select 0 if not known */
6119 ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
6121 /*! ZSTD_getParams() :
6122 * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
6123 * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */
6124 ZSTDLIB_STATIC_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
6126 /*! ZSTD_checkCParams() :
6127 * Ensure param values remain within authorized range.
6128 * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
6129 ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
6131 /*! ZSTD_adjustCParams() :
6132 * optimize params for a given `srcSize` and `dictSize`.
6133 * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
6134 * `dictSize` must be `0` when there is no dictionary.
6135 * cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
6136 * This function never fails (wide contract) */
6137 ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
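/* Editor's note, not part of the upstream header: a sketch of deriving, tweaking and
 * validating compression parameters by hand. `srcSize` is a hypothetical estimate.
 * \code
 *   ZSTD_compressionParameters cp = ZSTD_getCParams(3, srcSize, 0);  // level 3, no dictionary
 *   cp.windowLog = 20;                                               // illustrative manual tweak
 *   cp = ZSTD_adjustCParams(cp, srcSize, 0);                         // clamp to a valid range
 *   if (!ZSTD_isError(ZSTD_checkCParams(cp))) {
 *       // cp can now feed e.g. ZSTD_createCDict_advanced() or ZSTD_CCtx_setCParams()
 *   }
 * \endcode
 */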
6139 /*! ZSTD_CCtx_setCParams() :
6140 * Set all parameters provided within @p cparams into the working @p cctx.
6141 * Note : when modifying parameters during compression (MT mode only),
6142 * changes to the .windowLog parameter will be ignored.
6143 * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
6144 * On failure, no parameters are updated.
6146 ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams);
6148 /*! ZSTD_CCtx_setFParams() :
6149 * Set all parameters provided within @p fparams into the working @p cctx.
6150 * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
6152 ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams);
6154 /*! ZSTD_CCtx_setParams() :
6155 * Set all parameters provided within @p params into the working @p cctx.
6156 * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
6158 ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params);
6160 /*! ZSTD_compress_advanced() :
6161 * Note : this function is now DEPRECATED.
6162 * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
6163 * This prototype will generate compilation warnings. */
6164 ZSTD_DEPRECATED("use ZSTD_compress2")
6165 ZSTDLIB_STATIC_API
6166 size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
6167 void* dst, size_t dstCapacity,
6168 const void* src, size_t srcSize,
6169 const void* dict,size_t dictSize,
6170 ZSTD_parameters params);
6172 /*! ZSTD_compress_usingCDict_advanced() :
6173 * Note : this function is now DEPRECATED.
6174 * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
6175 * This prototype will generate compilation warnings. */
6176 ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary")
6177 ZSTDLIB_STATIC_API
6178 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
6179 void* dst, size_t dstCapacity,
6180 const void* src, size_t srcSize,
6181 const ZSTD_CDict* cdict,
6182 ZSTD_frameParameters fParams);
6185 /*! ZSTD_CCtx_loadDictionary_byReference() :
6186 * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
6187 * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
6188 ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
6190 /*! ZSTD_CCtx_loadDictionary_advanced() :
6191 * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
6192 * how to load the dictionary (by copy ? by reference ?)
6193 * and how to interpret it (automatic ? force raw mode ? full mode only ?) */
6194 ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
6196 /*! ZSTD_CCtx_refPrefix_advanced() :
6197 * Same as ZSTD_CCtx_refPrefix(), but gives finer control over
6198 * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
6199 ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
6201 /* === experimental parameters === */
6202 /* these parameters can be used with ZSTD_setParameter()
6203 * they are not guaranteed to remain supported in the future */
6205 /* Enables rsyncable mode,
6206 * which makes compressed files more rsync friendly
6207 * by adding periodic synchronization points to the compressed data.
6208 * The target average block size is ZSTD_c_jobSize / 2.
6209 * It's possible to modify the job size to increase or decrease
6210 * the granularity of the synchronization point.
6211 * Once the job size becomes smaller than the window size,
6212 * the compression ratio will degrade.
6213 * NOTE 1: rsyncable mode only works when multithreading is enabled.
6214 * NOTE 2: rsyncable performs poorly in combination with long range mode,
6215 * since it will decrease the effectiveness of synchronization points,
6216 * though mileage may vary.
6217 * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s.
6218 * If the selected compression level is already running significantly slower,
6219 * the overall speed won't be significantly impacted.
6221 #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1
6223 /* Select a compression format.
6224 * The value must be of type ZSTD_format_e.
6225 * See ZSTD_format_e enum definition for details */
6226 #define ZSTD_c_format ZSTD_c_experimentalParam2
6228 /* Force back-reference distances to remain < windowSize,
6229 * even when referencing into Dictionary content (default:0) */
6230 #define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3
6232 /* Controls whether the contents of a CDict
6233 * are used in place, or copied into the working context.
6234 * Accepts values from the ZSTD_dictAttachPref_e enum.
6235 * See the comments on that enum for an explanation of the feature. */
6236 #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
6238 /* Controlled with ZSTD_paramSwitch_e enum.
6239 * Default is ZSTD_ps_auto.
6240 * Set to ZSTD_ps_disable to never compress literals.
6241 * Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals
6242 * may still be emitted if huffman is not beneficial to use.)
6244 * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
6245 * literals compression based on the compression parameters - specifically,
6246 * negative compression levels do not use literal compression.
6248 #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
6250 /* User's best guess of source size.
6251 * Hint is not valid when srcSizeHint == 0.
6252 * There is no guarantee that hint is close to actual source size,
6253 * but compression ratio may regress significantly if the guess considerably underestimates it
6254 #define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
6256 /* Controls whether the new and experimental "dedicated dictionary search
6257 * structure" can be used. This feature is still rough around the edges, be
6258 * prepared for surprising behavior!
6260 * How to use it:
6262 * When using a CDict, whether to use this feature or not is controlled at
6263 * CDict creation, and it must be set in a CCtxParams set passed into that
6264 * construction (via ZSTD_createCDict_advanced2()). A compression will then
6265 * use the feature or not based on how the CDict was constructed; the value of
6266 * this param, set in the CCtx, will have no effect.
6268 * However, when a dictionary buffer is passed into a CCtx, such as via
6269 * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control
6270 * whether the CDict that is created internally can use the feature or not.
6272 * What it does:
6274 * Normally, the internal data structures of the CDict are analogous to what
6275 * would be stored in a CCtx after compressing the contents of a dictionary.
6276 * To an approximation, a compression using a dictionary can then use those
6277 * data structures to simply continue what is effectively a streaming
6278 * compression where the simulated compression of the dictionary left off.
6279 * Which is to say, the search structures in the CDict are normally the same
6280 * format as in the CCtx.
6282 * It is possible to do better, since the CDict is not like a CCtx: the search
6283 * structures are written once during CDict creation, and then are only read
6284 * after that, while the search structures in the CCtx are both read and
6285 * written as the compression goes along. This means we can choose a search
6286 * structure for the dictionary that is read-optimized.
6288 * This feature enables the use of that different structure.
6290 * Note that some of the members of the ZSTD_compressionParameters struct have
6291 * different semantics and constraints in the dedicated search structure. It is
6292 * highly recommended that you simply set a compression level in the CCtxParams
6293 * you pass into the CDict creation call, and avoid messing with the cParams
6294 * directly.
6296 * Effects:
6298 * This will only have any effect when the selected ZSTD_strategy
6299 * implementation supports this feature. Currently, that's limited to
6300 * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2.
6302 * Note that this means that the CDict tables can no longer be copied into the
6303 * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be
6304 * usable. The dictionary can only be attached or reloaded.
6306 * In general, you should expect compression to be faster--sometimes very much
6307 * so--and CDict creation to be slightly slower. Eventually, we will probably
6308 * make this mode the default.
6310 #define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
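/* Editor's note, not part of the upstream header: a sketch of opting a CDict into the
 * dedicated dictionary search structure. The switch must be set in the CCtxParams used to
 * build the CDict; dictBuf/dictSize are hypothetical, and level 9 is merely one choice
 * whose strategy (greedy/lazy/lazy2) supports the feature.
 * \code
 *   ZSTD_CCtx_params* const p = ZSTD_createCCtxParams();
 *   ZSTD_CCtxParams_setParameter(p, ZSTD_c_compressionLevel, 9);
 *   ZSTD_CCtxParams_setParameter(p, ZSTD_c_enableDedicatedDictSearch, 1);
 *   ZSTD_CDict* const cdict = ZSTD_createCDict_advanced2(dictBuf, dictSize,
 *                                  ZSTD_dlm_byCopy, ZSTD_dct_auto, p, ZSTD_defaultCMem);
 *   ZSTD_freeCCtxParams(p);
 *   // attach cdict with ZSTD_CCtx_refCDict(), compress as usual, then ZSTD_freeCDict(cdict)
 * \endcode
 */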
6312 /* ZSTD_c_stableInBuffer
6313 * Experimental parameter.
6314 * Default is 0 == disabled. Set to 1 to enable.
6316 * Tells the compressor that input data presented with ZSTD_inBuffer
6317 * will ALWAYS be the same between calls.
6318 * Technically, the @src pointer must never be changed,
6319 * and the @pos field can only be updated by zstd.
6320 * However, it's possible to increase the @size field,
6321 * allowing scenarios where more data can be appended after compression starts.
6322 * These conditions are checked by the compressor,
6323 * and compression will fail if they are not respected.
6324 * Also, data in the ZSTD_inBuffer within the range [src, src + pos)
6325 * MUST not be modified during compression or it will result in data corruption.
6327 * When this flag is enabled zstd won't allocate an input window buffer,
6328 * because the user guarantees it can reference the ZSTD_inBuffer until
6329 * the frame is complete. But, it will still allocate an output buffer
6330 * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
6331 * avoid the memcpy() from the input buffer to the input window buffer.
6333 * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
6334 * this flag is ALWAYS memory safe, and will never access out-of-bounds
6335 * memory. However, compression WILL fail if conditions are not respected.
6337 * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
6338 * not be modified during compression or it will result in data corruption.
6339 * This is because zstd needs to reference data in the ZSTD_inBuffer to find
6340 * matches. Normally zstd maintains its own window buffer for this purpose,
6341 * but passing this flag tells zstd to rely on user provided buffer instead.
6343 #define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
6345 /* ZSTD_c_stableOutBuffer
6346 * Experimental parameter.
6347 * Default is 0 == disabled. Set to 1 to enable.
6349 * Tells the compressor that the ZSTD_outBuffer will not be resized between
6350 * calls. Specifically: (out.size - out.pos) will never grow. This gives the
6351 * compressor the freedom to say: If the compressed data doesn't fit in the
6352 * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
6353 * always compress directly into the output buffer, instead of compressing
6354 * into an internal buffer and copying to the output buffer.
6356 * When this flag is enabled zstd won't allocate an output buffer, because
6357 * it can write directly to the ZSTD_outBuffer. It will still allocate the
6358 * input window buffer (see ZSTD_c_stableInBuffer).
6360 * Zstd will check that (out.size - out.pos) never grows and return an error
6361 * if it does. While not strictly necessary, this should prevent surprises.
6363 #define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
6365 /* ZSTD_c_blockDelimiters
6366 * Default is 0 == ZSTD_sf_noBlockDelimiters.
6368 * For use with sequence compression API: ZSTD_compressSequences().
6370 * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
6371 * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
6372 * See the definition of ZSTD_Sequence for more specifics.
6374 #define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
6376 /* ZSTD_c_validateSequences
6377 * Default is 0 == disabled. Set to 1 to enable sequence validation.
6379 * For use with sequence compression API: ZSTD_compressSequences().
6380 * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
6381 * during function execution.
6383 * Without validation, providing a sequence that does not conform to the zstd spec will cause
6384 * undefined behavior, and may produce a corrupted block.
6386 * With validation enabled, if sequence is invalid (see doc/zstd_compression_format.md for
6387 * specifics regarding offset/matchlength requirements) then the function will bail out and
6388 * return an error.
6391 #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
6393 /* ZSTD_c_useBlockSplitter
6394 * Controlled with ZSTD_paramSwitch_e enum.
6395 * Default is ZSTD_ps_auto.
6396 * Set to ZSTD_ps_disable to never use block splitter.
6397 * Set to ZSTD_ps_enable to always use block splitter.
6399 * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
6400 * block splitting based on the compression parameters.
6402 #define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13
6404 /* ZSTD_c_useRowMatchFinder
6405 * Controlled with ZSTD_paramSwitch_e enum.
6406 * Default is ZSTD_ps_auto.
6407 * Set to ZSTD_ps_disable to never use row-based matchfinder.
6408 * Set to ZSTD_ps_enable to force usage of row-based matchfinder.
6410 * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
6411 * the row-based matchfinder based on support for SIMD instructions and the window log.
6412 * Note that this only pertains to compression strategies: greedy, lazy, and lazy2
6414 #define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14
6416 /* ZSTD_c_deterministicRefPrefix
6417 * Default is 0 == disabled. Set to 1 to enable.
6419 * Zstd produces different results for prefix compression when the prefix is
6420 * directly adjacent to the data about to be compressed vs. when it isn't.
6421 * This is because zstd detects that the two buffers are contiguous and it can
6422 * use a more efficient match finding algorithm. However, this produces different
6423 * results than when the two buffers are non-contiguous. This flag forces zstd
6424 * to always load the prefix in non-contiguous mode, even if it happens to be
6425 * adjacent to the data, to guarantee determinism.
6427 * If you really care about determinism when using a dictionary or prefix,
6428 * like when doing delta compression, you should select this option. It comes
6429 * at a speed penalty of about 2.5% if the dictionary and data happened to be
6430 * contiguous, and is free if they weren't contiguous. We don't expect that
6431 * intentionally making the dictionary and data contiguous will be worth the
6432 * cost to memcpy() the data.
6434 #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
6436 /* ZSTD_c_prefetchCDictTables
6437 * Controlled with ZSTD_paramSwitch_e enum. Default is ZSTD_ps_auto.
6439 * In some situations, zstd uses CDict tables in-place rather than copying them
6440 * into the working context. (See docs on ZSTD_dictAttachPref_e above for details).
6441 * In such situations, compression speed is seriously impacted when CDict tables are
6442 * "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables
6443 * when they are used in-place.
6445 * For sufficiently small inputs, the cost of the prefetch will outweigh the benefit.
6446 * For sufficiently large inputs, zstd will by default memcpy() CDict tables
6447 * into the working context, so there is no need to prefetch. This parameter is
6448 * targeted at a middle range of input sizes, where a prefetch is cheap enough to be
6449 * useful but memcpy() is too expensive. The exact range of input sizes where this
6450 * makes sense is best determined by careful experimentation.
6452 * Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable,
6453 * but in the future zstd may conditionally enable this feature via an auto-detection
6454 * heuristic for cold CDicts.
6455 * Use ZSTD_ps_disable to opt out of prefetching under any circumstances.
6457 #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16
6459 /* ZSTD_c_enableSeqProducerFallback
6460 * Allowed values are 0 (disable) and 1 (enable). The default setting is 0.
6462 * Controls whether zstd will fall back to an internal sequence producer if an
6463 * external sequence producer is registered and returns an error code. This fallback
6464 * is block-by-block: the internal sequence producer will only be called for blocks
6465 * where the external sequence producer returns an error code. Fallback parsing will
6466 * follow any other cParam settings, such as compression level, the same as in a
6467 * normal (fully-internal) compression operation.
6469 * The user is strongly encouraged to read the full Block-Level Sequence Producer API
6470 * documentation (below) before setting this parameter. */
6471 #define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17
6473 /* ZSTD_c_maxBlockSize
6474 * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
6475 * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
6477 * This parameter can be used to set an upper bound on the blocksize
6478 * that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper
6479 * bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make
6480 * compressBound() inaccurate). Only currently meant to be used for testing.
6483 #define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18
6485 /* ZSTD_c_searchForExternalRepcodes
6486 * This parameter affects how zstd parses external sequences, such as sequences
6487 * provided through the compressSequences() API or from an external block-level
6488 * sequence producer.
6490 * If set to ZSTD_ps_enable, the library will check for repeated offsets in
6491 * external sequences, even if those repcodes are not explicitly indicated in
6492 * the "rep" field. Note that this is the only way to exploit repcode matches
6493 * while using compressSequences() or an external sequence producer, since zstd
6494 * currently ignores the "rep" field of external sequences.
6496 * If set to ZSTD_ps_disable, the library will not exploit repeated offsets in
6497 * external sequences, regardless of whether the "rep" field has been set. This
6498 * reduces sequence compression overhead by about 25% while sacrificing some
6499 * compression ratio.
6501 * The default value is ZSTD_ps_auto, for which the library will enable/disable
6502 * based on compression level.
6504 * Note: for now, this param only has an effect if ZSTD_c_blockDelimiters is
6505 * set to ZSTD_sf_explicitBlockDelimiters. That may change in the future.
6507 #define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19
6509 /*! ZSTD_CCtx_getParameter() :
6510 * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
6511 * and store it into int* value.
6512 * @return : 0, or an error code (which can be tested with ZSTD_isError()).
6514 ZSTDLIB_STATIC_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
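/* Illustrative sketch (not part of the upstream header): reading back a parameter
 * that was previously set on a CCtx. The surrounding context (a valid cctx) is assumed;
 * error handling is elided.
 *
 *   int level = 0;
 *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
 *   size_t const err = ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &level);
 *   if (!ZSTD_isError(err)) {
 *       // level now holds 19
 *   }
 */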
6517 /*! ZSTD_CCtx_params :
6518 * Quick howto :
6519 * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
6520 * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
6521 * an existing ZSTD_CCtx_params structure.
6522 * This is similar to
6523 * ZSTD_CCtx_setParameter().
6524 * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
6525 * an existing CCtx.
6526 * These parameters will be applied to
6527 * all subsequent frames.
6528 * - ZSTD_compressStream2() : Do compression using the CCtx.
6529 * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer.
6531 * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
6532 * for static allocation of CCtx for single-threaded compression.
6534 ZSTDLIB_STATIC_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
6535 ZSTDLIB_STATIC_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */
6537 /*! ZSTD_CCtxParams_reset() :
6538 * Reset params to default values.
6540 ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
6542 /*! ZSTD_CCtxParams_init() :
6543 * Initializes the compression parameters of cctxParams according to
6544 * compression level. All other parameters are reset to their default values.
6546 ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
6548 /*! ZSTD_CCtxParams_init_advanced() :
6549 * Initializes the compression and frame parameters of cctxParams according to
6550 * params. All other parameters are reset to their default values.
6552 ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
6554 /*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+
6555 * Similar to ZSTD_CCtx_setParameter.
6556 * Set one compression parameter, selected by enum ZSTD_cParameter.
6557 * Parameters must be applied to a ZSTD_CCtx using
6558 * ZSTD_CCtx_setParametersUsingCCtxParams().
6559 * @result : a code representing success or failure (which can be tested with
6560 * ZSTD_isError()).
6562 ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
6564 /*! ZSTD_CCtxParams_getParameter() :
6565 * Similar to ZSTD_CCtx_getParameter.
6566 * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
6567 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
6569 ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
6571 /*! ZSTD_CCtx_setParametersUsingCCtxParams() :
6572 * Apply a set of ZSTD_CCtx_params to the compression context.
6573 * This can be done even after compression is started;
6574 * if nbWorkers==0, this will have no impact until a new compression is started.
6575 * if nbWorkers>=1, new parameters will be picked up at next job,
6576 * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
6578 ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
6579 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
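/* Illustrative sketch (not part of the upstream header): the ZSTD_CCtx_params workflow
 * described above, i.e. building a parameter set once and applying it to a CCtx before
 * streaming compression. Variable names are hypothetical; error handling is elided.
 *
 *   ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
 *   ZSTD_CCtxParams_init(params, 3);                                 // base compression level 3
 *   ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
 *   ZSTD_CCtxParams_setParameter(params, ZSTD_c_windowLog, 23);
 *
 *   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *   ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);            // applies to subsequent frames
 *   // ... compress with ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) ...
 *
 *   ZSTD_freeCCtxParams(params);
 *   ZSTD_freeCCtx(cctx);
 */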
6581 /*! ZSTD_compressStream2_simpleArgs() :
6582 * Same as ZSTD_compressStream2(),
6583 * but using only integral types as arguments.
6584 * This variant might be helpful for binders from dynamic languages
6585 * which have troubles handling structures containing memory pointers.
6587 ZSTDLIB_STATIC_API size_t ZSTD_compressStream2_simpleArgs (
6588 ZSTD_CCtx* cctx,
6589 void* dst, size_t dstCapacity, size_t* dstPos,
6590 const void* src, size_t srcSize, size_t* srcPos,
6591 ZSTD_EndDirective endOp);
6594 /***************************************
6595 * Advanced decompression functions
6596 ***************************************/
6598 /*! ZSTD_isFrame() :
6599 * Tells if the content of `buffer` starts with a valid Frame Identifier.
6600 * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
6601 * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
6602 * Note 3 : Skippable Frame Identifiers are considered valid. */
6603 ZSTDLIB_STATIC_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
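/* Illustrative sketch (not part of the upstream header): using ZSTD_isFrame() to decide
 * whether a buffer looks like zstd data before attempting decompression. `src`/`srcSize`
 * are hypothetical caller variables.
 *
 *   if (srcSize >= 4 && ZSTD_isFrame(src, srcSize)) {
 *       // buffer starts with a valid (possibly skippable) frame identifier
 *   } else {
 *       // not a zstd frame; handle as raw/unknown data
 *   }
 */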
6605 /*! ZSTD_createDDict_byReference() :
6606 * Create a digested dictionary, ready to start decompression operation without startup delay.
6607 * Dictionary content is referenced, and therefore stays in dictBuffer.
6608 * It is important that dictBuffer outlives DDict,
6609 * it must remain read accessible throughout the lifetime of DDict */
6610 ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
6612 /*! ZSTD_DCtx_loadDictionary_byReference() :
6613 * Same as ZSTD_DCtx_loadDictionary(),
6614 * but references `dict` content instead of copying it into `dctx`.
6615 * This saves memory if `dict` remains around.
6616 * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
6617 ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
6619 /*! ZSTD_DCtx_loadDictionary_advanced() :
6620 * Same as ZSTD_DCtx_loadDictionary(),
6621 * but gives direct control over
6622 * how to load the dictionary (by copy ? by reference ?)
6623 * and how to interpret it (automatic ? force raw mode ? full mode only ?). */
6624 ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
6626 /*! ZSTD_DCtx_refPrefix_advanced() :
6627 * Same as ZSTD_DCtx_refPrefix(), but gives finer control over
6628 * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
6629 ZSTDLIB_STATIC_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
6631 /*! ZSTD_DCtx_setMaxWindowSize() :
6632 * Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
6633 * This protects a decoder context from reserving too much memory for itself (potential attack scenario).
6634 * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
6635 * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
6636 * @return : 0, or an error code (which can be tested using ZSTD_isError()).
6638 ZSTDLIB_STATIC_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
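/* Illustrative sketch (not part of the upstream header): capping the window size a
 * streaming decoder will accept, to bound memory usage on untrusted input. The 8 MB
 * limit below is an arbitrary example value.
 *
 *   ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *   ZSTD_DCtx_setMaxWindowSize(dctx, 8 * (1u << 20));   // refuse frames needing > 8 MB windows
 *   // ... ZSTD_decompressStream(dctx, &out, &in) now errors out on larger-window frames ...
 *   ZSTD_freeDCtx(dctx);
 */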
6640 /*! ZSTD_DCtx_getParameter() :
6641 * Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
6642 * and store it into int* value.
6643 * @return : 0, or an error code (which can be tested with ZSTD_isError()).
6645 ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
6647 /* ZSTD_d_format
6648 * experimental parameter,
6649 * allowing selection between ZSTD_format_e input compression formats
6651 #define ZSTD_d_format ZSTD_d_experimentalParam1
6652 /* ZSTD_d_stableOutBuffer
6653 * Experimental parameter.
6654 * Default is 0 == disabled. Set to 1 to enable.
6656 * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
6657 * between calls, except for the modifications that zstd makes to pos (the
6658 * caller must not modify pos). This is checked by the decompressor, and
6659 * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
6660 * MUST be large enough to fit the entire decompressed frame. This will be
6661 * checked when the frame content size is known. The data in the ZSTD_outBuffer
6662 * in the range [dst, dst + pos) MUST not be modified during decompression
6663 * or you will get data corruption.
6665 * When this flag is enabled zstd won't allocate an output buffer, because
6666 * it can write directly to the ZSTD_outBuffer, but it will still allocate
6667 * an input buffer large enough to fit any compressed block. This will also
6668 * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
6669 * If you need to avoid the input buffer allocation use the buffer-less
6670 * streaming API.
6672 * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
6673 * this flag is ALWAYS memory safe, and will never access out-of-bounds
6674 * memory. However, decompression WILL fail if you violate the preconditions.
6676 * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
6677 * not be modified during decompression or you will get data corruption. This
6678 * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
6679 * matches. Normally zstd maintains its own buffer for this purpose, but passing
6680 * this flag tells zstd to use the user provided buffer.
6682 #define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
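/* Illustrative sketch (not part of the upstream header): streaming decompression into a
 * single caller-owned buffer using ZSTD_d_stableOutBuffer. Assumes `dst`/`dstCapacity` can
 * hold the entire decompressed frame (e.g. sized from ZSTD_getFrameContentSize()) and that
 * `src`/`srcSize` hold one complete frame; error handling is elided.
 *
 *   ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *   ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1);
 *   ZSTD_outBuffer out = { dst, dstCapacity, 0 };       // the same buffer on every call
 *   ZSTD_inBuffer  in  = { src, srcSize, 0 };
 *   size_t ret;
 *   do {
 *       ret = ZSTD_decompressStream(dctx, &out, &in);   // writes directly into dst, no memcpy
 *   } while (ret != 0 && !ZSTD_isError(ret) && in.pos < in.size);
 *   ZSTD_freeDCtx(dctx);
 */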
6684 /* ZSTD_d_forceIgnoreChecksum
6685 * Experimental parameter.
6686 * Default is 0 == disabled. Set to 1 to enable
6688 * Tells the decompressor to skip checksum validation during decompression, regardless
6689 * of whether checksumming was specified during compression. This offers some
6690 * slight performance benefits, and may be useful for debugging.
6691 * Param has values of type ZSTD_forceIgnoreChecksum_e
6693 #define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
6695 /* ZSTD_d_refMultipleDDicts
6696 * Experimental parameter.
6697 * Default is 0 == disabled. Set to 1 to enable
6699 * If enabled and dctx is allocated on the heap, then additional memory will be allocated
6700 * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_DCtx_refDDict()
6701 * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
6702 * store all references. At decompression time, the appropriate dictID is selected
6703 * from the set of DDicts based on the dictID in the frame.
6705 * Usage is simply calling ZSTD_DCtx_refDDict() on multiple dict buffers.
6707 * Param has values of type ZSTD_refMultipleDDicts_e
6709 * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict() will trigger memory
6710 * allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
6711 * Memory is allocated as per ZSTD_DCtx::customMem.
6713 * Although this function allocates memory for the table, the user is still responsible for
6714 * memory management of the underlying ZSTD_DDict* themselves.
6716 #define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
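/* Illustrative sketch (not part of the upstream header): letting one DCtx hold several
 * digested dictionaries and pick the right one per frame via the frame's dictID.
 * `ddict1`/`ddict2` and the I/O buffers are hypothetical; error handling is elided.
 *
 *   ZSTD_DCtx* const dctx = ZSTD_createDCtx();          // must be heap-allocated
 *   ZSTD_DCtx_setParameter(dctx, ZSTD_d_refMultipleDDicts, ZSTD_rmd_refMultipleDDicts);
 *   ZSTD_DCtx_refDDict(dctx, ddict1);                   // stored, not overwritten
 *   ZSTD_DCtx_refDDict(dctx, ddict2);
 *   // Each frame is decoded with whichever DDict matches its dictID:
 *   size_t const dSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
 *   ZSTD_freeDCtx(dctx);                                // also frees the internal DDict table
 */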
6718 /* ZSTD_d_disableHuffmanAssembly
6719 * Set to 1 to disable the Huffman assembly implementation.
6720 * The default value is 0, which allows zstd to use the Huffman assembly
6721 * implementation if available.
6723 * This parameter can be used to disable Huffman assembly at runtime.
6724 * If you want to disable it at compile time you can define the macro
6725 * ZSTD_DISABLE_ASM.
6727 #define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5
6729 /* ZSTD_d_maxBlockSize
6730 * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
6731 * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
6733 * Forces the decompressor to reject blocks whose content size is
6734 * larger than the configured maxBlockSize. When maxBlockSize is
6735 * larger than the windowSize, the windowSize is used instead.
6736 * This saves memory on the decoder when you know all blocks are small.
6738 * This option is typically used in conjunction with ZSTD_c_maxBlockSize.
6740 * WARNING: This causes the decoder to reject otherwise valid frames
6741 * that have block sizes larger than the configured maxBlockSize.
6743 #define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6
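/* Illustrative sketch (not part of the upstream header): configuring a matched block-size
 * limit on both the compressor and the decompressor, as suggested above. The 16 KB figure
 * is an arbitrary example value; `cctx`/`dctx` are assumed to exist.
 *
 *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 16 * 1024);   // emit blocks <= 16 KB
 *   ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 16 * 1024);   // reject larger blocks
 */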
6746 /*! ZSTD_DCtx_setFormat() :
6747 * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
6748 * Instruct the decoder context about what kind of data to decode next.
6749 * This instruction is mandatory to decode data without a fully-formed header,
6750 * such as ZSTD_f_zstd1_magicless, for example.
6751 * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
6752 ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead")
6753 ZSTDLIB_STATIC_API
6754 size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
6756 /*! ZSTD_decompressStream_simpleArgs() :
6757 * Same as ZSTD_decompressStream(),
6758 * but using only integral types as arguments.
6759 * This can be helpful for binders from dynamic languages
6760 * which have troubles handling structures containing memory pointers.
6762 ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs (
6763 ZSTD_DCtx* dctx,
6764 void* dst, size_t dstCapacity, size_t* dstPos,
6765 const void* src, size_t srcSize, size_t* srcPos);
6768 /********************************************************************
6769 * Advanced streaming functions
6770 * Warning : most of these functions are now redundant with the Advanced API.
6771 * Once Advanced API reaches "stable" status,
6772 * redundant functions will be deprecated, and then at some point removed.
6773 ********************************************************************/
6775 /*===== Advanced Streaming compression functions =====*/
6777 /*! ZSTD_initCStream_srcSize() :
6778 * This function is DEPRECATED, and equivalent to:
6779 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
6780 * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
6781 * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
6782 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
6784 * pledgedSrcSize must be correct. If it is not known at init time, use
6785 * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
6786 * "0" also disables frame content size field. It may be enabled in the future.
6787 * This prototype will generate compilation warnings.
6789 ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
6790 ZSTDLIB_STATIC_API
6791 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
6792 int compressionLevel,
6793 unsigned long long pledgedSrcSize);
6795 /*! ZSTD_initCStream_usingDict() :
6796 * This function is DEPRECATED, and is equivalent to:
6797 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
6798 * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
6799 * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
6801 * Creates an internal CDict (incompatible with static CCtx), except if
6802 * dict == NULL or dictSize < 8, in which case no dict is used.
6803 * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
6804 * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
6805 * This prototype will generate compilation warnings.
6807 ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
6808 ZSTDLIB_STATIC_API
6809 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
6810 const void* dict, size_t dictSize,
6811 int compressionLevel);
6813 /*! ZSTD_initCStream_advanced() :
6814 * This function is DEPRECATED, and is equivalent to:
6815 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
6816 * ZSTD_CCtx_setParams(zcs, params);
6817 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
6818 * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
6820 * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
6821 * pledgedSrcSize must be correct.
6822 * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
6823 * This prototype will generate compilation warnings.
6825 ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
6826 ZSTDLIB_STATIC_API
6827 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
6828 const void* dict, size_t dictSize,
6829 ZSTD_parameters params,
6830 unsigned long long pledgedSrcSize);
6832 /*! ZSTD_initCStream_usingCDict() :
6833 * This function is DEPRECATED, and equivalent to:
6834 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
6835 * ZSTD_CCtx_refCDict(zcs, cdict);
6837 * note : cdict will just be referenced, and must outlive compression session
6838 * This prototype will generate compilation warnings.
6840 ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
6841 ZSTDLIB_STATIC_API
6842 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
6844 /*! ZSTD_initCStream_usingCDict_advanced() :
6845 * This function is DEPRECATED, and is equivalent to:
6846 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
6847 * ZSTD_CCtx_setFParams(zcs, fParams);
6848 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
6849 * ZSTD_CCtx_refCDict(zcs, cdict);
6851 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
6852 * pledgedSrcSize must be correct. If srcSize is not known at init time, use
6853 * value ZSTD_CONTENTSIZE_UNKNOWN.
6854 * This prototype will generate compilation warnings.
6856 ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
6857 ZSTDLIB_STATIC_API
6858 size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
6859 const ZSTD_CDict* cdict,
6860 ZSTD_frameParameters fParams,
6861 unsigned long long pledgedSrcSize);
6863 /*! ZSTD_resetCStream() :
6864 * This function is DEPRECATED, and is equivalent to:
6865 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
6866 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
6867 * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but
6868 * ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be
6869 * explicitly specified.
6871 * start a new frame, using same parameters from previous frame.
6872 * This is typically useful to skip the dictionary loading stage, since the dictionary will be reused in-place.
6873 * Note that zcs must be initialized at least once before using ZSTD_resetCStream().
6874 * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
6875 * If pledgedSrcSize > 0, its value must be correct, as it will be written in the frame header, and checked at the end.
6876 * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
6877 * but it will change to mean "empty" in a future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
6878 * @return : 0, or an error code (which can be tested using ZSTD_isError())
6879 * This prototype will generate compilation warnings.
6881 ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
6882 ZSTDLIB_STATIC_API
6883 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
6886 typedef struct {
6887 unsigned long long ingested; /* nb input bytes read and buffered */
6888 unsigned long long consumed; /* nb input bytes actually compressed */
6889 unsigned long long produced; /* nb of compressed bytes generated and buffered */
6890 unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
6891 unsigned currentJobID; /* MT only : latest started job nb */
6892 unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */
6893 } ZSTD_frameProgression;
6895 /* ZSTD_getFrameProgression() :
6896 * tells how much data has been ingested (read from input)
6897 * consumed (input actually compressed) and produced (output) for current frame.
6898 * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
6899 * Aggregates progression inside active worker threads.
6901 ZSTDLIB_STATIC_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
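/* Illustrative sketch (not part of the upstream header): polling compression progress from
 * another thread or between ZSTD_compressStream2() calls. Assumes a valid `cctx` and
 * <stdio.h>; field interpretation follows the struct documented above.
 *
 *   ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
 *   unsigned long long const buffered = fp.ingested - fp.consumed;   // input not yet compressed
 *   printf("ingested=%llu consumed=%llu produced=%llu buffered=%llu (job %u, %u workers)\n",
 *          fp.ingested, fp.consumed, fp.produced, buffered, fp.currentJobID, fp.nbActiveWorkers);
 */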
6903 /*! ZSTD_toFlushNow() :
6904 * Tell how many bytes are ready to be flushed immediately.
6905 * Useful for multithreading scenarios (nbWorkers >= 1).
6906 * Probe the oldest active job, defined as oldest job not yet entirely flushed,
6907 * and check its output buffer.
6908 * @return : amount of data stored in oldest job and ready to be flushed immediately.
6909 * if @return == 0, it means either :
6910 * + there is no active job (could be checked with ZSTD_frameProgression()), or
6911 * + oldest job is still actively compressing data,
6912 * but everything it has produced has also been flushed so far,
6913 * therefore flush speed is limited by production speed of oldest job
6914 * irrespective of the speed of concurrent (and newer) jobs.
6916 ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
6919 /*===== Advanced Streaming decompression functions =====*/
6922 * This function is deprecated, and is equivalent to:
6924 * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
6925 * ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
6927 * note: no dictionary will be used if dict == NULL or dictSize < 8
6929 ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_loadDictionary, see zstd.h for detailed instructions")
6930 ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
6933 * This function is deprecated, and is equivalent to:
6935 * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
6936 * ZSTD_DCtx_refDDict(zds, ddict);
6938 * note : ddict is referenced, it must outlive decompression session
6940 ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_refDDict, see zstd.h for detailed instructions")
6941 ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
6944 * This function is deprecated, and is equivalent to:
6946 * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
6948 * reuse decompression parameters from previous init; saves dictionary loading
6950 ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions")
6951 ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
6954 /* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API *********************
6956 * *** OVERVIEW ***
6957 * The Block-Level Sequence Producer API allows users to provide their own custom
6958 * sequence producer which libzstd invokes to process each block. The produced list
6959 * of sequences (literals and matches) is then post-processed by libzstd to produce
6960 * valid compressed blocks.
6962 * This block-level offload API is a more granular complement of the existing
6963 * frame-level offload API compressSequences() (introduced in v1.5.1). It offers
6964 * an easier migration story for applications already integrated with libzstd: the
6965 * user application continues to invoke the same compression functions
6966 * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits
6967 * from the specific advantages of the external sequence producer. For example,
6968 * the sequence producer could be tuned to take advantage of known characteristics
6969 * of the input, to offer better speed / ratio, or could leverage hardware
6970 * acceleration not available within libzstd itself.
6972 * See contrib/externalSequenceProducer for an example program employing the
6973 * Block-Level Sequence Producer API.
6975 * *** USAGE ***
6976 * The user is responsible for implementing a function of type
6977 * ZSTD_sequenceProducer_F. For each block, zstd will pass the following
6978 * arguments to the user-provided function:
6980 * - sequenceProducerState: a pointer to a user-managed state for the sequence
6981 * producer.
6983 * - outSeqs, outSeqsCapacity: an output buffer for the sequence producer.
6984 * outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory
6985 * backing outSeqs is managed by the CCtx.
6987 * - src, srcSize: an input buffer for the sequence producer to parse.
6988 * srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
6990 * - dict, dictSize: a history buffer, which may be empty, which the sequence
6991 * producer may reference as it parses the src buffer. Currently, zstd will
6992 * always pass dictSize == 0 into external sequence producers, but this will
6993 * change in the future.
6995 * - compressionLevel: a signed integer representing the zstd compression level
6996 * set by the user for the current operation. The sequence producer may choose
6997 * to use this information to change its compression strategy and speed/ratio
6998 * tradeoff. Note: the compression level does not reflect zstd parameters set
6999 * through the advanced API.
7001 * - windowSize: a size_t representing the maximum allowed offset for external
7002 * sequences. Note that sequence offsets are sometimes allowed to exceed the
7003 * windowSize if a dictionary is present, see doc/zstd_compression_format.md
7004 * for details.
7006 * The user-provided function shall return a size_t representing the number of
7007 * sequences written to outSeqs. This return value will be treated as an error
7008 * code if it is greater than outSeqsCapacity. The return value must be non-zero
7009 * if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided
7010 * for convenience, but any value greater than outSeqsCapacity will be treated as
7011 * an error code.
7013 * If the user-provided function does not return an error code, the sequences
7014 * written to outSeqs must be a valid parse of the src buffer. Data corruption may
7015 * occur if the parse is not valid. A parse is defined to be valid if the
7016 * following conditions hold:
7017 * - The sum of matchLengths and literalLengths must equal srcSize.
7018 * - All sequences in the parse, except for the final sequence, must have
7019 * matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have
7020 * matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0.
7021 * - All offsets must respect the windowSize parameter as specified in
7022 * doc/zstd_compression_format.md.
7023 * - If the final sequence has matchLength == 0, it must also have offset == 0.
7025 * zstd will only validate these conditions (and fail compression if they do not
7026 * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence
7027 * validation has a performance cost.
7029 * If the user-provided function returns an error, zstd will either fall back
7030 * to an internal sequence producer or fail the compression operation. The user can
7031 * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback
7032 * cParam. Fallback compression will follow any other cParam settings, such as
7033 * compression level, the same as in a normal compression operation.
7035 * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F
7036 * function by calling
7037 * ZSTD_registerSequenceProducer(cctx,
7038 * sequenceProducerState,
7039 * sequenceProducer)
7040 * This setting will persist until the next parameter reset of the CCtx.
7042 * The sequenceProducerState must be initialized by the user before calling
7043 * ZSTD_registerSequenceProducer(). The user is responsible for destroying the
7044 * sequenceProducerState.
7046 * *** LIMITATIONS ***
7047 * This API is compatible with all zstd compression APIs which respect advanced parameters.
7048 * However, there are three limitations:
7050 * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported.
7051 * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level
7052 * external sequence producer.
7053 * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some
7054 * cases (see its documentation for details). Users must explicitly set
7055 * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external
7056 * sequence producer is registered.
7057 * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default
7058 * whenever the window size (1 << ZSTD_c_windowLog) is smaller than 128MB, but that's subject to change. Users should
7059 * check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence
7060 * Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog).
7062 * Second, history buffers are not currently supported. Concretely, zstd will always pass
7063 * dictSize == 0 to the external sequence producer (for now). This has two implications:
7064 * - Dictionaries are not currently supported. Compression will *not* fail if the user
7065 * references a dictionary, but the dictionary won't have any effect.
7066 * - Stream history is not currently supported. All advanced compression APIs, including
7067 * streaming APIs, work with external sequence producers, but each block is treated as
7068 * an independent chunk without history from previous blocks.
7070 * Third, multi-threading within a single compression is not currently supported. In other words,
7071 * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered.
7072 * Multi-threading across compressions is fine: simply create one CCtx per thread.
7074 * Long-term, we plan to overcome all three limitations. There is no technical blocker to
7075 * overcoming them. It is purely a question of engineering effort.
7078 #define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
7080 typedef size_t (*ZSTD_sequenceProducer_F) (
7081 void* sequenceProducerState,
7082 ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
7083 const void* src, size_t srcSize,
7084 const void* dict, size_t dictSize,
7085 int compressionLevel,
7086 size_t windowSize
7089 /*! ZSTD_registerSequenceProducer() :
7090 * Instruct zstd to use a block-level external sequence producer function.
7092 * The sequenceProducerState must be initialized by the caller, and the caller is
7093 * responsible for managing its lifetime. This parameter is sticky across
7094 * compressions. It will remain set until the user explicitly resets compression
7095 * parameters.
7097 * Sequence producer registration is considered to be an "advanced parameter",
7098 * part of the "advanced API". This means it will only have an effect on compression
7099 * APIs which respect advanced parameters, such as compress2() and compressStream2().
7100 * Older compression APIs such as compressCCtx(), which predate the introduction of
7101 * "advanced parameters", will ignore any external sequence producer setting.
7103 * The sequence producer can be "cleared" by registering a NULL function pointer. This
7104 * removes all limitations described above in the "LIMITATIONS" section of the API docs.
7106 * The user is strongly encouraged to read the full API documentation (above) before
7107 * calling this function. */
7108 ZSTDLIB_STATIC_API void
7109 ZSTD_registerSequenceProducer(
7110 ZSTD_CCtx* cctx,
7111 void* sequenceProducerState,
7112 ZSTD_sequenceProducer_F sequenceProducer
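/* Illustrative sketch (not part of the upstream header): a minimal sequence producer that
 * satisfies the contract above by emitting the whole block as literals (one final sequence
 * with matchLength == 0 and offset == 0), then registering it on a CCtx. This is only a
 * demonstration of the API contract; a real producer would emit actual matches.
 * `cctx` is assumed to exist; error handling is elided.
 *
 *   static size_t literalOnlyProducer(void* state,
 *                                     ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
 *                                     const void* src, size_t srcSize,
 *                                     const void* dict, size_t dictSize,
 *                                     int compressionLevel, size_t windowSize)
 *   {
 *       (void)state; (void)src; (void)dict; (void)dictSize;
 *       (void)compressionLevel; (void)windowSize;
 *       if (srcSize == 0) return 0;
 *       if (outSeqsCapacity < 1) return ZSTD_SEQUENCE_PRODUCER_ERROR;
 *       outSeqs[0].offset      = 0;                   // final sequence: no match
 *       outSeqs[0].litLength   = (unsigned)srcSize;   // every byte is a literal
 *       outSeqs[0].matchLength = 0;
 *       outSeqs[0].rep         = 0;
 *       return 1;                                     // number of sequences written
 *   }
 *
 *   // Registration (sticky until the next parameter reset):
 *   ZSTD_registerSequenceProducer(cctx, NULL, literalOnlyProducer);
 *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1);  // optional fallback
 */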
7115 /*! ZSTD_CCtxParams_registerSequenceProducer() :
7116 * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params.
7117 * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(),
7118 * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx().
7120 * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx()
7121 * is required, then this function is for you. Otherwise, you probably don't need it.
7123 * See tests/zstreamtest.c for example usage. */
7124 ZSTDLIB_STATIC_API void
7125 ZSTD_CCtxParams_registerSequenceProducer(
7126 ZSTD_CCtx_params* params,
7127 void* sequenceProducerState,
7128 ZSTD_sequenceProducer_F sequenceProducer
7132 /*********************************************************************
7133 * Buffer-less and synchronous inner streaming functions (DEPRECATED)
7135 * This API is deprecated, and will be removed in a future version.
7136 * It allows streaming (de)compression with user allocated buffers.
7137 * However, it is hard to use, and not as well tested as the rest of
7138 * our API.
7140 * Please use the normal streaming API instead: ZSTD_compressStream2,
7141 * and ZSTD_decompressStream.
7142 * If there is functionality that you need which it doesn't provide,
7143 * please open an issue on our GitHub.
7144 ********************************************************************* */
7147 Buffer-less streaming compression (synchronous mode)
7149 A ZSTD_CCtx object is required to track streaming operations.
7150 Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
7151 ZSTD_CCtx object can be reused multiple times within successive compression operations.
7153 Start by initializing a context.
7154 Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
7156 Then, consume your input using ZSTD_compressContinue().
7157 There are some important considerations to keep in mind when using this advanced function :
7158 - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only.
7159 - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks.
7160 - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
7161 Worst case evaluation is provided by ZSTD_compressBound().
7162 ZSTD_compressContinue() doesn't guarantee recovery after a failed compression.
7163 - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
7164 It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks)
7165 - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
7166 In which case, it will "discard" the relevant memory section from its history.
7168 Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
7169 It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
7170 Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
7172 `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again.
7175 /*===== Buffer-less streaming compression functions =====*/
7176 ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
7177 ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
7178 ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
7179 ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
7180 ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
7181 ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
7183 ZSTD_DEPRECATED("This function will likely be removed in a future release. It is misleading and has very limited utility.")
7184 ZSTDLIB_STATIC_API
7185 size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
7187 ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
7188 ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
7189 ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
7190 ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
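/* Illustrative sketch (not part of the upstream header): the buffer-less compression loop
 * described above, compressing several caller-provided chunks into one frame. `chunk[]`,
 * `nbChunks`, `dst` (an unsigned char buffer sized for the worst case, e.g. via
 * ZSTD_compressBound()), and `dstCapacity` are hypothetical; error handling is elided.
 * The API is deprecated, so new code should prefer ZSTD_compressStream2().
 *
 *   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *   ZSTD_compressBegin(cctx, 3);                                     // compression level 3
 *   size_t dstPos = 0;
 *   for (size_t i = 0; i < nbChunks; i++) {
 *       size_t const cSize = ZSTD_compressContinue(cctx,
 *                                 dst + dstPos, dstCapacity - dstPos,
 *                                 chunk[i].ptr, chunk[i].size);      // chunk[i] must stay readable
 *       dstPos += cSize;
 *   }
 *   dstPos += ZSTD_compressEnd(cctx, dst + dstPos, dstCapacity - dstPos, NULL, 0);  // last block + checksum
 *   ZSTD_freeCCtx(cctx);
 */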
7192 /* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */
7193 ZSTD_DEPRECATED("use advanced API to access custom parameters")
7194 ZSTDLIB_STATIC_API
7195 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
7196 ZSTD_DEPRECATED("use advanced API to access custom parameters")
7197 ZSTDLIB_STATIC_API
7198 size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
7200 Buffer-less streaming decompression (synchronous mode)
7202 A ZSTD_DCtx object is required to track streaming operations.
7203 Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
7204 A ZSTD_DCtx object can be reused multiple times.
7206 First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
7207 Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
7208 Data fragment must be large enough to ensure successful decoding.
7209 `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
7210 result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
7211 >0 : `srcSize` is too small, please provide at least result bytes on next attempt.
7212 errorCode, which can be tested using ZSTD_isError().
7214 It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
7215 such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
7216 Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
7217 As a consequence, check that values remain within valid application range.
7218 For example, do not allocate memory blindly, check that `windowSize` is within expectation.
7219 Each application can set its own limits, depending on local restrictions.
7220 For extended interoperability, it is recommended to support `windowSize` of at least 8 MB.
7222 ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes.
7223 ZSTD_decompressContinue() is very sensitive to contiguity,
7224 if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
7225 or that previous contiguous segment is large enough to properly handle maximum back-reference distance.
7226 There are multiple ways to guarantee this condition.
7228 The most memory efficient way is to use a round buffer of sufficient size.
7229 Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
7230 which can return an error code if the required value is too large for the current system (in 32-bit mode).
7231 In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one,
7232 up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
7233 whose maximum size is provided in the `ZSTD_frameHeader` structure, field `blockSizeMax`.
7234 At which point, decoding can resume from the beginning of the buffer.
7235 Note that already decoded data stored in the buffer should be flushed before being overwritten.
7237 There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory.
7239 Finally, if you control the compression process, you can also ignore all buffer size rules,
7240 as long as the encoder and decoder progress in "lock-step",
7241 aka use exactly the same buffer sizes, break contiguity at the same place, etc.
7243 Once buffers are setup, start decompression, with ZSTD_decompressBegin().
7244 If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
7246 Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
7247 ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
7248 ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
7250 result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
7251 It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
7252 It can also be an error code, which can be tested with ZSTD_isError().
7254 A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
7255 Context can then be reset to start a new decompression.
7257 Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
7258 This information is not required to properly decode a frame.
7260 == Special case : skippable frames ==
7262 Skippable frames allow integration of user-defined data into a flow of concatenated frames.
7263 Skippable frames will be ignored (skipped) by decompressor.
7264 The format of skippable frames is as follows :
7265 a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
7266 b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
7267 c) Frame Content - any content (User Data) of length equal to Frame Size
7268 For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame.
7269 For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content.
7272 /*===== Buffer-less streaming decompression functions =====*/
7274 ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
7276 ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
7277 ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
7278 ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
7280 ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
7281 ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
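/* Illustrative sketch (not part of the upstream header): the buffer-less decoding loop
 * described above, decoding one frame from a single contiguous `src` buffer into a `dst`
 * buffer assumed large enough for the whole regenerated content (so back-reference
 * distances are trivially satisfied). Names are hypothetical; error handling is elided.
 *
 *   ZSTD_frameHeader zfh;
 *   ZSTD_getFrameHeader(&zfh, src, srcSize);            // 0 on success, >0 if more input is needed
 *   ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *   ZSTD_decompressBegin(dctx);
 *   const char* ip = (const char*)src;
 *   char*       op = (char*)dst;
 *   size_t next;
 *   while ((next = ZSTD_nextSrcSizeToDecompress(dctx)) != 0) {
 *       size_t const produced = ZSTD_decompressContinue(dctx,
 *                                   op, dstCapacity - (size_t)(op - (char*)dst),
 *                                   ip, next);          // exactly `next` bytes must be provided
 *       ip += next;
 *       op += produced;                                 // may be 0 for metadata items
 *   }
 *   ZSTD_freeDCtx(dctx);
 */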
7283 /* misc */
7284 ZSTD_DEPRECATED("This function will likely be removed in the next minor release. It is misleading and has very limited utility.")
7285 ZSTDLIB_STATIC_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
7286 typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
7287 ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
7292 /* ========================================= */
7293 /** Block level API (DEPRECATED) */
7294 /* ========================================= */
7298 This API is deprecated in favor of the regular compression API.
7299 You can get the frame header down to 2 bytes by setting:
7300 - ZSTD_c_format = ZSTD_f_zstd1_magicless
7301 - ZSTD_c_contentSizeFlag = 0
7302 - ZSTD_c_checksumFlag = 0
7303 - ZSTD_c_dictIDFlag = 0
7305 This API is not as well tested as our normal API, so we recommend not using it.
7306 We will be removing it in a future version. If the normal API doesn't provide
7307 the functionality you need, please open a GitHub issue.
7309 Block functions produce and decode raw zstd blocks, without frame metadata.
7310 Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
7311 But users will have to manage the metadata needed to regenerate the data, such as compressed and content sizes.
7313 A few rules to respect :
7314 - Compressing and decompressing require a context structure
7315 + Use ZSTD_createCCtx() and ZSTD_createDCtx()
7316 - It is necessary to init context before starting
7317 + compression : any ZSTD_compressBegin*() variant, including with dictionary
7318 + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
7319 - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
7320 + If input is larger than a block size, it's necessary to split input data into multiple blocks
7321 + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
7322 Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
7323 - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
7324 ===> In which case, nothing is produced into `dst` !
7325 + User __must__ test for such outcome and deal directly with uncompressed data
7326 + A block cannot be declared incompressible if the ZSTD_compressBlock() return value was != 0.
7327 Doing so would mess up the statistics history, leading to potential data corruption.
7328 + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
7329 + In case of multiple successive blocks, should some of them be uncompressed,
7330 decoder must be informed of their existence in order to follow proper history.
7331 Use ZSTD_insertBlock() for such a case.
7334 /*===== Raw zstd block functions =====*/
7335 ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
7336 ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx);
7337 ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
7338 ZSTDLIB_STATIC_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
7339 ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
7340 ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
7341 ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
7342 ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
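/* Illustrative sketch (not part of the upstream header): round-tripping one raw block with
 * the deprecated block API, including the mandatory handling of incompressible blocks
 * (result == 0). Buffer names are hypothetical; error handling is elided.
 *
 *   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *   ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *   ZSTD_compressBegin(cctx, 3);
 *   ZSTD_decompressBegin(dctx);
 *
 *   size_t const cSize = ZSTD_compressBlock(cctx, cBuf, cBufCapacity, src, srcSize);
 *   if (cSize == 0) {
 *       // Block was not compressible: store `src` verbatim and tell the decoder about it,
 *       // so its history stays consistent for following blocks.
 *       ZSTD_insertBlock(dctx, src, srcSize);
 *   } else {
 *       size_t const dSize = ZSTD_decompressBlock(dctx, dBuf, dBufCapacity, cBuf, cSize);
 *       (void)dSize;                                  // equals srcSize on success
 *   }
 *   ZSTD_freeCCtx(cctx); ZSTD_freeDCtx(dctx);
 */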
7344 #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
7346 #if defined (__cplusplus)
7348 #endif
7349 /**** ended inlining ../zstd.h ****/
7350 #define FSE_STATIC_LINKING_ONLY
7351 /**** skipping file: fse.h ****/
7352 /**** skipping file: huf.h ****/
7353 #ifndef XXH_STATIC_LINKING_ONLY
7354 # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
7355 #endif
7356 /**** start inlining xxhash.h ****/
7358 * xxHash - Extremely Fast Hash algorithm
7359 * Header File
7360 * Copyright (c) Yann Collet - Meta Platforms, Inc
7362 * This source code is licensed under both the BSD-style license (found in the
7363 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7364 * in the COPYING file in the root directory of this source tree).
7365 * You may select, at your option, one of the above-listed licenses.
7368 /* Local adaptations for Zstandard */
7370 #ifndef XXH_NO_XXH3
7371 # define XXH_NO_XXH3
7372 #endif
7374 #ifndef XXH_NAMESPACE
7375 # define XXH_NAMESPACE ZSTD_
7376 #endif
7379 * @mainpage xxHash
7381 * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
7382 * limits.
7384 * It is proposed in four flavors, in three families:
7385 * 1. @ref XXH32_family
7386 * - Classic 32-bit hash function. Simple, compact, and runs on almost all
7387 * 32-bit and 64-bit systems.
7388 * 2. @ref XXH64_family
7389 * - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
7390 * 64-bit systems (but _not_ 32-bit systems).
7391 * 3. @ref XXH3_family
7392 * - Modern 64-bit and 128-bit hash function family which features improved
7393 * strength and performance across the board, especially on smaller data.
7394 * It benefits greatly from SIMD and 64-bit without requiring it.
7396 * Benchmarks
7397 * ---
7398 * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
7399 * The open source benchmark program is compiled with clang v10.0 using -O3 flag.
7401 * | Hash Name | ISA ext | Width | Large Data Speed | Small Data Velocity |
7402 * | -------------------- | ------- | ----: | ---------------: | ------------------: |
7403 * | XXH3_64bits() | @b AVX2 | 64 | 59.4 GB/s | 133.1 |
7404 * | MeowHash | AES-NI | 128 | 58.2 GB/s | 52.5 |
7405 * | XXH3_128bits() | @b AVX2 | 128 | 57.9 GB/s | 118.1 |
7406 * | CLHash | PCLMUL | 64 | 37.1 GB/s | 58.1 |
7407 * | XXH3_64bits() | @b SSE2 | 64 | 31.5 GB/s | 133.1 |
7408 * | XXH3_128bits() | @b SSE2 | 128 | 29.6 GB/s | 118.1 |
7409 * | RAM sequential read | | N/A | 28.0 GB/s | N/A |
7410 * | ahash | AES-NI | 64 | 22.5 GB/s | 107.2 |
7411 * | City64 | | 64 | 22.0 GB/s | 76.6 |
7412 * | T1ha2 | | 64 | 22.0 GB/s | 99.0 |
7413 * | City128 | | 128 | 21.7 GB/s | 57.7 |
7414 * | FarmHash | AES-NI | 64 | 21.3 GB/s | 71.9 |
7415 * | XXH64() | | 64 | 19.4 GB/s | 71.0 |
7416 * | SpookyHash | | 64 | 19.3 GB/s | 53.2 |
7417 * | Mum | | 64 | 18.0 GB/s | 67.0 |
7418 * | CRC32C | SSE4.2 | 32 | 13.0 GB/s | 57.9 |
7419 * | XXH32() | | 32 | 9.7 GB/s | 71.9 |
7420 * | City32 | | 32 | 9.1 GB/s | 66.0 |
7421 * | Blake3* | @b AVX2 | 256 | 4.4 GB/s | 8.1 |
7422 * | Murmur3 | | 32 | 3.9 GB/s | 56.1 |
7423 * | SipHash* | | 64 | 3.0 GB/s | 43.2 |
7424 * | Blake3* | @b SSE2 | 256 | 2.4 GB/s | 8.1 |
7425 * | HighwayHash | | 64 | 1.4 GB/s | 6.0 |
7426 * | FNV64 | | 64 | 1.2 GB/s | 62.7 |
7427 * | Blake2* | | 256 | 1.1 GB/s | 5.1 |
7428 * | SHA1* | | 160 | 0.8 GB/s | 5.6 |
7429 * | MD5* | | 128 | 0.6 GB/s | 7.8 |
7430 * @note
7431 * - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
7432 * even though it is mandatory on x64.
7433 * - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
7434 * by modern standards.
7435 * - Small data velocity is a rough average of algorithm's efficiency for small
7436 * data. For more accurate information, see the wiki.
7437 * - More benchmarks and strength tests are found on the wiki:
7438 * https://github.com/Cyan4973/xxHash/wiki
7440 * Usage
7441 * ------
7442 * All xxHash variants use a similar API. Changing the algorithm is a trivial
7443 * substitution.
7445 * @pre
7446 * For functions which take an input and length parameter, the following
7447 * requirements are assumed:
7448 * - The range from [`input`, `input + length`) is valid, readable memory.
7449 * - The only exception is if the `length` is `0`, `input` may be `NULL`.
7450 * - For C++, the objects must have the *TriviallyCopyable* property, as the
7451 * functions access bytes directly as if it was an array of `unsigned char`.
7453 * @anchor single_shot_example
7454 * **Single Shot**
7456 * These functions are stateless functions which hash a contiguous block of memory,
7457 * immediately returning the result. They are the easiest and usually the fastest
7458 * option.
7460 * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
7462 * @code{.c}
7463 * #include <string.h>
7464 * #include "xxhash.h"
7466 * // Example for a function which hashes a null terminated string with XXH32().
7467 * XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
7469 * // NULL pointers are only valid if the length is zero
7470 * size_t length = (string == NULL) ? 0 : strlen(string);
7471 * return XXH32(string, length, seed);
7473 * @endcode
7476 * @anchor streaming_example
7477 * **Streaming**
7479 * These groups of functions allow incremental hashing of unknown size, even
7480 * more than what would fit in a size_t.
7482 * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
7484 * @code{.c}
7485 * #include <stdio.h>
7486 * #include <assert.h>
7487 * #include "xxhash.h"
7488 * // Example for a function which hashes a FILE incrementally with XXH3_64bits().
7489 * XXH64_hash_t hashFile(FILE* f)
7491 * // Allocate a state struct. Do not just use malloc() or new.
7492 * XXH3_state_t* state = XXH3_createState();
7493 * assert(state != NULL && "Out of memory!");
7494 * // Reset the state to start a new hashing session.
7495 * XXH3_64bits_reset(state);
7496 * char buffer[4096];
7497 * size_t count;
7498 * // Read the file in chunks
7499 * while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
7500 * // Run update() as many times as necessary to process the data
7501 * XXH3_64bits_update(state, buffer, count);
7503 * // Retrieve the finalized hash. This will not change the state.
7504 * XXH64_hash_t result = XXH3_64bits_digest(state);
7505 * // Free the state. Do not use free().
7506 * XXH3_freeState(state);
7507 * return result;
7509 * @endcode
7511 * Streaming functions generate the xxHash value from an incremental input.
7512 * This method is slower than single-call functions, due to state management.
7513 * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
7515 * An XXH state must first be allocated using `XXH*_createState()`.
7517 * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
7519 * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
7521 * The function returns an error code, with 0 meaning OK, and any other value
7522 * meaning there is an error.
7524 * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
7525 * This function returns the nn-bits hash as an int or long long.
7527 * It's still possible to continue inserting input into the hash state after a
7528 * digest, and generate new hash values later on by invoking `XXH*_digest()`.
7530 * When done, release the state using `XXH*_freeState()`.
7533 * @anchor canonical_representation_example
7534 * **Canonical Representation**
7536 * The default return values from XXH functions are unsigned 32, 64 and 128 bit
7537 * integers.
7538 * This is the simplest and fastest format for further post-processing.
7540 * However, this leaves open the question of what is the order on the byte level,
7541 * since little and big endian conventions will store the same number differently.
7543 * The canonical representation settles this issue by mandating big-endian
7544 * convention, the same convention as human-readable numbers (large digits first).
7546 * When writing hash values to storage, sending them over a network, or printing
7547 * them, it's highly recommended to use the canonical representation to ensure
7548 * portability across a wider range of systems, present and future.
7550 * The following functions allow transformation of hash values to and from
7551 * canonical format.
7553 * XXH32_canonicalFromHash(), XXH32_hashFromCanonical(),
7554 * XXH64_canonicalFromHash(), XXH64_hashFromCanonical(),
7555 * XXH128_canonicalFromHash(), XXH128_hashFromCanonical(),
7557 * @code{.c}
7558 * #include <stdio.h>
7559 * #include "xxhash.h"
7561 * // Example for a function which prints XXH32_hash_t in human readable format
7562 * void printXxh32(XXH32_hash_t hash)
7563 * {
7564 *     XXH32_canonical_t cano;
7565 *     XXH32_canonicalFromHash(&cano, hash);
7566 *     size_t i;
7567 *     for(i = 0; i < sizeof(cano.digest); ++i) {
7568 *         printf("%02x", cano.digest[i]);
7569 *     }
7570 *     printf("\n");
7571 * }
7572 *
7573 * // Example for a function which converts XXH32_canonical_t to XXH32_hash_t
7574 * XXH32_hash_t convertCanonicalToXxh32(XXH32_canonical_t cano)
7575 * {
7576 *     XXH32_hash_t hash = XXH32_hashFromCanonical(&cano);
7577 *     return hash;
7578 * }
7579 * @endcode
7582 * @file xxhash.h
7583 * xxHash prototypes and implementation
7586 #if defined (__cplusplus)
7587 extern "C" {
7588 #endif
7590 /* ****************************
7591 * INLINE mode
7592 ******************************/
7594 * @defgroup public Public API
7595 * Contains details on the public xxHash functions.
7596 * @{
7598 #ifdef XXH_DOXYGEN
7600 * @brief Gives access to internal state declaration, required for static allocation.
7602 * Incompatible with dynamic linking, due to risks of ABI changes.
7604 * Usage:
7605 * @code{.c}
7606 * #define XXH_STATIC_LINKING_ONLY
7607 * #include "xxhash.h"
7608 * @endcode
7610 # define XXH_STATIC_LINKING_ONLY
7611 /* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */
7614 * @brief Gives access to internal definitions.
7616 * Usage:
7617 * @code{.c}
7618 * #define XXH_STATIC_LINKING_ONLY
7619 * #define XXH_IMPLEMENTATION
7620 * #include "xxhash.h"
7621 * @endcode
7623 # define XXH_IMPLEMENTATION
7624 /* Do not undef XXH_IMPLEMENTATION for Doxygen */
7627 * @brief Exposes the implementation and marks all functions as `inline`.
7629 * Use these build macros to inline xxhash into the target unit.
7630 * Inlining improves performance on small inputs, especially when the length is
7631 * expressed as a compile-time constant:
7633 * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
7635 * It also keeps xxHash symbols private to the unit, so they are not exported.
7637 * Usage:
7638 * @code{.c}
7639 * #define XXH_INLINE_ALL
7640 * #include "xxhash.h"
7641 * @endcode
7642 * Do not compile and link xxhash.o as a separate object, as it is not useful.
7644 # define XXH_INLINE_ALL
7645 # undef XXH_INLINE_ALL
7647 * @brief Exposes the implementation without marking functions as inline.
7649 # define XXH_PRIVATE_API
7650 # undef XXH_PRIVATE_API
7652 * @brief Emulate a namespace by transparently prefixing all symbols.
7654 * If you want to include _and expose_ xxHash functions from within your own
7655 * library, but also want to avoid symbol collisions with other libraries which
7656 * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
7657 * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
7658 * (therefore, avoid empty or numeric values).
7660 * Note that no change is required within the calling program as long as it
7661 * includes `xxhash.h`: Regular symbol names will be automatically translated
7662 * by this header.
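 * For illustration only, a typical setup might look like the following
 * (the MYLIB_ prefix is an arbitrary example value, not part of the library):
 * @code{.c}
 * #define XXH_NAMESPACE MYLIB_
 * #include "xxhash.h"
 *
 * // The preprocessor transparently rewrites this call to MYLIB_XXH32().
 * XXH32_hash_t h = XXH32("data", 4, 0);
 * @endcode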
7664 # define XXH_NAMESPACE /* YOUR NAME HERE */
7665 # undef XXH_NAMESPACE
7666 #endif
7668 #if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
7669 && !defined(XXH_INLINE_ALL_31684351384)
7670 /* this section should be traversed only once */
7671 # define XXH_INLINE_ALL_31684351384
7672 /* give access to the advanced API, required to compile implementations */
7673 # undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */
7674 # define XXH_STATIC_LINKING_ONLY
7675 /* make all functions private */
7676 # undef XXH_PUBLIC_API
7677 # if defined(__GNUC__)
7678 # define XXH_PUBLIC_API static __inline __attribute__((unused))
7679 # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
7680 # define XXH_PUBLIC_API static inline
7681 # elif defined(_MSC_VER)
7682 # define XXH_PUBLIC_API static __inline
7683 # else
7684 /* note: this version may generate warnings for unused static functions */
7685 # define XXH_PUBLIC_API static
7686 # endif
7689 * This part deals with the special case where a unit wants to inline xxHash,
7690 * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
7691 * such as part of some previously included *.h header file.
7692 * Without further action, the new include would just be ignored,
7693 * and functions would effectively _not_ be inlined (silent failure).
7694 * The following macros solve this situation by prefixing all inlined names,
7695 * avoiding naming collision with previous inclusions.
7697 /* Before that, we unconditionally #undef all symbols,
7698 * in case they were already defined with XXH_NAMESPACE.
7699 * They will then be redefined for XXH_INLINE_ALL
7701 # undef XXH_versionNumber
7702 /* XXH32 */
7703 # undef XXH32
7704 # undef XXH32_createState
7705 # undef XXH32_freeState
7706 # undef XXH32_reset
7707 # undef XXH32_update
7708 # undef XXH32_digest
7709 # undef XXH32_copyState
7710 # undef XXH32_canonicalFromHash
7711 # undef XXH32_hashFromCanonical
7712 /* XXH64 */
7713 # undef XXH64
7714 # undef XXH64_createState
7715 # undef XXH64_freeState
7716 # undef XXH64_reset
7717 # undef XXH64_update
7718 # undef XXH64_digest
7719 # undef XXH64_copyState
7720 # undef XXH64_canonicalFromHash
7721 # undef XXH64_hashFromCanonical
7722 /* XXH3_64bits */
7723 # undef XXH3_64bits
7724 # undef XXH3_64bits_withSecret
7725 # undef XXH3_64bits_withSeed
7726 # undef XXH3_64bits_withSecretandSeed
7727 # undef XXH3_createState
7728 # undef XXH3_freeState
7729 # undef XXH3_copyState
7730 # undef XXH3_64bits_reset
7731 # undef XXH3_64bits_reset_withSeed
7732 # undef XXH3_64bits_reset_withSecret
7733 # undef XXH3_64bits_update
7734 # undef XXH3_64bits_digest
7735 # undef XXH3_generateSecret
7736 /* XXH3_128bits */
7737 # undef XXH128
7738 # undef XXH3_128bits
7739 # undef XXH3_128bits_withSeed
7740 # undef XXH3_128bits_withSecret
7741 # undef XXH3_128bits_reset
7742 # undef XXH3_128bits_reset_withSeed
7743 # undef XXH3_128bits_reset_withSecret
7744 # undef XXH3_128bits_reset_withSecretandSeed
7745 # undef XXH3_128bits_update
7746 # undef XXH3_128bits_digest
7747 # undef XXH128_isEqual
7748 # undef XXH128_cmp
7749 # undef XXH128_canonicalFromHash
7750 # undef XXH128_hashFromCanonical
7751 /* Finally, free the namespace itself */
7752 # undef XXH_NAMESPACE
7754 /* employ the namespace for XXH_INLINE_ALL */
7755 # define XXH_NAMESPACE XXH_INLINE_
7757 * Some identifiers (enums, type names) are not symbols,
7758 * but they must nonetheless be renamed to avoid redeclaration.
7759 * Alternative solution: do not redeclare them.
7760 * However, this requires some #ifdefs, and has a more dispersed impact.
7761 * Meanwhile, renaming can be achieved in a single place.
7763 # define XXH_IPREF(Id) XXH_NAMESPACE ## Id
7764 # define XXH_OK XXH_IPREF(XXH_OK)
7765 # define XXH_ERROR XXH_IPREF(XXH_ERROR)
7766 # define XXH_errorcode XXH_IPREF(XXH_errorcode)
7767 # define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t)
7768 # define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t)
7769 # define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
7770 # define XXH32_state_s XXH_IPREF(XXH32_state_s)
7771 # define XXH32_state_t XXH_IPREF(XXH32_state_t)
7772 # define XXH64_state_s XXH_IPREF(XXH64_state_s)
7773 # define XXH64_state_t XXH_IPREF(XXH64_state_t)
7774 # define XXH3_state_s XXH_IPREF(XXH3_state_s)
7775 # define XXH3_state_t XXH_IPREF(XXH3_state_t)
7776 # define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
7777 /* Ensure the header is parsed again, even if it was previously included */
7778 # undef XXHASH_H_5627135585666179
7779 # undef XXHASH_H_STATIC_13879238742
7780 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
7782 /* ****************************************************************
7783 * Stable API
7784 *****************************************************************/
7785 #ifndef XXHASH_H_5627135585666179
7786 #define XXHASH_H_5627135585666179 1
7788 /*! @brief Marks a global symbol. */
7789 #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
7790 # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
7791 # ifdef XXH_EXPORT
7792 # define XXH_PUBLIC_API __declspec(dllexport)
7793 # elif XXH_IMPORT
7794 # define XXH_PUBLIC_API __declspec(dllimport)
7795 # endif
7796 # else
7797 # define XXH_PUBLIC_API /* do nothing */
7798 # endif
7799 #endif
7801 #ifdef XXH_NAMESPACE
7802 # define XXH_CAT(A,B) A##B
7803 # define XXH_NAME2(A,B) XXH_CAT(A,B)
7804 # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
7805 /* XXH32 */
7806 # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
7807 # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
7808 # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
7809 # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
7810 # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
7811 # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
7812 # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
7813 # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
7814 # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
7815 /* XXH64 */
7816 # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
7817 # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
7818 # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
7819 # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
7820 # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
7821 # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
7822 # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
7823 # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
7824 # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
7825 /* XXH3_64bits */
7826 # define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
7827 # define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
7828 # define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
7829 # define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
7830 # define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
7831 # define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
7832 # define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
7833 # define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
7834 # define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
7835 # define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
7836 # define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
7837 # define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
7838 # define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
7839 # define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
7840 # define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
7841 /* XXH3_128bits */
7842 # define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
7843 # define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
7844 # define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
7845 # define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
7846 # define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
7847 # define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
7848 # define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
7849 # define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
7850 # define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
7851 # define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
7852 # define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
7853 # define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
7854 # define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
7855 # define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
7856 # define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
7857 #endif
7860 /* *************************************
7861 * Compiler specifics
7862 ***************************************/
7864 /* specific declaration modes for Windows */
7865 #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
7866 # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
7867 # ifdef XXH_EXPORT
7868 # define XXH_PUBLIC_API __declspec(dllexport)
7869 # elif XXH_IMPORT
7870 # define XXH_PUBLIC_API __declspec(dllimport)
7871 # endif
7872 # else
7873 # define XXH_PUBLIC_API /* do nothing */
7874 # endif
7875 #endif
7877 #if defined (__GNUC__)
7878 # define XXH_CONSTF __attribute__((const))
7879 # define XXH_PUREF __attribute__((pure))
7880 #else
7881 # define XXH_CONSTF /* disable */
7882 # define XXH_PUREF
7883 #endif
7885 /* *************************************
7886 * Version
7887 ***************************************/
7888 #define XXH_VERSION_MAJOR 0
7889 #define XXH_VERSION_MINOR 8
7890 #define XXH_VERSION_RELEASE 2
7891 /*! @brief Version number, encoded as two digits each */
7892 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
7895 * @brief Obtains the xxHash version.
7897 * This is mostly useful when xxHash is compiled as a shared library,
7898 * since the returned value comes from the library, as opposed to the header file.
7900 * @return @ref XXH_VERSION_NUMBER of the invoked library.
7902 XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
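/*!
 * A small illustrative sketch (not part of the library): decoding the value
 * returned by XXH_versionNumber() using the two-decimal-digits-per-component
 * encoding of @ref XXH_VERSION_NUMBER, e.g. version 0.8.2 is encoded as 802.
 * @code{.c}
 * #include <stdio.h>
 * #include "xxhash.h"
 *
 * static void printXxhashVersion(void)
 * {
 *     unsigned v = XXH_versionNumber();
 *     printf("xxHash %u.%u.%u\n", v / 10000, (v / 100) % 100, v % 100);
 *     if (v < XXH_VERSION_NUMBER)
 *         printf("note: runtime library is older than this header\n");
 * }
 * @endcode
 */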
7905 /* ****************************
7906 * Common basic types
7907 ******************************/
7908 #include <stddef.h> /* size_t */
7910 * @brief Exit code for the streaming API.
7912 typedef enum {
7913 XXH_OK = 0, /*!< OK */
7914 XXH_ERROR /*!< Error */
7915 } XXH_errorcode;
7918 /*-**********************************************************************
7919 * 32-bit hash
7920 ************************************************************************/
7921 #if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
7923 * @brief An unsigned 32-bit integer.
7925 * Not necessarily defined to `uint32_t` but functionally equivalent.
7927 typedef uint32_t XXH32_hash_t;
7929 #elif !defined (__VMS) \
7930 && (defined (__cplusplus) \
7931 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
7932 # ifdef _AIX
7933 # include <inttypes.h>
7934 # else
7935 # include <stdint.h>
7936 # endif
7937 typedef uint32_t XXH32_hash_t;
7939 #else
7940 # include <limits.h>
7941 # if UINT_MAX == 0xFFFFFFFFUL
7942 typedef unsigned int XXH32_hash_t;
7943 # elif ULONG_MAX == 0xFFFFFFFFUL
7944 typedef unsigned long XXH32_hash_t;
7945 # else
7946 # error "unsupported platform: need a 32-bit type"
7947 # endif
7948 #endif
7951 * @}
7953 * @defgroup XXH32_family XXH32 family
7954 * @ingroup public
7955 * Contains functions used in the classic 32-bit xxHash algorithm.
7957 * @note
7958 * XXH32 is useful for older platforms, with no or poor 64-bit performance.
7959 * Note that the @ref XXH3_family provides competitive speed for both 32-bit
7960 * and 64-bit systems, and offers true 64/128 bit hash results.
7962 * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
7963 * @see @ref XXH32_impl for implementation details
7964 * @{
7968 * @brief Calculates the 32-bit hash of @p input using xxHash32.
7970 * @param input The block of data to be hashed, at least @p length bytes in size.
7971 * @param length The length of @p input, in bytes.
7972 * @param seed The 32-bit seed to alter the hash's output predictably.
7974 * @pre
7975 * The memory between @p input and @p input + @p length must be valid,
7976 * readable, contiguous memory. However, if @p length is `0`, @p input may be
7977 * `NULL`. In C++, this also must be *TriviallyCopyable*.
7979 * @return The calculated 32-bit xxHash32 value.
7981 * @see @ref single_shot_example "Single Shot Example" for an example.
7983 XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
7985 #ifndef XXH_NO_STREAM
7987 * @typedef struct XXH32_state_s XXH32_state_t
7988 * @brief The opaque state struct for the XXH32 streaming API.
7990 * @see XXH32_state_s for details.
7992 typedef struct XXH32_state_s XXH32_state_t;
7995 * @brief Allocates an @ref XXH32_state_t.
7997 * @return An allocated pointer of @ref XXH32_state_t on success.
7998 * @return `NULL` on failure.
8000 * @note Must be freed with XXH32_freeState().
8002 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
8004 * @brief Frees an @ref XXH32_state_t.
8006 * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
8008 * @return @ref XXH_OK.
8010 * @note @p statePtr must be allocated with XXH32_createState().
8013 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
8015 * @brief Copies one @ref XXH32_state_t to another.
8017 * @param dst_state The state to copy to.
8018 * @param src_state The state to copy from.
8019 * @pre
8020 * @p dst_state and @p src_state must not be `NULL` and must not overlap.
8022 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
8025 * @brief Resets an @ref XXH32_state_t to begin a new hash.
8027 * @param statePtr The state struct to reset.
8028 * @param seed The 32-bit seed to alter the hash result predictably.
8030 * @pre
8031 * @p statePtr must not be `NULL`.
8033 * @return @ref XXH_OK on success.
8034 * @return @ref XXH_ERROR on failure.
8036 * @note This function resets and seeds a state. Call it before @ref XXH32_update().
8038 XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
8041 * @brief Consumes a block of @p input to an @ref XXH32_state_t.
8043 * @param statePtr The state struct to update.
8044 * @param input The block of data to be hashed, at least @p length bytes in size.
8045 * @param length The length of @p input, in bytes.
8047 * @pre
8048 * @p statePtr must not be `NULL`.
8049 * @pre
8050 * The memory between @p input and @p input + @p length must be valid,
8051 * readable, contiguous memory. However, if @p length is `0`, @p input may be
8052 * `NULL`. In C++, this also must be *TriviallyCopyable*.
8054 * @return @ref XXH_OK on success.
8055 * @return @ref XXH_ERROR on failure.
8057 * @note Call this to incrementally consume blocks of data.
8059 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
8062 * @brief Returns the calculated hash value from an @ref XXH32_state_t.
8064 * @param statePtr The state struct to calculate the hash from.
8066 * @pre
8067 * @p statePtr must not be `NULL`.
8069 * @return The calculated 32-bit xxHash32 value from that state.
8071 * @note
8072 * Calling XXH32_digest() will not affect @p statePtr, so you can update,
8073 * digest, and update again.
8075 XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
8076 #endif /* !XXH_NO_STREAM */
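/*!
 * A minimal sketch (not part of the library) of the XXH32 streaming workflow
 * declared above, assuming the input arrives in two arbitrary chunks:
 * @code{.c}
 * #include <assert.h>
 * #include "xxhash.h"
 *
 * static XXH32_hash_t hashTwoChunks(const void* part1, size_t len1,
 *                                   const void* part2, size_t len2,
 *                                   XXH32_hash_t seed)
 * {
 *     XXH32_state_t* state = XXH32_createState();  // heap-allocated opaque state
 *     assert(state != NULL);
 *     XXH32_reset(state, seed);                    // start a new, seeded session
 *     XXH32_update(state, part1, len1);            // consume data incrementally
 *     XXH32_update(state, part2, len2);
 *     XXH32_hash_t hash = XXH32_digest(state);     // digest does not modify the state
 *     XXH32_freeState(state);                      // must be released with XXH32_freeState()
 *     return hash;
 * }
 * @endcode
 */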
8078 /******* Canonical representation *******/
8081 * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
8083 typedef struct {
8084 unsigned char digest[4]; /*!< Hash bytes, big endian */
8085 } XXH32_canonical_t;
8088 * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
8090 * @param dst The @ref XXH32_canonical_t pointer to be stored to.
8091 * @param hash The @ref XXH32_hash_t to be converted.
8093 * @pre
8094 * @p dst must not be `NULL`.
8096 * @see @ref canonical_representation_example "Canonical Representation Example"
8098 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
8101 * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
8103 * @param src The @ref XXH32_canonical_t to convert.
8105 * @pre
8106 * @p src must not be `NULL`.
8108 * @return The converted hash.
8110 * @see @ref canonical_representation_example "Canonical Representation Example"
8112 XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
8115 /*! @cond Doxygen ignores this part */
8116 #ifdef __has_attribute
8117 # define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
8118 #else
8119 # define XXH_HAS_ATTRIBUTE(x) 0
8120 #endif
8121 /*! @endcond */
8123 /*! @cond Doxygen ignores this part */
8125 * C23 __STDC_VERSION__ number hasn't been specified yet. For now
8126 * leave as `201711L` (C17 + 1).
8127 * TODO: Update to correct value when it's been specified.
8129 #define XXH_C23_VN 201711L
8130 /*! @endcond */
8132 /*! @cond Doxygen ignores this part */
8133 /* C-language Attributes are added in C23. */
8134 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
8135 # define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
8136 #else
8137 # define XXH_HAS_C_ATTRIBUTE(x) 0
8138 #endif
8139 /*! @endcond */
8141 /*! @cond Doxygen ignores this part */
8142 #if defined(__cplusplus) && defined(__has_cpp_attribute)
8143 # define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
8144 #else
8145 # define XXH_HAS_CPP_ATTRIBUTE(x) 0
8146 #endif
8147 /*! @endcond */
8149 /*! @cond Doxygen ignores this part */
8151 * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
8152 * introduced in CPP17 and C23.
8153 * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
8154 * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
8156 #if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
8157 # define XXH_FALLTHROUGH [[fallthrough]]
8158 #elif XXH_HAS_ATTRIBUTE(__fallthrough__)
8159 # define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
8160 #else
8161 # define XXH_FALLTHROUGH /* fallthrough */
8162 #endif
8163 /*! @endcond */
8165 /*! @cond Doxygen ignores this part */
8167 * Define XXH_NOESCAPE for annotated pointers in public API.
8168 * https://clang.llvm.org/docs/AttributeReference.html#noescape
8169 * As of writing this, only supported by clang.
8171 #if XXH_HAS_ATTRIBUTE(noescape)
8172 # define XXH_NOESCAPE __attribute__((noescape))
8173 #else
8174 # define XXH_NOESCAPE
8175 #endif
8176 /*! @endcond */
8180 * @}
8181 * @ingroup public
8182 * @{
8185 #ifndef XXH_NO_LONG_LONG
8186 /*-**********************************************************************
8187 * 64-bit hash
8188 ************************************************************************/
8189 #if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
8191 * @brief An unsigned 64-bit integer.
8193 * Not necessarily defined to `uint64_t` but functionally equivalent.
8195 typedef uint64_t XXH64_hash_t;
8196 #elif !defined (__VMS) \
8197 && (defined (__cplusplus) \
8198 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
8199 # ifdef _AIX
8200 # include <inttypes.h>
8201 # else
8202 # include <stdint.h>
8203 # endif
8204 typedef uint64_t XXH64_hash_t;
8205 #else
8206 # include <limits.h>
8207 # if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
8208 /* LP64 ABI says uint64_t is unsigned long */
8209 typedef unsigned long XXH64_hash_t;
8210 # else
8211 /* the following type must have a width of 64-bit */
8212 typedef unsigned long long XXH64_hash_t;
8213 # endif
8214 #endif
8217 * @}
8219 * @defgroup XXH64_family XXH64 family
8220 * @ingroup public
8221 * @{
8222 * Contains functions used in the classic 64-bit xxHash algorithm.
8224 * @note
8225 * XXH3 provides competitive speed for both 32-bit and 64-bit systems,
8226 * and offers true 64/128 bit hash results.
8227 * It provides better speed for systems with vector processing capabilities.
8231 * @brief Calculates the 64-bit hash of @p input using xxHash64.
8233 * @param input The block of data to be hashed, at least @p length bytes in size.
8234 * @param length The length of @p input, in bytes.
8235 * @param seed The 64-bit seed to alter the hash's output predictably.
8237 * @pre
8238 * The memory between @p input and @p input + @p length must be valid,
8239 * readable, contiguous memory. However, if @p length is `0`, @p input may be
8240 * `NULL`. In C++, this also must be *TriviallyCopyable*.
8242 * @return The calculated 64-bit xxHash64 value.
8244 * @see @ref single_shot_example "Single Shot Example" for an example.
8246 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
8248 /******* Streaming *******/
8249 #ifndef XXH_NO_STREAM
8251 * @brief The opaque state struct for the XXH64 streaming API.
8253 * @see XXH64_state_s for details.
8255 typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
8258 * @brief Allocates an @ref XXH64_state_t.
8260 * @return An allocated pointer of @ref XXH64_state_t on success.
8261 * @return `NULL` on failure.
8263 * @note Must be freed with XXH64_freeState().
8265 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
8268 * @brief Frees an @ref XXH64_state_t.
8270 * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState().
8272 * @return @ref XXH_OK.
8274 * @note @p statePtr must be allocated with XXH64_createState().
8276 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
8279 * @brief Copies one @ref XXH64_state_t to another.
8281 * @param dst_state The state to copy to.
8282 * @param src_state The state to copy from.
8283 * @pre
8284 * @p dst_state and @p src_state must not be `NULL` and must not overlap.
8286 XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);
8289 * @brief Resets an @ref XXH64_state_t to begin a new hash.
8291 * @param statePtr The state struct to reset.
8292 * @param seed The 64-bit seed to alter the hash result predictably.
8294 * @pre
8295 * @p statePtr must not be `NULL`.
8297 * @return @ref XXH_OK on success.
8298 * @return @ref XXH_ERROR on failure.
8300 * @note This function resets and seeds a state. Call it before @ref XXH64_update().
8302 XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
8305 * @brief Consumes a block of @p input to an @ref XXH64_state_t.
8307 * @param statePtr The state struct to update.
8308 * @param input The block of data to be hashed, at least @p length bytes in size.
8309 * @param length The length of @p input, in bytes.
8311 * @pre
8312 * @p statePtr must not be `NULL`.
8313 * @pre
8314 * The memory between @p input and @p input + @p length must be valid,
8315 * readable, contiguous memory. However, if @p length is `0`, @p input may be
8316 * `NULL`. In C++, this also must be *TriviallyCopyable*.
8318 * @return @ref XXH_OK on success.
8319 * @return @ref XXH_ERROR on failure.
8321 * @note Call this to incrementally consume blocks of data.
8323 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
8326 * @brief Returns the calculated hash value from an @ref XXH64_state_t.
8328 * @param statePtr The state struct to calculate the hash from.
8330 * @pre
8331 * @p statePtr must not be `NULL`.
8333 * @return The calculated 64-bit xxHash64 value from that state.
8335 * @note
8336 * Calling XXH64_digest() will not affect @p statePtr, so you can update,
8337 * digest, and update again.
8339 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
8340 #endif /* !XXH_NO_STREAM */
8341 /******* Canonical representation *******/
8344 * @brief Canonical (big endian) representation of @ref XXH64_hash_t.
8346 typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
8349 * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t.
8351 * @param dst The @ref XXH64_canonical_t pointer to be stored to.
8352 * @param hash The @ref XXH64_hash_t to be converted.
8354 * @pre
8355 * @p dst must not be `NULL`.
8357 * @see @ref canonical_representation_example "Canonical Representation Example"
8359 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);
8362 * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t.
8364 * @param src The @ref XXH64_canonical_t to convert.
8366 * @pre
8367 * @p src must not be `NULL`.
8369 * @return The converted hash.
8371 * @see @ref canonical_representation_example "Canonical Representation Example"
8373 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
8375 #ifndef XXH_NO_XXH3
8378 * @}
8379 * ************************************************************************
8380 * @defgroup XXH3_family XXH3 family
8381 * @ingroup public
8382 * @{
8384 * XXH3 is a more recent hash algorithm featuring:
8385 * - Improved speed for both small and large inputs
8386 * - True 64-bit and 128-bit outputs
8387 * - SIMD acceleration
8388 * - Improved 32-bit viability
8390 * Speed analysis methodology is explained here:
8392 * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
8394 * Compared to XXH64, expect XXH3 to run approximately
8395 * ~2x faster on large inputs and >3x faster on small ones,
8396 * though exact differences vary depending on the platform.
8398 * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
8399 * but does not require them.
8400 * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
8401 * at competitive speeds, even without vector support. Further details are
8402 * explained in the implementation.
8404 * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD
8405 * implementations for many common platforms:
8406 * - AVX512
8407 * - AVX2
8408 * - SSE2
8409 * - ARM NEON
8410 * - WebAssembly SIMD128
8411 * - POWER8 VSX
8412 * - s390x ZVector
8413 * This can be controlled via the @ref XXH_VECTOR macro, but it automatically
8414 * selects the best version according to predefined macros. For the x86 family, an
8415 * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c.
8417 * XXH3 implementation is portable:
8418 * it has a generic C90 formulation that can be compiled on any platform,
8419 * all implementations generate exactly the same hash value on all platforms.
8420 * Starting from v0.8.0, it's also labelled "stable", meaning that
8421 * any future version will also generate the same hash value.
8423 * XXH3 offers 2 variants, _64bits and _128bits.
8425 * When only 64 bits are needed, prefer invoking the _64bits variant, as it
8426 * reduces the amount of mixing, resulting in faster speed on small inputs.
8427 * It's also generally simpler to manipulate a scalar return type than a struct.
8429 * The API supports one-shot hashing, streaming mode, and custom secrets.
8431 /*-**********************************************************************
8432 * XXH3 64-bit variant
8433 ************************************************************************/
8436 * @brief Calculates 64-bit unseeded variant of XXH3 hash of @p input.
8438 * @param input The block of data to be hashed, at least @p length bytes in size.
8439 * @param length The length of @p input, in bytes.
8441 * @pre
8442 * The memory between @p input and @p input + @p length must be valid,
8443 * readable, contiguous memory. However, if @p length is `0`, @p input may be
8444 * `NULL`. In C++, this also must be *TriviallyCopyable*.
8446 * @return The calculated 64-bit XXH3 hash value.
8448 * @note
8449 * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of `0`, however
8450 * it may have slightly better performance due to constant propagation of the
8451 * defaults.
8453 * @see
8454 * XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
8455 * @see @ref single_shot_example "Single Shot Example" for an example.
8457 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
8460 * @brief Calculates 64-bit seeded variant of XXH3 hash of @p input.
8462 * @param input The block of data to be hashed, at least @p length bytes in size.
8463 * @param length The length of @p input, in bytes.
8464 * @param seed The 64-bit seed to alter the hash result predictably.
8466 * @pre
8467 * The memory between @p input and @p input + @p length must be valid,
8468 * readable, contiguous memory. However, if @p length is `0`, @p input may be
8469 * `NULL`. In C++, this also must be *TriviallyCopyable*.
8471 * @return The calculated 64-bit XXH3 hash value.
8473 * @note
8474 * seed == 0 produces the same results as @ref XXH3_64bits().
8476 * This variant generates a custom secret on the fly based on default secret
8477 * altered using the @p seed value.
8479 * While this operation is decently fast, note that it's not completely free.
8481 * @see @ref single_shot_example "Single Shot Example" for an example.
8483 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
8486 * The bare minimum size for a custom secret.
8488 * @see
8489 * XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
8490 * XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
8492 #define XXH3_SECRET_SIZE_MIN 136
8495 * @brief Calculates 64-bit variant of XXH3 with a custom "secret".
8497 * @param data The block of data to be hashed, at least @p len bytes in size.
8498 * @param len The length of @p data, in bytes.
8499 * @param secret The secret data.
8500 * @param secretSize The length of @p secret, in bytes.
8502 * @return The calculated 64-bit XXH3 hash value.
8504 * @pre
8505 * The memory between @p data and @p data + @p len must be valid,
8506 * readable, contiguous memory. However, if @p len is `0`, @p data may be
8507 * `NULL`. In C++, this also must be *TriviallyCopyable*.
8509 * It's possible to provide any blob of bytes as a "secret" to generate the hash.
8510 * This makes it more difficult for an external actor to prepare an intentional collision.
8511 * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
8512 * However, the quality of the secret impacts the dispersion of the hash algorithm.
8513 * Therefore, the secret _must_ look like a bunch of random bytes.
8514 * Avoid "trivial" or structured data such as repeated sequences or a text document.
8515 * Whenever in doubt about the "randomness" of the blob of bytes,
8516 * consider employing @ref XXH3_generateSecret() instead (see below).
8517 * It will generate a proper high entropy secret derived from the blob of bytes.
8518 * Another advantage of using XXH3_generateSecret() is that
8519 * it guarantees that all bits within the initial blob of bytes
8520 * will impact every bit of the output.
8521 * This is not necessarily the case when using the blob of bytes directly
8522 * because, when hashing _small_ inputs, only a portion of the secret is employed.
8524 * @see @ref single_shot_example "Single Shot Example" for an example.
8526 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
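/*!
 * A minimal sketch (not part of the library) of the recommendation above:
 * derive a high-entropy secret from an application-provided blob with
 * XXH3_generateSecret() (part of the experimental API, hence the
 * XXH_STATIC_LINKING_ONLY define), then hash with XXH3_64bits_withSecret().
 * Names and sizes are illustrative.
 * @code{.c}
 * #define XXH_STATIC_LINKING_ONLY
 * #include <assert.h>
 * #include "xxhash.h"
 *
 * static XXH64_hash_t hashWithDerivedSecret(const void* data, size_t len,
 *                                           const void* blob, size_t blobSize)
 * {
 *     unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];  // comfortably >= XXH3_SECRET_SIZE_MIN
 *     XXH_errorcode err = XXH3_generateSecret(secret, sizeof(secret), blob, blobSize);
 *     assert(err == XXH_OK);
 *     return XXH3_64bits_withSecret(data, len, secret, sizeof(secret));
 * }
 * @endcode
 */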
8529 /******* Streaming *******/
8530 #ifndef XXH_NO_STREAM
8532 * Streaming requires state maintenance.
8533 * This operation costs memory and CPU.
8534 * As a consequence, streaming is slower than one-shot hashing.
8535 * For better performance, prefer one-shot functions whenever applicable.
8539 * @brief The opaque state struct for the XXH3 streaming API.
8541 * @see XXH3_state_s for details.
8543 typedef struct XXH3_state_s XXH3_state_t;
8544 XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
8545 XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
8548 * @brief Copies one @ref XXH3_state_t to another.
8550 * @param dst_state The state to copy to.
8551 * @param src_state The state to copy from.
8552 * @pre
8553 * @p dst_state and @p src_state must not be `NULL` and must not overlap.
8555 XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
8558 * @brief Resets an @ref XXH3_state_t to begin a new hash.
8560 * @param statePtr The state struct to reset.
8562 * @pre
8563 * @p statePtr must not be `NULL`.
8565 * @return @ref XXH_OK on success.
8566 * @return @ref XXH_ERROR on failure.
8568 * @note
8569 * - This function resets `statePtr` and generates a secret with default parameters.
8570 * - Call this function before @ref XXH3_64bits_update().
8571 * - Digest will be equivalent to `XXH3_64bits()`.
8574 XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
8577 * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
8579 * @param statePtr The state struct to reset.
8580 * @param seed The 64-bit seed to alter the hash result predictably.
8582 * @pre
8583 * @p statePtr must not be `NULL`.
8585 * @return @ref XXH_OK on success.
8586 * @return @ref XXH_ERROR on failure.
8588 * @note
8589 * - This function resets `statePtr` and generates a secret from `seed`.
8590 * - Call this function before @ref XXH3_64bits_update().
8591 * - Digest will be equivalent to `XXH3_64bits_withSeed()`.
8594 XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
8597 * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
8599 * @param statePtr The state struct to reset.
8600 * @param secret The secret data.
8601 * @param secretSize The length of @p secret, in bytes.
8603 * @pre
8604 * @p statePtr must not be `NULL`.
8606 * @return @ref XXH_OK on success.
8607 * @return @ref XXH_ERROR on failure.
8609 * @note
8610 * `secret` is referenced; it _must outlive_ the hash streaming session.
8612 * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
8613 * and the quality of produced hash values depends on secret's entropy
8614 * (secret's content should look like a bunch of random bytes).
8615 * When in doubt about the randomness of a candidate `secret`,
8616 * consider employing `XXH3_generateSecret()` instead (see below).
8618 XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
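/*!
 * Illustrative sketch (not part of the library): as noted above, the secret is
 * referenced rather than copied, so it must stay valid until the final digest.
 * `kSessionSecret` is a hypothetical application-owned buffer with static
 * storage duration, assumed to contain high-entropy bytes.
 * @code{.c}
 * #include "xxhash.h"
 *
 * extern const unsigned char kSessionSecret[XXH3_SECRET_SIZE_MIN];
 *
 * static XXH64_hash_t hashChunksWithSecret(const void* p1, size_t n1,
 *                                          const void* p2, size_t n2)
 * {
 *     XXH3_state_t* state = XXH3_createState();
 *     XXH3_64bits_reset_withSecret(state, kSessionSecret, sizeof(kSessionSecret));
 *     XXH3_64bits_update(state, p1, n1);
 *     XXH3_64bits_update(state, p2, n2);
 *     XXH64_hash_t hash = XXH3_64bits_digest(state);
 *     XXH3_freeState(state);
 *     return hash;
 * }
 * @endcode
 */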
8621 * @brief Consumes a block of @p input to an @ref XXH3_state_t.
8623 * @param statePtr The state struct to update.
8624 * @param input The block of data to be hashed, at least @p length bytes in size.
8625 * @param length The length of @p input, in bytes.
8627 * @pre
8628 * @p statePtr must not be `NULL`.
8629 * @pre
8630 * The memory between @p input and @p input + @p length must be valid,
8631 * readable, contiguous memory. However, if @p length is `0`, @p input may be
8632 * `NULL`. In C++, this also must be *TriviallyCopyable*.
8634 * @return @ref XXH_OK on success.
8635 * @return @ref XXH_ERROR on failure.
8637 * @note Call this to incrementally consume blocks of data.
8639 XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
8642 * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t.
8644 * @param statePtr The state struct to calculate the hash from.
8646 * @pre
8647 * @p statePtr must not be `NULL`.
8649 * @return The calculated XXH3 64-bit hash value from that state.
8651 * @note
8652 * Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
8653 * digest, and update again.
8655 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
8656 #endif /* !XXH_NO_STREAM */
8658 /* note : canonical representation of XXH3 is the same as XXH64
8659 * since they both produce XXH64_hash_t values */
8662 /*-**********************************************************************
8663 * XXH3 128-bit variant
8664 ************************************************************************/
8667 * @brief The return value from 128-bit hashes.
8669 * Stored in little endian order, although the fields themselves are in native
8670 * endianness.
8672 typedef struct {
8673 XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */
8674 XXH64_hash_t high64; /*!< `value >> 64` */
8675 } XXH128_hash_t;
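/*!
 * Illustrative sketch (not part of the library): printing an @ref XXH128_hash_t
 * as the conventional 32-digit hexadecimal string, high 64 bits first (the same
 * byte order as the canonical representation).
 * @code{.c}
 * #include <stdio.h>
 * #include <inttypes.h>
 * #include "xxhash.h"
 *
 * static void printXxh128(XXH128_hash_t h)
 * {
 *     printf("%016" PRIx64 "%016" PRIx64 "\n",
 *            (uint64_t)h.high64, (uint64_t)h.low64);
 * }
 * @endcode
 */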
8678 * @brief Calculates 128-bit unseeded variant of XXH3 of @p data.
8680 * @param data The block of data to be hashed, at least @p len bytes in size.
8681 * @param len The length of @p data, in bytes.
8683 * @return The calculated 128-bit variant of XXH3 value.
8685 * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
8686 * for shorter inputs.
8688 * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of `0`, however
8689 * it may have slightly better performance due to constant propagation of the
8690 * defaults.
8692 * @see XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
8693 * @see @ref single_shot_example "Single Shot Example" for an example.
8695 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
8696 /*! @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
8698 * @param data The block of data to be hashed, at least @p len bytes in size.
8699 * @param len The length of @p data, in bytes.
8700 * @param seed The 64-bit seed to alter the hash result predictably.
8702 * @return The calculated 128-bit variant of XXH3 value.
8704 * @note
8705 * seed == 0 produces the same results as @ref XXH3_128bits().
8707 * This variant generates a custom secret on the fly based on default secret
8708 * altered using the @p seed value.
8710 * While this operation is decently fast, note that it's not completely free.
8712 * @see XXH3_128bits(), XXH3_128bits_withSecret(): other seeding variants
8713 * @see @ref single_shot_example "Single Shot Example" for an example.
8715 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
8717 * @brief Calculates 128-bit variant of XXH3 with a custom "secret".
8719 * @param data The block of data to be hashed, at least @p len bytes in size.
8720 * @param len The length of @p data, in bytes.
8721 * @param secret The secret data.
8722 * @param secretSize The length of @p secret, in bytes.
8724 * @return The calculated 128-bit variant of XXH3 value.
8726 * It's possible to provide any blob of bytes as a "secret" to generate the hash.
8727 * This makes it more difficult for an external actor to prepare an intentional collision.
8728 * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
8729 * However, the quality of the secret impacts the dispersion of the hash algorithm.
8730 * Therefore, the secret _must_ look like a bunch of random bytes.
8731 * Avoid "trivial" or structured data such as repeated sequences or a text document.
8732 * Whenever in doubt about the "randomness" of the blob of bytes,
8733 * consider employing @ref XXH3_generateSecret() instead (see below).
8734 * It will generate a proper high entropy secret derived from the blob of bytes.
8735 * Another advantage of using XXH3_generateSecret() is that
8736 * it guarantees that all bits within the initial blob of bytes
8737 * will impact every bit of the output.
8738 * This is not necessarily the case when using the blob of bytes directly
8739 * because, when hashing _small_ inputs, only a portion of the secret is employed.
8741 * @see @ref single_shot_example "Single Shot Example" for an example.
8743 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
8745 /******* Streaming *******/
8746 #ifndef XXH_NO_STREAM
8748 * Streaming requires state maintenance.
8749 * This operation costs memory and CPU.
8750 * As a consequence, streaming is slower than one-shot hashing.
8751 * For better performance, prefer one-shot functions whenever applicable.
8753 * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
8754 * Use already declared XXH3_createState() and XXH3_freeState().
8756 * All reset and streaming functions have same meaning as their 64-bit counterpart.
8760 * @brief Resets an @ref XXH3_state_t to begin a new hash.
8762 * @param statePtr The state struct to reset.
8764 * @pre
8765 * @p statePtr must not be `NULL`.
8767 * @return @ref XXH_OK on success.
8768 * @return @ref XXH_ERROR on failure.
8770 * @note
8771 * - This function resets `statePtr` and generates a secret with default parameters.
8772 * - Call it before @ref XXH3_128bits_update().
8773 * - Digest will be equivalent to `XXH3_128bits()`.
8775 XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
8778 * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
8780 * @param statePtr The state struct to reset.
8781 * @param seed The 64-bit seed to alter the hash result predictably.
8783 * @pre
8784 * @p statePtr must not be `NULL`.
8786 * @return @ref XXH_OK on success.
8787 * @return @ref XXH_ERROR on failure.
8789 * @note
8790 * - This function resets `statePtr` and generates a secret from `seed`.
8791 * - Call it before @ref XXH3_128bits_update().
8792 * - Digest will be equivalent to `XXH3_128bits_withSeed()`.
8794 XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
8796 * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
8798 * @param statePtr The state struct to reset.
8799 * @param secret The secret data.
8800 * @param secretSize The length of @p secret, in bytes.
8802 * @pre
8803 * @p statePtr must not be `NULL`.
8805 * @return @ref XXH_OK on success.
8806 * @return @ref XXH_ERROR on failure.
8808 * `secret` is referenced; it _must outlive_ the hash streaming session.
8809 * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
8810 * and the quality of produced hash values depends on secret's entropy
8811 * (secret's content should look like a bunch of random bytes).
8812 * When in doubt about the randomness of a candidate `secret`,
8813 * consider employing `XXH3_generateSecret()` instead (see below).
8815 XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
8818 * @brief Consumes a block of @p input to an @ref XXH3_state_t.
8820 * Call this to incrementally consume blocks of data.
8822 * @param statePtr The state struct to update.
8823 * @param input The block of data to be hashed, at least @p length bytes in size.
8824 * @param length The length of @p input, in bytes.
8826 * @pre
8827 * @p statePtr must not be `NULL`.
8829 * @return @ref XXH_OK on success.
8830 * @return @ref XXH_ERROR on failure.
8832 * @note
8833 * The memory between @p input and @p input + @p length must be valid,
8834 * readable, contiguous memory. However, if @p length is `0`, @p input may be
8835 * `NULL`. In C++, this also must be *TriviallyCopyable*.
8838 XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
8841 * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
8843 * @param statePtr The state struct to calculate the hash from.
8845 * @pre
8846 * @p statePtr must not be `NULL`.
8848 * @return The calculated XXH3 128-bit hash value from that state.
8850 * @note
8851 * Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
8852 * digest, and update again.
8855 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
8856 #endif /* !XXH_NO_STREAM */
8858 /* The following helper functions make it possible to compare XXH128_hash_t values.
8859 * Since XXH128_hash_t is a structure, this capability is not offered by the language.
8860 * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
8863 * @brief Check equality of two XXH128_hash_t values
8865 * @param h1 The 128-bit hash value.
8866 * @param h2 Another 128-bit hash value.
8868 * @return `1` if `h1` and `h2` are equal.
8869 * @return `0` if they are not.
8871 XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
8874 * @brief Compares two @ref XXH128_hash_t
8876 * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
8878 * @param h128_1 Left-hand side value
8879 * @param h128_2 Right-hand side value
8881 * @return >0 if @p h128_1 > @p h128_2
8882 * @return =0 if @p h128_1 == @p h128_2
8883 * @return <0 if @p h128_1 < @p h128_2
8885 XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
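/*!
 * Illustrative sketch (not part of the library): since XXH128_cmp() follows the
 * qsort()/bsearch() comparator convention, an array of XXH128_hash_t can be
 * sorted and searched directly with the C standard library.
 * @code{.c}
 * #include <stdlib.h>
 * #include "xxhash.h"
 *
 * // Sorts 'hashes' in place, then reports whether 'needle' is present.
 * static int containsHash(XXH128_hash_t* hashes, size_t count, XXH128_hash_t needle)
 * {
 *     qsort(hashes, count, sizeof(XXH128_hash_t), XXH128_cmp);
 *     return bsearch(&needle, hashes, count, sizeof(XXH128_hash_t), XXH128_cmp) != NULL;
 * }
 * @endcode
 */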
8888 /******* Canonical representation *******/
8889 typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
8893 * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
8895 * @param dst The @ref XXH128_canonical_t pointer to be stored to.
8896 * @param hash The @ref XXH128_hash_t to be converted.
8898 * @pre
8899 * @p dst must not be `NULL`.
8900 * @see @ref canonical_representation_example "Canonical Representation Example"
8902 XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
8905 * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t.
8907 * @param src The @ref XXH128_canonical_t to convert.
8909 * @pre
8910 * @p src must not be `NULL`.
8912 * @return The converted hash.
8913 * @see @ref canonical_representation_example "Canonical Representation Example"
8915 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
8918 #endif /* !XXH_NO_XXH3 */
8919 #endif /* XXH_NO_LONG_LONG */
8922 * @}
8924 #endif /* XXHASH_H_5627135585666179 */
8928 #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
8929 #define XXHASH_H_STATIC_13879238742
8930 /* ****************************************************************************
8931 * This section contains declarations which are not guaranteed to remain stable.
8932 * They may change in future versions, becoming incompatible with a different
8933 * version of the library.
8934 * These declarations should only be used with static linking.
8935 * Never use them in association with dynamic linking!
8936 ***************************************************************************** */
8939 * These definitions are only present to allow static allocation
8940 * of XXH states, on stack or in a struct, for example.
8941 * Never **ever** access their members directly.
8945 * @internal
8946 * @brief Structure for XXH32 streaming API.
8948 * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
8949 * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
8950 * an opaque type. This allows fields to safely be changed.
8952 * Typedef'd to @ref XXH32_state_t.
8953 * Do not access the members of this struct directly.
8954 * @see XXH64_state_s, XXH3_state_s
8956 struct XXH32_state_s {
8957 XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
8958 XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */
8959 XXH32_hash_t v[4]; /*!< Accumulator lanes */
8960 XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
8961 XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */
8962 XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */
8963 }; /* typedef'd to XXH32_state_t */
8966 #ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */
8969 * @internal
8970 * @brief Structure for XXH64 streaming API.
8972 * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
8973 * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
8974 * an opaque type. This allows fields to safely be changed.
8976 * Typedef'd to @ref XXH64_state_t.
8977 * Do not access the members of this struct directly.
8978 * @see XXH32_state_s, XXH3_state_s
8980 struct XXH64_state_s {
8981 XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */
8982 XXH64_hash_t v[4]; /*!< Accumulator lanes */
8983 XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
8984 XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */
8985 XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyway */
8986 XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */
8987 }; /* typedef'd to XXH64_state_t */
8989 #ifndef XXH_NO_XXH3
8991 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
8992 # include <stdalign.h>
8993 # define XXH_ALIGN(n) alignas(n)
8994 #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
8995 /* In C++ alignas() is a keyword */
8996 # define XXH_ALIGN(n) alignas(n)
8997 #elif defined(__GNUC__)
8998 # define XXH_ALIGN(n) __attribute__ ((aligned(n)))
8999 #elif defined(_MSC_VER)
9000 # define XXH_ALIGN(n) __declspec(align(n))
9001 #else
9002 # define XXH_ALIGN(n) /* disabled */
9003 #endif
9005 /* Old GCC versions only accept the attribute after the type in structures. */
9006 #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
9007 && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
9008 && defined(__GNUC__)
9009 # define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
9010 #else
9011 # define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
9012 #endif
9015 * @brief The size of the internal XXH3 buffer.
9017 * This is the optimal update size for incremental hashing.
9019 * @see XXH3_64b_update(), XXH3_128b_update().
9021 #define XXH3_INTERNALBUFFER_SIZE 256
9024 * @internal
9025 * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
9027 * This is the size used in @ref XXH3_kSecret and the seeded functions.
9029 * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
9031 #define XXH3_SECRET_DEFAULT_SIZE 192
9034 * @internal
9035 * @brief Structure for XXH3 streaming API.
9037 * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
9038 * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
9039 * Otherwise it is an opaque type.
9040 * Never use this definition in combination with a dynamic library.
9041 * This allows fields to safely be changed in the future.
9043 * @note ** This structure has a strict alignment requirement of 64 bytes!! **
9044 * Do not allocate this with `malloc()` or `new`,
9045 * it will not be sufficiently aligned.
9046 * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
9048 * Typedef'd to @ref XXH3_state_t.
9049 * Never access the members of this struct directly.
9051 * @see XXH3_INITSTATE() for stack initialization.
9052 * @see XXH3_createState(), XXH3_freeState().
9053 * @see XXH32_state_s, XXH64_state_s
9055 struct XXH3_state_s {
9056 XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
9057 /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
9058 XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
9059 /*!< Used to store a custom secret generated from a seed. */
9060 XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
9061 /*!< The internal buffer. @see XXH32_state_s::mem32 */
9062 XXH32_hash_t bufferedSize;
9063 /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
9064 XXH32_hash_t useSeed;
9065 /*!< Reserved field. Needed for padding on 64-bit. */
9066 size_t nbStripesSoFar;
9067 /*!< Number of stripes processed. */
9068 XXH64_hash_t totalLen;
9069 /*!< Total length hashed. 64-bit even on 32-bit targets. */
9070 size_t nbStripesPerBlock;
9071 /*!< Number of stripes per block. */
9072 size_t secretLimit;
9073 /*!< Size of @ref customSecret or @ref extSecret */
9074 XXH64_hash_t seed;
9075 /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
9076 XXH64_hash_t reserved64;
9077 /*!< Reserved field. */
9078 const unsigned char* extSecret;
9079 /*!< Reference to an external secret for the _withSecret variants, NULL
9080 * for other variants. */
9081 /* note: there may be some padding at the end due to alignment on 64 bytes */
9082 }; /* typedef'd to XXH3_state_t */
9084 #undef XXH_ALIGN_MEMBER
9087 * @brief Initializes a stack-allocated `XXH3_state_s`.
9089 * When the @ref XXH3_state_t structure is merely emplaced on stack,
9090 * it should be initialized with XXH3_INITSTATE() or a memset()
9091 * in case its first reset uses XXH3_NNbits_reset_withSeed().
9092 * This init can be omitted if the first reset uses default or _withSecret mode.
9093 * This operation isn't necessary when the state is created with XXH3_createState().
9094 * Note that this doesn't prepare the state for a streaming operation,
9095 * it's still necessary to use XXH3_NNbits_reset*() afterwards.
9097 #define XXH3_INITSTATE(XXH3_state_ptr) \
9098 do { \
9099 XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
9100 tmp_xxh3_state_ptr->seed = 0; \
9101 tmp_xxh3_state_ptr->extSecret = NULL; \
9102 } while(0)
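/*
 * Illustrative sketch (assumptions noted): how a stack-allocated state is
 * typically initialized and used with the streaming XXH3 API declared earlier
 * in this header. `buf` and `bufSize` are placeholder names for the caller's
 * input.
 * @code{.c}
 *   XXH3_state_t state;             // stack allocation is explicitly supported
 *   XXH3_INITSTATE(&state);         // required before a first _withSeed() reset
 *   XXH3_64bits_reset_withSeed(&state, 1234);
 *   XXH3_64bits_update(&state, buf, bufSize);
 *   { XXH64_hash_t const h = XXH3_64bits_digest(&state); (void)h; }
 * @endcode
 */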
9106 * @brief Calculates the 128-bit hash of @p data using XXH3.
9108 * @param data The block of data to be hashed, at least @p len bytes in size.
9109 * @param len The length of @p data, in bytes.
9110 * @param seed The 64-bit seed to alter the hash's output predictably.
9112 * @pre
9113 * The memory between @p data and @p data + @p len must be valid,
9114 * readable, contiguous memory. However, if @p len is `0`, @p data may be
9115 * `NULL`. In C++, this also must be *TriviallyCopyable*.
9117 * @return The calculated 128-bit XXH3 value.
9119 * @see @ref single_shot_example "Single Shot Example" for an example.
9121 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
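/*
 * Illustrative sketch: one-shot 128-bit hashing with the XXH128() declaration
 * just above. `buf` and `bufSize` are placeholder names; the two 64-bit halves
 * of the result are exposed as the low64/high64 members of XXH128_hash_t.
 * @code{.c}
 *   XXH128_hash_t const h = XXH128(buf, bufSize, 0);   // 0 = seed
 *   // h.low64 and h.high64 together form the 128-bit digest
 * @endcode
 */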
9124 /* === Experimental API === */
9125 /* Symbols defined below must be considered tied to a specific library version. */
9128 * @brief Derive a high-entropy secret from any user-defined content, named customSeed.
9130 * @param secretBuffer A writable buffer for derived high-entropy secret data.
9131  * @param secretSize Size of secretBuffer, in bytes. Must be >= XXH3_SECRET_SIZE_MIN.
9132 * @param customSeed A user-defined content.
9133 * @param customSeedSize Size of customSeed, in bytes.
9135 * @return @ref XXH_OK on success.
9136 * @return @ref XXH_ERROR on failure.
9138 * The generated secret can be used in combination with `*_withSecret()` functions.
9139 * The `_withSecret()` variants are useful to provide a higher level of protection
9140  * than a 64-bit seed, as it becomes much more difficult for an external actor to
9141 * guess how to impact the calculation logic.
9143 * The function accepts as input a custom seed of any length and any content,
9144 * and derives from it a high-entropy secret of length @p secretSize into an
9145 * already allocated buffer @p secretBuffer.
9147 * The generated secret can then be used with any `*_withSecret()` variant.
9148 * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
9149 * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
9150 * are part of this list. They all accept a `secret` parameter
9151 * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
9152 * _and_ feature very high entropy (consist of random-looking bytes).
9153 * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
9154 * be employed to ensure proper quality.
9156 * @p customSeed can be anything. It can have any size, even small ones,
9157 * and its content can be anything, even "poor entropy" sources such as a bunch
9158 * of zeroes. The resulting `secret` will nonetheless provide all required qualities.
9160 * @pre
9161 * - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
9162 * - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
9164 * Example code:
9165 * @code{.c}
9166 * #include <stdio.h>
9167 * #include <stdlib.h>
9168 * #include <string.h>
9169 * #define XXH_STATIC_LINKING_ONLY // expose unstable API
9170 * #include "xxhash.h"
9171 * // Hashes argv[2] using the entropy from argv[1].
9172 * int main(int argc, char* argv[])
9174 * char secret[XXH3_SECRET_SIZE_MIN];
9175  *    if (argc != 3) { return 1; }
9176 * XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
9177 * XXH64_hash_t h = XXH3_64bits_withSecret(
9178 * argv[2], strlen(argv[2]),
9179 * secret, sizeof(secret)
9180 * );
9181 * printf("%016llx\n", (unsigned long long) h);
9183 * @endcode
9185 XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
9188 * @brief Generate the same secret as the _withSeed() variants.
9190  * @param secretBuffer A writable buffer of @ref XXH3_SECRET_DEFAULT_SIZE bytes
9191 * @param seed The 64-bit seed to alter the hash result predictably.
9193 * The generated secret can be used in combination with
9194  * `*_withSecret()` and `_withSecretandSeed()` variants.
9196 * Example C++ `std::string` hash class:
9197 * @code{.cpp}
9198 * #include <string>
9199 * #define XXH_STATIC_LINKING_ONLY // expose unstable API
9200 * #include "xxhash.h"
9201 * // Slow, seeds each time
9202 * class HashSlow {
9203 * XXH64_hash_t seed;
9204 * public:
9205 * HashSlow(XXH64_hash_t s) : seed{s} {}
9206 * size_t operator()(const std::string& x) const {
9207 * return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
9209 * };
9210 * // Fast, caches the seeded secret for future uses.
9211 * class HashFast {
9212  *    unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
9213  *  public:
9214  *    HashFast(XXH64_hash_t s) {
9215  *        XXH3_generateSecret_fromSeed(secret, s);
9217 * size_t operator()(const std::string& x) const {
9218 * return size_t{
9219 * XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
9220 * };
9222 * };
9223 * @endcode
9225 XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
9228 * @brief Calculates 64/128-bit seeded variant of XXH3 hash of @p data.
9230 * @param data The block of data to be hashed, at least @p len bytes in size.
9231 * @param len The length of @p data, in bytes.
9232 * @param secret The secret data.
9233 * @param secretSize The length of @p secret, in bytes.
9234 * @param seed The 64-bit seed to alter the hash result predictably.
9236 * These variants generate hash values using either
9237 * @p seed for "short" keys (< @ref XXH3_MIDSIZE_MAX = 240 bytes)
9238 * or @p secret for "large" keys (>= @ref XXH3_MIDSIZE_MAX).
9240 * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
9241 * `_withSeed()` has to generate the secret on the fly for "large" keys.
9242 * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
9243 * `_withSecret()` has to generate the masks on the fly for "small" keys,
9244 * which requires more instructions than _withSeed() variants.
9245  * Therefore, the _withSecretandSeed() variant combines the best of both worlds.
9247 * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
9248 * this variant produces *exactly* the same results as `_withSeed()` variant,
9249 * hence offering only a pure speed benefit on "large" input,
9250 * by skipping the need to regenerate the secret for every large input.
9252 * Another usage scenario is to hash the secret to a 64-bit hash value,
9253 * for example with XXH3_64bits(), which then becomes the seed,
9254 * and then employ both the seed and the secret in _withSecretandSeed().
9255 * On top of speed, an added benefit is that each bit in the secret
9256 * has a 50% chance to swap each bit in the output, via its impact to the seed.
9258 * This is not guaranteed when using the secret directly in "small data" scenarios,
9259 * because only portions of the secret are employed for small data.
9261 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
9262 XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
9263 XXH_NOESCAPE const void* secret, size_t secretSize,
9264 XXH64_hash_t seed);
9266  * @brief Calculates 128-bit seeded variant of XXH3 hash of @p input.
9268  * @param input The block of data to be hashed, at least @p length bytes in size.
9269  * @param length The length of @p input, in bytes.
9270 * @param secret The secret data.
9271 * @param secretSize The length of @p secret, in bytes.
9272 * @param seed64 The 64-bit seed to alter the hash result predictably.
9274  * @return The calculated 128-bit XXH3 value.
9277 * @see XXH3_64bits_withSecretandSeed()
9279 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
9280 XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
9281 XXH_NOESCAPE const void* secret, size_t secretSize,
9282 XXH64_hash_t seed64);
9283 #ifndef XXH_NO_STREAM
9285 * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
9287 * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
9288 * @param secret The secret data.
9289 * @param secretSize The length of @p secret, in bytes.
9290 * @param seed64 The 64-bit seed to alter the hash result predictably.
9292 * @return @ref XXH_OK on success.
9293 * @return @ref XXH_ERROR on failure.
9295 * @see XXH3_64bits_withSecretandSeed()
9297 XXH_PUBLIC_API XXH_errorcode
9298 XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
9299 XXH_NOESCAPE const void* secret, size_t secretSize,
9300 XXH64_hash_t seed64);
9302 * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
9304 * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
9305 * @param secret The secret data.
9306 * @param secretSize The length of @p secret, in bytes.
9307 * @param seed64 The 64-bit seed to alter the hash result predictably.
9309 * @return @ref XXH_OK on success.
9310 * @return @ref XXH_ERROR on failure.
9312 * @see XXH3_64bits_withSecretandSeed()
9314 XXH_PUBLIC_API XXH_errorcode
9315 XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
9316 XXH_NOESCAPE const void* secret, size_t secretSize,
9317 XXH64_hash_t seed64);
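/*
 * Illustrative sketch: streaming counterpart of the variants above, assuming a
 * `secret` buffer filled by XXH3_generateSecret_fromSeed() (or any secret of at
 * least XXH3_SECRET_SIZE_MIN bytes) with its matching `seed`, and placeholder
 * input chunks `chunk1`/`chunk2`. The state comes from XXH3_createState() so
 * the 64-byte alignment requirement is satisfied.
 * @code{.c}
 *   XXH3_state_t* const st = XXH3_createState();
 *   if (st != NULL) {
 *       XXH64_hash_t hash;
 *       XXH3_64bits_reset_withSecretandSeed(st, secret, sizeof(secret), seed);
 *       XXH3_64bits_update(st, chunk1, chunk1Size);
 *       XXH3_64bits_update(st, chunk2, chunk2Size);
 *       hash = XXH3_64bits_digest(st);
 *       XXH3_freeState(st);
 *       (void)hash;
 *   }
 * @endcode
 */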
9318 #endif /* !XXH_NO_STREAM */
9320 #endif /* !XXH_NO_XXH3 */
9321 #endif /* XXH_NO_LONG_LONG */
9322 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
9323 # define XXH_IMPLEMENTATION
9324 #endif
9326 #endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
9329 /* ======================================================================== */
9330 /* ======================================================================== */
9331 /* ======================================================================== */
9334 /*-**********************************************************************
9335 * xxHash implementation
9336 *-**********************************************************************
9337 * xxHash's implementation used to be hosted inside xxhash.c.
9339  * However, inlining requires the implementation to be visible to the compiler,
9340  * and hence to be included alongside the header.
9341 * Previously, implementation was hosted inside xxhash.c,
9342 * which was then #included when inlining was activated.
9343 * This construction created issues with a few build and install systems,
9344 * as it required xxhash.c to be stored in /include directory.
9346 * xxHash implementation is now directly integrated within xxhash.h.
9347 * As a consequence, xxhash.c is no longer needed in /include.
9349 * xxhash.c is still available and is still useful.
9350 * In a "normal" setup, when xxhash is not inlined,
9351 * xxhash.h only exposes the prototypes and public symbols,
9352 * while xxhash.c can be built into an object file xxhash.o
9353 * which can then be linked into the final binary.
9354 ************************************************************************/
9356 #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
9357 || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
9358 # define XXH_IMPLEM_13a8737387
9360 /* *************************************
9361 * Tuning parameters
9362 ***************************************/
9365 * @defgroup tuning Tuning parameters
9366 * @{
9368 * Various macros to control xxHash's behavior.
9370 #ifdef XXH_DOXYGEN
9372 * @brief Define this to disable 64-bit code.
9374 * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
9376 # define XXH_NO_LONG_LONG
9377 # undef XXH_NO_LONG_LONG /* don't actually */
9379 * @brief Controls how unaligned memory is accessed.
9381 * By default, access to unaligned memory is controlled by `memcpy()`, which is
9382 * safe and portable.
9384 * Unfortunately, on some target/compiler combinations, the generated assembly
9385 * is sub-optimal.
9387  * The below switch allows selection of a different access method
9388 * in the search for improved performance.
9390 * @par Possible options:
9392 * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
9393 * @par
9394 * Use `memcpy()`. Safe and portable. Note that most modern compilers will
9395 * eliminate the function call and treat it as an unaligned access.
9397 * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
9398 * @par
9399 * Depends on compiler extensions and is therefore not portable.
9400 * This method is safe _if_ your compiler supports it,
9401 * and *generally* as fast or faster than `memcpy`.
9403 * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
9404 * @par
9405 * Casts directly and dereferences. This method doesn't depend on the
9406 * compiler, but it violates the C standard as it directly dereferences an
9407 * unaligned pointer. It can generate buggy code on targets which do not
9408 * support unaligned memory accesses, but in some circumstances, it's the
9409 * only known way to get the most performance.
9411 * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
9412 * @par
9413 * Also portable. This can generate the best code on old compilers which don't
9414 * inline small `memcpy()` calls, and it might also be faster on big-endian
9415 * systems which lack a native byteswap instruction. However, some compilers
9416 * will emit literal byteshifts even if the target supports unaligned access.
9419 * @warning
9420 * Methods 1 and 2 rely on implementation-defined behavior. Use these with
9421 * care, as what works on one compiler/platform/optimization level may cause
9422 * another to read garbage data or even crash.
9424 * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
9426 * Prefer these methods in priority order (0 > 3 > 1 > 2)
9428 # define XXH_FORCE_MEMORY_ACCESS 0
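/*
 * Illustrative note: the access method is chosen at build time, e.g. by passing
 * -DXXH_FORCE_MEMORY_ACCESS=3 when compiling the implementation, or by defining
 * the macro before an inlined include of xxhash.h:
 * @code{.c}
 *   #define XXH_FORCE_MEMORY_ACCESS 3   // byteshift loads
 *   #define XXH_INLINE_ALL
 *   #include "xxhash.h"
 * @endcode
 */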
9431 * @def XXH_SIZE_OPT
9432 * @brief Controls how much xxHash optimizes for size.
9434 * xxHash, when compiled, tends to result in a rather large binary size. This
9435  * is mostly due to heavy usage of forced inlining and constant folding of the
9436 * @ref XXH3_family to increase performance.
9438 * However, some developers prefer size over speed. This option can
9439 * significantly reduce the size of the generated code. When using the `-Os`
9440 * or `-Oz` options on GCC or Clang, this is defined to 1 by default,
9441 * otherwise it is defined to 0.
9443 * Most of these size optimizations can be controlled manually.
9445 * This is a number from 0-2.
9446 * - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
9447 * comes first.
9448 * - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
9449 * conservative and disables hacks that increase code size. It implies the
9450 * options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
9451 * and @ref XXH3_NEON_LANES == 8 if they are not already defined.
9452 * - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
9453 * Performance may cry. For example, the single shot functions just use the
9454 * streaming API.
9456 # define XXH_SIZE_OPT 0
9459 * @def XXH_FORCE_ALIGN_CHECK
9460 * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
9461 * and XXH64() only).
9463 * This is an important performance trick for architectures without decent
9464 * unaligned memory access performance.
9466 * It checks for input alignment, and when conditions are met, uses a "fast
9467 * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
9468 * faster_ read speed.
9470 * The check costs one initial branch per hash, which is generally negligible,
9471 * but not zero.
9473 * Moreover, it's not useful to generate an additional code path if memory
9474 * access uses the same instruction for both aligned and unaligned
9475 * addresses (e.g. x86 and aarch64).
9477 * In these cases, the alignment check can be removed by setting this macro to 0.
9478 * Then the code will always use unaligned memory access.
9479  * The alignment check is automatically disabled on x86, x64, ARM64, and some ARM
9480  * chips, which are platforms known to offer good unaligned memory access performance.
9482 * It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
9484 * This option does not affect XXH3 (only XXH32 and XXH64).
9486 # define XXH_FORCE_ALIGN_CHECK 0
9489 * @def XXH_NO_INLINE_HINTS
9490 * @brief When non-zero, sets all functions to `static`.
9492 * By default, xxHash tries to force the compiler to inline almost all internal
9493 * functions.
9495 * This can usually improve performance due to reduced jumping and improved
9496 * constant folding, but significantly increases the size of the binary which
9497 * might not be favorable.
9499 * Additionally, sometimes the forced inlining can be detrimental to performance,
9500 * depending on the architecture.
9502  * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
9503  * compiler full control over whether to inline them or not.
9505 * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
9506 * @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
9508 # define XXH_NO_INLINE_HINTS 0
9511 * @def XXH3_INLINE_SECRET
9512 * @brief Determines whether to inline the XXH3 withSecret code.
9514 * When the secret size is known, the compiler can improve the performance
9515 * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret().
9517 * However, if the secret size is not known, it doesn't have any benefit. This
9518 * happens when xxHash is compiled into a global symbol. Therefore, if
9519 * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0.
9521 * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers
9522 * that are *sometimes* force inline on -Og, and it is impossible to automatically
9523 * detect this optimization level.
9525 # define XXH3_INLINE_SECRET 0
9528 * @def XXH32_ENDJMP
9529 * @brief Whether to use a jump for `XXH32_finalize`.
9531 * For performance, `XXH32_finalize` uses multiple branches in the finalizer.
9532 * This is generally preferable for performance,
9533  * but depending on the exact architecture, a jmp may be better.
9535  * This setting only potentially makes a difference for very small inputs.
9537 # define XXH32_ENDJMP 0
9540 * @internal
9541 * @brief Redefines old internal names.
9543 * For compatibility with code that uses xxHash's internals before the names
9544 * were changed to improve namespacing. There is no other reason to use this.
9546 # define XXH_OLD_NAMES
9547 # undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
9550 * @def XXH_NO_STREAM
9551 * @brief Disables the streaming API.
9553 * When xxHash is not inlined and the streaming functions are not used, disabling
9554 * the streaming functions can improve code size significantly, especially with
9555 * the @ref XXH3_family which tends to make constant folded copies of itself.
9557 # define XXH_NO_STREAM
9558 # undef XXH_NO_STREAM /* don't actually */
9559 #endif /* XXH_DOXYGEN */
9561 * @}
9564 #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
9565 /* prefer __packed__ structures (method 1) for GCC
9566 * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
9567 * which for some reason does unaligned loads. */
9568 # if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
9569 # define XXH_FORCE_MEMORY_ACCESS 1
9570 # endif
9571 #endif
9573 #ifndef XXH_SIZE_OPT
9574 /* default to 1 for -Os or -Oz */
9575 # if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
9576 # define XXH_SIZE_OPT 1
9577 # else
9578 # define XXH_SIZE_OPT 0
9579 # endif
9580 #endif
9582 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
9583 /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
9584 # if XXH_SIZE_OPT >= 1 || \
9585 defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
9586 || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
9587 # define XXH_FORCE_ALIGN_CHECK 0
9588 # else
9589 # define XXH_FORCE_ALIGN_CHECK 1
9590 # endif
9591 #endif
9593 #ifndef XXH_NO_INLINE_HINTS
9594 # if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
9595 # define XXH_NO_INLINE_HINTS 1
9596 # else
9597 # define XXH_NO_INLINE_HINTS 0
9598 # endif
9599 #endif
9601 #ifndef XXH3_INLINE_SECRET
9602 # if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
9603 || !defined(XXH_INLINE_ALL)
9604 # define XXH3_INLINE_SECRET 0
9605 # else
9606 # define XXH3_INLINE_SECRET 1
9607 # endif
9608 #endif
9610 #ifndef XXH32_ENDJMP
9611 /* generally preferable for performance */
9612 # define XXH32_ENDJMP 0
9613 #endif
9616 * @defgroup impl Implementation
9617 * @{
9621 /* *************************************
9622 * Includes & Memory related functions
9623 ***************************************/
9624 #if defined(XXH_NO_STREAM)
9625 /* nothing */
9626 #elif defined(XXH_NO_STDLIB)
9628 /* When requesting to disable any mention of stdlib,
9629  * the library loses the ability to invoke malloc / free.
9630 * In practice, it means that functions like `XXH*_createState()`
9631 * will always fail, and return NULL.
9632 * This flag is useful in situations where
9633 * xxhash.h is integrated into some kernel, embedded or limited environment
9634 * without access to dynamic allocation.
9637 static void XXH_free(void* p) { (void)p; }
9639 #else
9642 * Modify the local functions below should you wish to use
9643 * different memory routines for malloc() and free()
9645 #include <stdlib.h>
9649 * @internal
9650 * @brief Modify this function to use a different routine than free().
9652 static void XXH_free(void* p) { VG_(free)(p); }
9654 #endif /* XXH_NO_STDLIB */
9656 #include <string.h>
9659 * @internal
9660 * @brief Modify this function to use a different routine than memcpy().
9662 static void* XXH_memcpy(void* dest, const void* src, size_t size)
9664 return VG_(memcpy)(dest,src,size);
9667 #include <limits.h> /* ULLONG_MAX */
9670 /* *************************************
9671 * Compiler Specific Options
9672 ***************************************/
9673 #ifdef _MSC_VER /* Visual Studio warning fix */
9674 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
9675 #endif
9677 #if XXH_NO_INLINE_HINTS /* disable inlining hints */
9678 # if defined(__GNUC__) || defined(__clang__)
9679 # define XXH_FORCE_INLINE static __attribute__((unused))
9680 # else
9681 # define XXH_FORCE_INLINE static
9682 # endif
9683 # define XXH_NO_INLINE static
9684 /* enable inlining hints */
9685 #elif defined(__GNUC__) || defined(__clang__)
9686 # define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
9687 # define XXH_NO_INLINE static __attribute__((noinline))
9688 #elif defined(_MSC_VER) /* Visual Studio */
9689 # define XXH_FORCE_INLINE static __forceinline
9690 # define XXH_NO_INLINE static __declspec(noinline)
9691 #elif defined (__cplusplus) \
9692 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
9693 # define XXH_FORCE_INLINE static inline
9694 # define XXH_NO_INLINE static
9695 #else
9696 # define XXH_FORCE_INLINE static
9697 # define XXH_NO_INLINE static
9698 #endif
9700 #if XXH3_INLINE_SECRET
9701 # define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
9702 #else
9703 # define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
9704 #endif
9707 /* *************************************
9708 * Debug
9709 ***************************************/
9711 * @ingroup tuning
9712 * @def XXH_DEBUGLEVEL
9713 * @brief Sets the debugging level.
9715 * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
9716 * compiler's command line options. The value must be a number.
9718 #ifndef XXH_DEBUGLEVEL
9719 # ifdef DEBUGLEVEL /* backwards compat */
9720 # define XXH_DEBUGLEVEL DEBUGLEVEL
9721 # else
9722 # define XXH_DEBUGLEVEL 0
9723 # endif
9724 #endif
9726 #if (XXH_DEBUGLEVEL>=1)
9727 # include <assert.h> /* note: can still be disabled with NDEBUG */
9728 # define XXH_ASSERT(c) assert(c)
9729 #else
9730 # if defined(__INTEL_COMPILER)
9731 # define XXH_ASSERT(c) XXH_ASSUME((unsigned char) (c))
9732 # else
9733 # define XXH_ASSERT(c) XXH_ASSUME(c)
9734 # endif
9735 #endif
9737 /* note: use after variable declarations */
9738 #ifndef XXH_STATIC_ASSERT
9739 # if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
9740 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
9741 # elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
9742 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
9743 # else
9744 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
9745 # endif
9746 # define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
9747 #endif
9750 * @internal
9751 * @def XXH_COMPILER_GUARD(var)
9752 * @brief Used to prevent unwanted optimizations for @p var.
9754 * It uses an empty GCC inline assembly statement with a register constraint
9755  * which forces @p var into a general purpose register (e.g. eax, ebx, ecx
9756 * on x86) and marks it as modified.
9758 * This is used in a few places to avoid unwanted autovectorization (e.g.
9759 * XXH32_round()). All vectorization we want is explicit via intrinsics,
9760 * and _usually_ isn't wanted elsewhere.
9762 * We also use it to prevent unwanted constant folding for AArch64 in
9763 * XXH3_initCustomSecret_scalar().
9765 #if defined(__GNUC__) || defined(__clang__)
9766 # define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
9767 #else
9768 # define XXH_COMPILER_GUARD(var) ((void)0)
9769 #endif
9771 /* Specifically for NEON vectors which use the "w" constraint, on
9772 * Clang. */
9773 #if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__)
9774 # define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var))
9775 #else
9776 # define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0)
9777 #endif
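/*
 * Illustrative sketch: the guard is placed in scalar hot loops right after the
 * value it protects is computed, as XXH32_round() does further below.
 * @code{.c}
 *   acc *= XXH_PRIME32_1;
 *   XXH_COMPILER_GUARD(acc);   // pins acc to a GPR; discourages autovectorization
 * @endcode
 */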
9779 /* *************************************
9780 * Basic Types
9781 ***************************************/
9782 #if !defined (__VMS) \
9783 && (defined (__cplusplus) \
9784 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
9785 # ifdef _AIX
9786 # include <inttypes.h>
9787 # else
9788 # include <stdint.h>
9789 # endif
9790 typedef uint8_t xxh_u8;
9791 #else
9792 typedef unsigned char xxh_u8;
9793 #endif
9794 typedef XXH32_hash_t xxh_u32;
9796 #ifdef XXH_OLD_NAMES
9797 # warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly"
9798 # define BYTE xxh_u8
9799 # define U8 xxh_u8
9800 # define U32 xxh_u32
9801 #endif
9803 /* *** Memory access *** */
9806 * @internal
9807 * @fn xxh_u32 XXH_read32(const void* ptr)
9808 * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
9810 * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
9812 * @param ptr The pointer to read from.
9813 * @return The 32-bit native endian integer from the bytes at @p ptr.
9817 * @internal
9818 * @fn xxh_u32 XXH_readLE32(const void* ptr)
9819 * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
9821 * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
9823 * @param ptr The pointer to read from.
9824 * @return The 32-bit little endian integer from the bytes at @p ptr.
9828 * @internal
9829 * @fn xxh_u32 XXH_readBE32(const void* ptr)
9830 * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
9832 * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
9834 * @param ptr The pointer to read from.
9835 * @return The 32-bit big endian integer from the bytes at @p ptr.
9839 * @internal
9840 * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
9841 * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
9843 * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
9844 * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
9845 * always @ref XXH_alignment::XXH_unaligned.
9847 * @param ptr The pointer to read from.
9848 * @param align Whether @p ptr is aligned.
9849 * @pre
9850 * If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
9851 * aligned.
9852 * @return The 32-bit little endian integer from the bytes at @p ptr.
9855 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
9857 * Manual byteshift. Best for old compilers which don't inline memcpy.
9858 * We actually directly use XXH_readLE32 and XXH_readBE32.
9860 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
9863 * Force direct memory access. Only works on CPU which support unaligned memory
9864 * access in hardware.
9866 static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
9868 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
9871 * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
9872 * documentation claimed that it only increased the alignment, but actually it
9873 * can decrease it on gcc, clang, and icc:
9874 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
9875 * https://gcc.godbolt.org/z/xYez1j67Y.
9877 #ifdef XXH_OLD_NAMES
9878 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
9879 #endif
9880 static xxh_u32 XXH_read32(const void* ptr)
9882 typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
9883 return *((const xxh_unalign32*)ptr);
9886 #else
9889 * Portable and safe solution. Generally efficient.
9890 * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
9892 static xxh_u32 XXH_read32(const void* memPtr)
9894 xxh_u32 val;
9895 XXH_memcpy(&val, memPtr, sizeof(val));
9896 return val;
9899 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
9902 /* *** Endianness *** */
9905 * @ingroup tuning
9906 * @def XXH_CPU_LITTLE_ENDIAN
9907 * @brief Whether the target is little endian.
9909 * Defined to 1 if the target is little endian, or 0 if it is big endian.
9910 * It can be defined externally, for example on the compiler command line.
9912 * If it is not defined,
9913 * a runtime check (which is usually constant folded) is used instead.
9915 * @note
9916 * This is not necessarily defined to an integer constant.
9918 * @see XXH_isLittleEndian() for the runtime check.
9920 #ifndef XXH_CPU_LITTLE_ENDIAN
9922 * Try to detect endianness automatically, to avoid the nonstandard behavior
9923 * in `XXH_isLittleEndian()`
9925 # if defined(_WIN32) /* Windows is always little endian */ \
9926 || defined(__LITTLE_ENDIAN__) \
9927 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
9928 # define XXH_CPU_LITTLE_ENDIAN 1
9929 # elif defined(__BIG_ENDIAN__) \
9930 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
9931 # define XXH_CPU_LITTLE_ENDIAN 0
9932 # else
9934 * @internal
9935 * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.
9937 * Most compilers will constant fold this.
9939 static int XXH_isLittleEndian(void)
9942 * Portable and well-defined behavior.
9943 * Don't use static: it is detrimental to performance.
9945 const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
9946 return one.c[0];
9948 # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
9949 # endif
9950 #endif
9955 /* ****************************************
9956 * Compiler-specific Functions and Macros
9957 ******************************************/
9958 #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
9960 #ifdef __has_builtin
9961 # define XXH_HAS_BUILTIN(x) __has_builtin(x)
9962 #else
9963 # define XXH_HAS_BUILTIN(x) 0
9964 #endif
9969 * C23 and future versions have standard "unreachable()".
9970 * Once it has been implemented reliably we can add it as an
9971 * additional case:
9973 * ```
9974 * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
9975 * # include <stddef.h>
9976 * # ifdef unreachable
9977 * # define XXH_UNREACHABLE() unreachable()
9978 * # endif
9979 * #endif
9980 * ```
9982 * Note C++23 also has std::unreachable() which can be detected
9983 * as follows:
9984 * ```
9985 * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
9986 * # include <utility>
9987 * # define XXH_UNREACHABLE() std::unreachable()
9988 * #endif
9989 * ```
9990 * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
9991 * We don't use that as including `<utility>` in `extern "C"` blocks
9992 * doesn't work on GCC12
9995 #if XXH_HAS_BUILTIN(__builtin_unreachable)
9996 # define XXH_UNREACHABLE() __builtin_unreachable()
9998 #elif defined(_MSC_VER)
9999 # define XXH_UNREACHABLE() __assume(0)
10001 #else
10002 # define XXH_UNREACHABLE()
10003 #endif
10005 #if XXH_HAS_BUILTIN(__builtin_assume)
10006 # define XXH_ASSUME(c) __builtin_assume(c)
10007 #else
10008 # define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
10009 #endif
10012 * @internal
10013 * @def XXH_rotl32(x,r)
10014 * @brief 32-bit rotate left.
10016 * @param x The 32-bit integer to be rotated.
10017 * @param r The number of bits to rotate.
10018 * @pre
10019 * @p r > 0 && @p r < 32
10020 * @note
10021 * @p x and @p r may be evaluated multiple times.
10022 * @return The rotated result.
10024 #if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
10025 && XXH_HAS_BUILTIN(__builtin_rotateleft64)
10026 # define XXH_rotl32 __builtin_rotateleft32
10027 # define XXH_rotl64 __builtin_rotateleft64
10028 /* Note: although _rotl exists for MinGW (GCC under Windows), performance seems poor */
10029 #elif defined(_MSC_VER)
10030 # define XXH_rotl32(x,r) _rotl(x,r)
10031 # define XXH_rotl64(x,r) _rotl64(x,r)
10032 #else
10033 # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
10034 # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
10035 #endif
10038 * @internal
10039 * @fn xxh_u32 XXH_swap32(xxh_u32 x)
10040 * @brief A 32-bit byteswap.
10042 * @param x The 32-bit integer to byteswap.
10043 * @return @p x, byteswapped.
10045 #if defined(_MSC_VER) /* Visual Studio */
10046 # define XXH_swap32 _byteswap_ulong
10047 #elif XXH_GCC_VERSION >= 403
10048 # define XXH_swap32 __builtin_bswap32
10049 #else
10050 static xxh_u32 XXH_swap32 (xxh_u32 x)
10052 return ((x << 24) & 0xff000000 ) |
10053 ((x << 8) & 0x00ff0000 ) |
10054 ((x >> 8) & 0x0000ff00 ) |
10055 ((x >> 24) & 0x000000ff );
10057 #endif
10060 /* ***************************
10061 * Memory reads
10062 *****************************/
10065 * @internal
10066 * @brief Enum to indicate whether a pointer is aligned.
10068 typedef enum {
10069 XXH_aligned, /*!< Aligned */
10070 XXH_unaligned /*!< Possibly unaligned */
10071 } XXH_alignment;
10074 * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
10076 * This is ideal for older compilers which don't inline memcpy.
10078 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
10080 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
10082 const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
10083 return bytePtr[0]
10084 | ((xxh_u32)bytePtr[1] << 8)
10085 | ((xxh_u32)bytePtr[2] << 16)
10086 | ((xxh_u32)bytePtr[3] << 24);
10089 XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
10091 const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
10092 return bytePtr[3]
10093 | ((xxh_u32)bytePtr[2] << 8)
10094 | ((xxh_u32)bytePtr[1] << 16)
10095 | ((xxh_u32)bytePtr[0] << 24);
10098 #else
10099 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
10101 return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
10104 static xxh_u32 XXH_readBE32(const void* ptr)
10106 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
10108 #endif
10110 XXH_FORCE_INLINE xxh_u32
10111 XXH_readLE32_align(const void* ptr, XXH_alignment align)
10113 if (align==XXH_unaligned) {
10114 return XXH_readLE32(ptr);
10115 } else {
10116 return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
10121 /* *************************************
10122 * Misc
10123 ***************************************/
10124 /*! @ingroup public */
10125 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
10128 /* *******************************************************************
10129 * 32-bit hash functions
10130 *********************************************************************/
10132 * @}
10133 * @defgroup XXH32_impl XXH32 implementation
10134 * @ingroup impl
10136 * Details on the XXH32 implementation.
10137 * @{
10139 /* #define instead of static const, to be used as initializers */
10140 #define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */
10141 #define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */
10142 #define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */
10143 #define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */
10144 #define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */
10146 #ifdef XXH_OLD_NAMES
10147 # define PRIME32_1 XXH_PRIME32_1
10148 # define PRIME32_2 XXH_PRIME32_2
10149 # define PRIME32_3 XXH_PRIME32_3
10150 # define PRIME32_4 XXH_PRIME32_4
10151 # define PRIME32_5 XXH_PRIME32_5
10152 #endif
10155 * @internal
10156 * @brief Normal stripe processing routine.
10158 * This shuffles the bits so that any bit from @p input impacts several bits in
10159 * @p acc.
10161 * @param acc The accumulator lane.
10162 * @param input The stripe of input to mix.
10163 * @return The mixed accumulator lane.
10165 static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
10167 acc += input * XXH_PRIME32_2;
10168 acc = XXH_rotl32(acc, 13);
10169 acc *= XXH_PRIME32_1;
10170 #if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
10172 * UGLY HACK:
10173 * A compiler fence is the only thing that prevents GCC and Clang from
10174 * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
10175 * reason) without globally disabling SSE4.1.
10177 * The reason we want to avoid vectorization is because despite working on
10178 * 4 integers at a time, there are multiple factors slowing XXH32 down on
10179 * SSE4:
10180 * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
10181 * newer chips!) making it slightly slower to multiply four integers at
10182 * once compared to four integers independently. Even when pmulld was
10183 * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE
10184 * just to multiply unless doing a long operation.
10186 * - Four instructions are required to rotate,
10187 * movqda tmp, v // not required with VEX encoding
10188 * pslld tmp, 13 // tmp <<= 13
10189 * psrld v, 19 // x >>= 19
10190 * por v, tmp // x |= tmp
10191 * compared to one for scalar:
10192 * roll v, 13 // reliably fast across the board
10193 * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
10195 * - Instruction level parallelism is actually more beneficial here because
10196 * the SIMD actually serializes this operation: While v1 is rotating, v2
10197 * can load data, while v3 can multiply. SSE forces them to operate
10198 * together.
10200 * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
10201 * the loop. NEON is only faster on the A53, and with the newer cores, it is less
10202 * than half the speed.
10204 * Additionally, this is used on WASM SIMD128 because it JITs to the same
10205 * SIMD instructions and has the same issue.
10207 XXH_COMPILER_GUARD(acc);
10208 #endif
10209 return acc;
10213 * @internal
10214 * @brief Mixes all bits to finalize the hash.
10216 * The final mix ensures that all input bits have a chance to impact any bit in
10217 * the output digest, resulting in an unbiased distribution.
10219 * @param hash The hash to avalanche.
10220 * @return The avalanched hash.
10222 static xxh_u32 XXH32_avalanche(xxh_u32 hash)
10224 hash ^= hash >> 15;
10225 hash *= XXH_PRIME32_2;
10226 hash ^= hash >> 13;
10227 hash *= XXH_PRIME32_3;
10228 hash ^= hash >> 16;
10229 return hash;
10232 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
10235 * @internal
10236 * @brief Processes the last 0-15 bytes of @p ptr.
10238 * There may be up to 15 bytes remaining to consume from the input.
10239 * This final stage will digest them to ensure that all input bytes are present
10240 * in the final mix.
10242 * @param hash The hash to finalize.
10243 * @param ptr The pointer to the remaining input.
10244 * @param len The remaining length, modulo 16.
10245 * @param align Whether @p ptr is aligned.
10246 * @return The finalized hash.
10247 * @see XXH64_finalize().
10249 static XXH_PUREF xxh_u32
10250 XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
10252 #define XXH_PROCESS1 do { \
10253 hash += (*ptr++) * XXH_PRIME32_5; \
10254 hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \
10255 } while (0)
10257 #define XXH_PROCESS4 do { \
10258 hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \
10259 ptr += 4; \
10260 hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \
10261 } while (0)
10263 if (ptr==NULL) XXH_ASSERT(len == 0);
10265 /* Compact rerolled version; generally faster */
10266 if (!XXH32_ENDJMP) {
10267 len &= 15;
10268 while (len >= 4) {
10269 XXH_PROCESS4;
10270 len -= 4;
10272 while (len > 0) {
10273 XXH_PROCESS1;
10274 --len;
10276 return XXH32_avalanche(hash);
10277 } else {
10278 switch(len&15) /* or switch(bEnd - p) */ {
10279 case 12: XXH_PROCESS4;
10280 XXH_FALLTHROUGH; /* fallthrough */
10281 case 8: XXH_PROCESS4;
10282 XXH_FALLTHROUGH; /* fallthrough */
10283 case 4: XXH_PROCESS4;
10284 return XXH32_avalanche(hash);
10286 case 13: XXH_PROCESS4;
10287 XXH_FALLTHROUGH; /* fallthrough */
10288 case 9: XXH_PROCESS4;
10289 XXH_FALLTHROUGH; /* fallthrough */
10290 case 5: XXH_PROCESS4;
10291 XXH_PROCESS1;
10292 return XXH32_avalanche(hash);
10294 case 14: XXH_PROCESS4;
10295 XXH_FALLTHROUGH; /* fallthrough */
10296 case 10: XXH_PROCESS4;
10297 XXH_FALLTHROUGH; /* fallthrough */
10298 case 6: XXH_PROCESS4;
10299 XXH_PROCESS1;
10300 XXH_PROCESS1;
10301 return XXH32_avalanche(hash);
10303 case 15: XXH_PROCESS4;
10304 XXH_FALLTHROUGH; /* fallthrough */
10305 case 11: XXH_PROCESS4;
10306 XXH_FALLTHROUGH; /* fallthrough */
10307 case 7: XXH_PROCESS4;
10308 XXH_FALLTHROUGH; /* fallthrough */
10309 case 3: XXH_PROCESS1;
10310 XXH_FALLTHROUGH; /* fallthrough */
10311 case 2: XXH_PROCESS1;
10312 XXH_FALLTHROUGH; /* fallthrough */
10313 case 1: XXH_PROCESS1;
10314 XXH_FALLTHROUGH; /* fallthrough */
10315 case 0: return XXH32_avalanche(hash);
10317 XXH_ASSERT(0);
10318 return hash; /* reaching this point is deemed impossible */
10322 #ifdef XXH_OLD_NAMES
10323 # define PROCESS1 XXH_PROCESS1
10324 # define PROCESS4 XXH_PROCESS4
10325 #else
10326 # undef XXH_PROCESS1
10327 # undef XXH_PROCESS4
10328 #endif
10331 * @internal
10332 * @brief The implementation for @ref XXH32().
10334  * @param input, len, seed Directly passed from @ref XXH32().
10335 * @param align Whether @p input is aligned.
10336 * @return The calculated hash.
10338 XXH_FORCE_INLINE XXH_PUREF xxh_u32
10339 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
10341 xxh_u32 h32;
10343 if (input==NULL) XXH_ASSERT(len == 0);
10345 if (len>=16) {
10346 const xxh_u8* const bEnd = input + len;
10347 const xxh_u8* const limit = bEnd - 15;
10348 xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
10349 xxh_u32 v2 = seed + XXH_PRIME32_2;
10350 xxh_u32 v3 = seed + 0;
10351 xxh_u32 v4 = seed - XXH_PRIME32_1;
10353 do {
10354 v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
10355 v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
10356 v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
10357 v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
10358 } while (input < limit);
10360 h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
10361 + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
10362 } else {
10363 h32 = seed + XXH_PRIME32_5;
10366 h32 += (xxh_u32)len;
10368 return XXH32_finalize(h32, input, len&15, align);
10371 /*! @ingroup XXH32_family */
10372 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
10374 #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
10375 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
10376 XXH32_state_t state;
10377 XXH32_reset(&state, seed);
10378 XXH32_update(&state, (const xxh_u8*)input, len);
10379 return XXH32_digest(&state);
10380 #else
10381 if (XXH_FORCE_ALIGN_CHECK) {
10382 if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
10383 return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
10386 return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
10387 #endif
10392 /******* Hash streaming *******/
10393 #ifndef XXH_NO_STREAM
10394 /*! @ingroup XXH32_family */
10395 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
10397 return (XXH32_state_t*)VG_(malloc)("zstddeclib.XXH32_createState.1", sizeof(XXH32_state_t));
10399 /*! @ingroup XXH32_family */
10400 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
10402 XXH_free(statePtr);
10403 return XXH_OK;
10406 /*! @ingroup XXH32_family */
10407 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
10409 XXH_memcpy(dstState, srcState, sizeof(*dstState));
10412 /*! @ingroup XXH32_family */
10413 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
10415 XXH_ASSERT(statePtr != NULL);
10416 VG_(memset)(statePtr, 0, sizeof(*statePtr));
10417 statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
10418 statePtr->v[1] = seed + XXH_PRIME32_2;
10419 statePtr->v[2] = seed + 0;
10420 statePtr->v[3] = seed - XXH_PRIME32_1;
10421 return XXH_OK;
10425 /*! @ingroup XXH32_family */
10426 XXH_PUBLIC_API XXH_errorcode
10427 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
10429 if (input==NULL) {
10430 XXH_ASSERT(len == 0);
10431 return XXH_OK;
10434 { const xxh_u8* p = (const xxh_u8*)input;
10435 const xxh_u8* const bEnd = p + len;
10437 state->total_len_32 += (XXH32_hash_t)len;
10438 state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
10440 if (state->memsize + len < 16) { /* fill in tmp buffer */
10441 XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
10442 state->memsize += (XXH32_hash_t)len;
10443 return XXH_OK;
10446 if (state->memsize) { /* some data left from previous update */
10447 XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
10448 { const xxh_u32* p32 = state->mem32;
10449 state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
10450 state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
10451 state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
10452 state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
10454 p += 16-state->memsize;
10455 state->memsize = 0;
10458 if (p <= bEnd-16) {
10459 const xxh_u8* const limit = bEnd - 16;
10461 do {
10462 state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
10463 state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
10464 state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
10465 state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
10466 } while (p<=limit);
10470 if (p < bEnd) {
10471 XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
10472 state->memsize = (unsigned)(bEnd-p);
10476 return XXH_OK;
10480 /*! @ingroup XXH32_family */
10481 XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
10483 xxh_u32 h32;
10485 if (state->large_len) {
10486 h32 = XXH_rotl32(state->v[0], 1)
10487 + XXH_rotl32(state->v[1], 7)
10488 + XXH_rotl32(state->v[2], 12)
10489 + XXH_rotl32(state->v[3], 18);
10490 } else {
10491 h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
10494 h32 += state->total_len_32;
10496 return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
10498 #endif /* !XXH_NO_STREAM */
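/*
 * Illustrative sketch: the streaming functions above combine into the usual
 * create / reset / update / digest / free cycle. `part1`/`part2` and their
 * sizes are placeholder names for a two-part input.
 * @code{.c}
 *   XXH32_state_t* const st = XXH32_createState();
 *   if (st != NULL) {
 *       XXH32_reset(st, 0);                 // 0 = seed
 *       XXH32_update(st, part1, part1Size);
 *       XXH32_update(st, part2, part2Size);
 *       { XXH32_hash_t const h = XXH32_digest(st); (void)h; }
 *       XXH32_freeState(st);
 *   }
 * @endcode
 */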
10500 /******* Canonical representation *******/
10502 /*! @ingroup XXH32_family */
10503 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
10505 XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
10506 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
10507 XXH_memcpy(dst, &hash, sizeof(*dst));
10509 /*! @ingroup XXH32_family */
10510 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
10512 return XXH_readBE32(src);
10516 #ifndef XXH_NO_LONG_LONG
10518 /* *******************************************************************
10519 * 64-bit hash functions
10520 *********************************************************************/
10522 * @}
10523 * @ingroup impl
10524 * @{
10526 /******* Memory access *******/
10528 typedef XXH64_hash_t xxh_u64;
10530 #ifdef XXH_OLD_NAMES
10531 # define U64 xxh_u64
10532 #endif
10534 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
10536 * Manual byteshift. Best for old compilers which don't inline memcpy.
10537 * We actually directly use XXH_readLE64 and XXH_readBE64.
10539 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
10541 /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
10542 static xxh_u64 XXH_read64(const void* memPtr)
10544 return *(const xxh_u64*) memPtr;
10547 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
10550 * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
10551 * documentation claimed that it only increased the alignment, but actually it
10552 * can decrease it on gcc, clang, and icc:
10553 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
10554 * https://gcc.godbolt.org/z/xYez1j67Y.
10556 #ifdef XXH_OLD_NAMES
10557 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
10558 #endif
10559 static xxh_u64 XXH_read64(const void* ptr)
10561 typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
10562 return *((const xxh_unalign64*)ptr);
10565 #else
10568 * Portable and safe solution. Generally efficient.
10569 * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
10571 static xxh_u64 XXH_read64(const void* memPtr)
10573 xxh_u64 val;
10574 XXH_memcpy(&val, memPtr, sizeof(val));
10575 return val;
10578 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
10580 #if defined(_MSC_VER) /* Visual Studio */
10581 # define XXH_swap64 _byteswap_uint64
10582 #elif XXH_GCC_VERSION >= 403
10583 # define XXH_swap64 __builtin_bswap64
10584 #else
10585 static xxh_u64 XXH_swap64(xxh_u64 x)
10587 return ((x << 56) & 0xff00000000000000ULL) |
10588 ((x << 40) & 0x00ff000000000000ULL) |
10589 ((x << 24) & 0x0000ff0000000000ULL) |
10590 ((x << 8) & 0x000000ff00000000ULL) |
10591 ((x >> 8) & 0x00000000ff000000ULL) |
10592 ((x >> 24) & 0x0000000000ff0000ULL) |
10593 ((x >> 40) & 0x000000000000ff00ULL) |
10594 ((x >> 56) & 0x00000000000000ffULL);
10596 #endif
10599 /* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
10600 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
10602 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
10604 const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
10605 return bytePtr[0]
10606 | ((xxh_u64)bytePtr[1] << 8)
10607 | ((xxh_u64)bytePtr[2] << 16)
10608 | ((xxh_u64)bytePtr[3] << 24)
10609 | ((xxh_u64)bytePtr[4] << 32)
10610 | ((xxh_u64)bytePtr[5] << 40)
10611 | ((xxh_u64)bytePtr[6] << 48)
10612 | ((xxh_u64)bytePtr[7] << 56);
10615 XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
10617 const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
10618 return bytePtr[7]
10619 | ((xxh_u64)bytePtr[6] << 8)
10620 | ((xxh_u64)bytePtr[5] << 16)
10621 | ((xxh_u64)bytePtr[4] << 24)
10622 | ((xxh_u64)bytePtr[3] << 32)
10623 | ((xxh_u64)bytePtr[2] << 40)
10624 | ((xxh_u64)bytePtr[1] << 48)
10625 | ((xxh_u64)bytePtr[0] << 56);
10628 #else
10629 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
10631 return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
10634 static xxh_u64 XXH_readBE64(const void* ptr)
10636 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
10638 #endif
10640 XXH_FORCE_INLINE xxh_u64
10641 XXH_readLE64_align(const void* ptr, XXH_alignment align)
10643 if (align==XXH_unaligned)
10644 return XXH_readLE64(ptr);
10645 else
10646 return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
10650 /******* xxh64 *******/
10652 * @}
10653 * @defgroup XXH64_impl XXH64 implementation
10654 * @ingroup impl
10656 * Details on the XXH64 implementation.
10657 * @{
10659 /* #define rather than static const, to be used as initializers */
10660 #define XXH_PRIME64_1 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
10661 #define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
10662 #define XXH_PRIME64_3 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
10663 #define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
10664 #define XXH_PRIME64_5 0x27D4EB2F165667C5ULL /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
10666 #ifdef XXH_OLD_NAMES
10667 # define PRIME64_1 XXH_PRIME64_1
10668 # define PRIME64_2 XXH_PRIME64_2
10669 # define PRIME64_3 XXH_PRIME64_3
10670 # define PRIME64_4 XXH_PRIME64_4
10671 # define PRIME64_5 XXH_PRIME64_5
10672 #endif
10674 /*! @copydoc XXH32_round */
10675 static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
10677 acc += input * XXH_PRIME64_2;
10678 acc = XXH_rotl64(acc, 31);
10679 acc *= XXH_PRIME64_1;
10680 #if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
10682 * DISABLE AUTOVECTORIZATION:
10683 * A compiler fence is used to prevent GCC and Clang from
10684 * autovectorizing the XXH64 loop (pragmas and attributes don't work for some
10685 * reason) without globally disabling AVX512.
10687 * Autovectorization of XXH64 tends to be detrimental,
10688  * though the exact outcome may change depending on the exact CPU and compiler version.
10689  * For reference, it has been reported as detrimental for Skylake-X,
10690  * but possibly beneficial for Zen4.
10692  * The default is to disable auto-vectorization,
10693  * but you can choose to enable it instead with the `XXH_ENABLE_AUTOVECTORIZE` build variable.
10695 XXH_COMPILER_GUARD(acc);
10696 #endif
10697 return acc;
10700 static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
10702 val = XXH64_round(0, val);
10703 acc ^= val;
10704 acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
10705 return acc;
10708 /*! @copydoc XXH32_avalanche */
10709 static xxh_u64 XXH64_avalanche(xxh_u64 hash)
10711 hash ^= hash >> 33;
10712 hash *= XXH_PRIME64_2;
10713 hash ^= hash >> 29;
10714 hash *= XXH_PRIME64_3;
10715 hash ^= hash >> 32;
10716 return hash;
10720 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
10723 * @internal
10724 * @brief Processes the last 0-31 bytes of @p ptr.
10726 * There may be up to 31 bytes remaining to consume from the input.
10727 * This final stage will digest them to ensure that all input bytes are present
10728 * in the final mix.
10730 * @param hash The hash to finalize.
10731 * @param ptr The pointer to the remaining input.
10732 * @param len The remaining length, modulo 32.
10733 * @param align Whether @p ptr is aligned.
10734 * @return The finalized hash
10735 * @see XXH32_finalize().
10737 static XXH_PUREF xxh_u64
10738 XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
10740 if (ptr==NULL) XXH_ASSERT(len == 0);
10741 len &= 31;
10742 while (len >= 8) {
10743 xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
10744 ptr += 8;
10745 hash ^= k1;
10746 hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
10747 len -= 8;
10749 if (len >= 4) {
10750 hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
10751 ptr += 4;
10752 hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
10753 len -= 4;
10755 while (len > 0) {
10756 hash ^= (*ptr++) * XXH_PRIME64_5;
10757 hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
10758 --len;
10760 return XXH64_avalanche(hash);
10763 #ifdef XXH_OLD_NAMES
10764 # define PROCESS1_64 XXH_PROCESS1_64
10765 # define PROCESS4_64 XXH_PROCESS4_64
10766 # define PROCESS8_64 XXH_PROCESS8_64
10767 #else
10768 # undef XXH_PROCESS1_64
10769 # undef XXH_PROCESS4_64
10770 # undef XXH_PROCESS8_64
10771 #endif
10774 * @internal
10775 * @brief The implementation for @ref XXH64().
10777 * @param input , len , seed Directly passed from @ref XXH64().
10778 * @param align Whether @p input is aligned.
10779 * @return The calculated hash.
10781 XXH_FORCE_INLINE XXH_PUREF xxh_u64
10782 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
10784 xxh_u64 h64;
10785 if (input==NULL) XXH_ASSERT(len == 0);
10787 if (len>=32) {
10788 const xxh_u8* const bEnd = input + len;
10789 const xxh_u8* const limit = bEnd - 31;
10790 xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
10791 xxh_u64 v2 = seed + XXH_PRIME64_2;
10792 xxh_u64 v3 = seed + 0;
10793 xxh_u64 v4 = seed - XXH_PRIME64_1;
10795 do {
10796 v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
10797 v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
10798 v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
10799 v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
10800 } while (input<limit);
10802 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
10803 h64 = XXH64_mergeRound(h64, v1);
10804 h64 = XXH64_mergeRound(h64, v2);
10805 h64 = XXH64_mergeRound(h64, v3);
10806 h64 = XXH64_mergeRound(h64, v4);
10808 } else {
10809 h64 = seed + XXH_PRIME64_5;
10812 h64 += (xxh_u64) len;
10814 return XXH64_finalize(h64, input, len, align);
10818 /*! @ingroup XXH64_family */
10819 XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
10821 #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
10822 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
10823 XXH64_state_t state;
10824 XXH64_reset(&state, seed);
10825 XXH64_update(&state, (const xxh_u8*)input, len);
10826 return XXH64_digest(&state);
10827 #else
10828 if (XXH_FORCE_ALIGN_CHECK) {
10829 if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
10830 return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
10833 return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
10835 #endif
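/*
 * Illustrative one-shot usage of XXH64(), as it would appear in a standalone
 * program (a sketch; hash_message() is hypothetical):
 * \code
 * #include <string.h>
 * #include "xxhash.h"
 *
 * static XXH64_hash_t hash_message(const char* msg)
 * {
 *     // 0 is the conventional "no particular seed" value
 *     return XXH64(msg, strlen(msg), 0);
 * }
 * \endcode
 */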
10838 /******* Hash Streaming *******/
10839 #ifndef XXH_NO_STREAM
10840 /*! @ingroup XXH64_family*/
10841 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
10843 return (XXH64_state_t*)VG_(malloc)("zstddeclib.XXH64_createState.1", sizeof(XXH64_state_t));
10845 /*! @ingroup XXH64_family */
10846 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
10848 XXH_free(statePtr);
10849 return XXH_OK;
10852 /*! @ingroup XXH64_family */
10853 XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
10855 XXH_memcpy(dstState, srcState, sizeof(*dstState));
10858 /*! @ingroup XXH64_family */
10859 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
10861 XXH_ASSERT(statePtr != NULL);
10862 VG_(memset)(statePtr, 0, sizeof(*statePtr));
10863 statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
10864 statePtr->v[1] = seed + XXH_PRIME64_2;
10865 statePtr->v[2] = seed + 0;
10866 statePtr->v[3] = seed - XXH_PRIME64_1;
10867 return XXH_OK;
10870 /*! @ingroup XXH64_family */
10871 XXH_PUBLIC_API XXH_errorcode
10872 XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
10874 if (input==NULL) {
10875 XXH_ASSERT(len == 0);
10876 return XXH_OK;
10879 { const xxh_u8* p = (const xxh_u8*)input;
10880 const xxh_u8* const bEnd = p + len;
10882 state->total_len += len;
10884 if (state->memsize + len < 32) { /* fill in tmp buffer */
10885 XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
10886 state->memsize += (xxh_u32)len;
10887 return XXH_OK;
10890 if (state->memsize) { /* tmp buffer is full */
10891 XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
10892 state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
10893 state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
10894 state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
10895 state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
10896 p += 32 - state->memsize;
10897 state->memsize = 0;
10900 if (p+32 <= bEnd) {
10901 const xxh_u8* const limit = bEnd - 32;
10903 do {
10904 state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
10905 state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
10906 state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
10907 state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
10908 } while (p<=limit);
10912 if (p < bEnd) {
10913 XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
10914 state->memsize = (unsigned)(bEnd-p);
10918 return XXH_OK;
10922 /*! @ingroup XXH64_family */
10923 XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
10925 xxh_u64 h64;
10927 if (state->total_len >= 32) {
10928 h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
10929 h64 = XXH64_mergeRound(h64, state->v[0]);
10930 h64 = XXH64_mergeRound(h64, state->v[1]);
10931 h64 = XXH64_mergeRound(h64, state->v[2]);
10932 h64 = XXH64_mergeRound(h64, state->v[3]);
10933 } else {
10934 h64 = state->v[2] /*seed*/ + XXH_PRIME64_5;
10937 h64 += (xxh_u64) state->total_len;
10939 return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
10941 #endif /* !XXH_NO_STREAM */
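/*
 * Illustrative use of the streaming API above (a sketch; hash_chunks() is
 * hypothetical and error handling is elided):
 * \code
 * static XXH64_hash_t hash_chunks(const void* const* chunks,
 *                                 const size_t* sizes,
 *                                 size_t nbChunks, XXH64_hash_t seed)
 * {
 *     XXH64_state_t* const state = XXH64_createState();
 *     XXH64_hash_t hash;
 *     size_t i;
 *     XXH64_reset(state, seed);
 *     for (i = 0; i < nbChunks; i++)
 *         XXH64_update(state, chunks[i], sizes[i]);
 *     hash = XXH64_digest(state);   // same result as hashing it all in one XXH64() call
 *     XXH64_freeState(state);
 *     return hash;
 * }
 * \endcode
 */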
10943 /******* Canonical representation *******/
10945 /*! @ingroup XXH64_family */
10946 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
10948 XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
10949 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
10950 XXH_memcpy(dst, &hash, sizeof(*dst));
10953 /*! @ingroup XXH64_family */
10954 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
10956 return XXH_readBE64(src);
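/*
 * Sketch of a canonical round trip, e.g. for storing a checksum in a file
 * format (`hash` is an assumed XXH64_hash_t value computed elsewhere):
 * \code
 * XXH64_canonical_t canon;
 * XXH64_canonicalFromHash(&canon, hash);   // canon.digest[] is big-endian
 * // ... write, then later read back, the 8 bytes of canon.digest ...
 * XXH64_hash_t const restored = XXH64_hashFromCanonical(&canon);
 * // restored == hash, independently of writer/reader endianness
 * \endcode
 */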
10959 #ifndef XXH_NO_XXH3
10961 /* *********************************************************************
10962 * XXH3
10963 * New generation hash designed for speed on small keys and vectorization
10964 ************************************************************************ */
10966 * @}
10967 * @defgroup XXH3_impl XXH3 implementation
10968 * @ingroup impl
10969 * @{
10972 /* === Compiler specifics === */
10974 #if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
10975 # define XXH_RESTRICT /* disable */
10976 #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
10977 # define XXH_RESTRICT restrict
10978 #elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
10979 || (defined (__clang__)) \
10980 || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
10981 || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
10983 * There are a LOT more compilers that recognize __restrict but this
10984 * covers the major ones.
10986 # define XXH_RESTRICT __restrict
10987 #else
10988 # define XXH_RESTRICT /* disable */
10989 #endif
10991 #if (defined(__GNUC__) && (__GNUC__ >= 3)) \
10992 || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
10993 || defined(__clang__)
10994 # define XXH_likely(x) __builtin_expect(x, 1)
10995 # define XXH_unlikely(x) __builtin_expect(x, 0)
10996 #else
10997 # define XXH_likely(x) (x)
10998 # define XXH_unlikely(x) (x)
10999 #endif
11001 #ifndef XXH_HAS_INCLUDE
11002 # ifdef __has_include
11004 * Not defined as XXH_HAS_INCLUDE(x) (function-like) because
11005 * this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion)
11007 # define XXH_HAS_INCLUDE __has_include
11008 # else
11009 # define XXH_HAS_INCLUDE(x) 0
11010 # endif
11011 #endif
11013 #if defined(__GNUC__) || defined(__clang__)
11014 # if defined(__ARM_FEATURE_SVE)
11015 # include <arm_sve.h>
11016 # endif
11017 # if defined(__ARM_NEON__) || defined(__ARM_NEON) \
11018 || (defined(_M_ARM) && _M_ARM >= 7) \
11019 || defined(_M_ARM64) || defined(_M_ARM64EC) \
11020 || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* WASM SIMD128 via SIMDe */
11021 # define inline __inline__ /* circumvent a clang bug */
11022 # include <arm_neon.h>
11023 # undef inline
11024 # elif defined(__AVX2__)
11025 # include <immintrin.h>
11026 # elif defined(__SSE2__)
11027 # include <emmintrin.h>
11028 # endif
11029 #endif
11031 #if defined(_MSC_VER)
11032 # include <intrin.h>
11033 #endif
11036 * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
11037 * remaining a true 64-bit/128-bit hash function.
11039 * This is done by prioritizing a subset of 64-bit operations that can be
11040 * emulated without too many steps on the average 32-bit machine.
11042 * For example, these two lines seem similar, and run equally fast on 64-bit:
11044 * xxh_u64 x;
11045 * x ^= (x >> 47); // good
11046 * x ^= (x >> 13); // bad
11048 * However, to a 32-bit machine, there is a major difference.
11050 * x ^= (x >> 47) looks like this:
11052 * x.lo ^= (x.hi >> (47 - 32));
11054 * while x ^= (x >> 13) looks like this:
11056 * // note: funnel shifts are not usually cheap.
11057 * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
11058 * x.hi ^= (x.hi >> 13);
11060 * The first one is significantly faster than the second, simply because the
11061 * shift is larger than 32. This means:
11062 * - All the bits we need are in the upper 32 bits, so we can ignore the lower
11063 * 32 bits in the shift.
11064 * - The shift result will always fit in the lower 32 bits, and therefore,
11065 * we can ignore the upper 32 bits in the xor.
11067 * Thanks to this optimization, XXH3 only requires these features to be efficient:
11069 * - Usable unaligned access
11070 * - A 32-bit or 64-bit ALU
11071 * - If 32-bit, a decent ADC instruction
11072 * - A 32 or 64-bit multiply with a 64-bit result
11073 * - For the 128-bit variant, a decent byteswap helps short inputs.
11075 * The first two are already required by XXH32, and almost all 32-bit and 64-bit
11076 * platforms which can run XXH32 can run XXH3 efficiently.
11078 * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
11079 * notable exception.
11081 * First of all, Thumb-1 lacks support for the UMULL instruction which
11082 * performs the important long multiply. This means numerous __aeabi_lmul
11083 * calls.
11085 * Second of all, the 8 functional registers are just not enough.
11086 * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
11087 * Lo registers, and this shuffling results in thousands more MOVs than A32.
11089 * A32 and T32 don't have this limitation. They can access all 14 registers,
11090 * do a 32->64 multiply with UMULL, and the flexible operand allowing free
11091 * shifts is helpful, too.
11093 * Therefore, we do a quick sanity check.
11095 * If compiling Thumb-1 for a target which supports ARM instructions, we will
11096 * emit a warning, as it is not a "sane" platform to compile for.
11098 * Usually, if this happens, it is because of an accident and you probably need
11099 * to specify -march, as you likely meant to compile for a newer architecture.
11101 * Credit: large sections of the vectorial and asm source code paths
11102 * have been contributed by @easyaspi314
11104 #if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
11105 # warning "XXH3 is highly inefficient without ARM or Thumb-2."
11106 #endif
11108 /* ==========================================
11109 * Vectorization detection
11110 * ========================================== */
11112 #ifdef XXH_DOXYGEN
11114 * @ingroup tuning
11115 * @brief Overrides the vectorization implementation chosen for XXH3.
11117 * Can be defined to 0 to disable SIMD or any of the values mentioned in
11118 * @ref XXH_VECTOR_TYPE.
11120 * If this is not defined, it uses predefined macros to determine the best
11121 * implementation.
11123 # define XXH_VECTOR XXH_SCALAR
11125 * @ingroup tuning
11126 * @brief Possible values for @ref XXH_VECTOR.
11128 * Note that these are actually implemented as macros.
11130 * If this is not defined, it is detected automatically.
11131 * internal macro XXH_X86DISPATCH overrides this.
11133 enum XXH_VECTOR_TYPE /* fake enum */ {
11134 XXH_SCALAR = 0, /*!< Portable scalar version */
11135 XXH_SSE2 = 1, /*!<
11136 * SSE2 for Pentium 4, Opteron, all x86_64.
11138 * @note SSE2 is also guaranteed on Windows 10, macOS, and
11139 * Android x86.
11141 XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */
11142 XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */
11143 XXH_NEON = 4, /*!<
11144 * NEON for most ARMv7-A, all AArch64, and WASM SIMD128
11145 * via the SIMDeverywhere polyfill provided with the
11146 * Emscripten SDK.
11148 XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */
11149 XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */
11152 * @ingroup tuning
11153 * @brief Selects the minimum alignment for XXH3's accumulators.
11155 * When using SIMD, this should match the alignment required for said vector
11156 * type, so, for example, 32 for AVX2.
11158 * Default: Auto detected.
11160 # define XXH_ACC_ALIGN 8
11161 #endif
11163 /* Actual definition */
11164 #ifndef XXH_DOXYGEN
11165 # define XXH_SCALAR 0
11166 # define XXH_SSE2 1
11167 # define XXH_AVX2 2
11168 # define XXH_AVX512 3
11169 # define XXH_NEON 4
11170 # define XXH_VSX 5
11171 # define XXH_SVE 6
11172 #endif
11174 #ifndef XXH_VECTOR /* can be defined on command line */
11175 # if defined(__ARM_FEATURE_SVE)
11176 # define XXH_VECTOR XXH_SVE
11177 # elif ( \
11178 defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
11179 || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
11180 || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* wasm simd128 via SIMDe */ \
11181 ) && ( \
11182 defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
11183 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
11185 # define XXH_VECTOR XXH_NEON
11186 # elif defined(__AVX512F__)
11187 # define XXH_VECTOR XXH_AVX512
11188 # elif defined(__AVX2__)
11189 # define XXH_VECTOR XXH_AVX2
11190 # elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
11191 # define XXH_VECTOR XXH_SSE2
11192 # elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
11193 || (defined(__s390x__) && defined(__VEC__)) \
11194 && defined(__GNUC__) /* TODO: IBM XL */
11195 # define XXH_VECTOR XXH_VSX
11196 # else
11197 # define XXH_VECTOR XXH_SCALAR
11198 # endif
11199 #endif
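/* Note: the detection above can be overridden at build time; for example,
 * forcing the portable scalar path (a sketch of a typical invocation, not a
 * flag this amalgamation requires):
 * \code
 * cc -O3 -DXXH_VECTOR=0 -c zstddeclib.c   // 0 == XXH_SCALAR
 * \endcode
 */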
11201 /* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
11202 #if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
11203 # ifdef _MSC_VER
11204 # pragma warning(once : 4606)
11205 # else
11206 # warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
11207 # endif
11208 # undef XXH_VECTOR
11209 # define XXH_VECTOR XXH_SCALAR
11210 #endif
11213 * Controls the alignment of the accumulator,
11214 * for compatibility with aligned vector loads, which are usually faster.
11216 #ifndef XXH_ACC_ALIGN
11217 # if defined(XXH_X86DISPATCH)
11218 # define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */
11219 # elif XXH_VECTOR == XXH_SCALAR /* scalar */
11220 # define XXH_ACC_ALIGN 8
11221 # elif XXH_VECTOR == XXH_SSE2 /* sse2 */
11222 # define XXH_ACC_ALIGN 16
11223 # elif XXH_VECTOR == XXH_AVX2 /* avx2 */
11224 # define XXH_ACC_ALIGN 32
11225 # elif XXH_VECTOR == XXH_NEON /* neon */
11226 # define XXH_ACC_ALIGN 16
11227 # elif XXH_VECTOR == XXH_VSX /* vsx */
11228 # define XXH_ACC_ALIGN 16
11229 # elif XXH_VECTOR == XXH_AVX512 /* avx512 */
11230 # define XXH_ACC_ALIGN 64
11231 # elif XXH_VECTOR == XXH_SVE /* sve */
11232 # define XXH_ACC_ALIGN 64
11233 # endif
11234 #endif
11236 #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
11237 || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
11238 # define XXH_SEC_ALIGN XXH_ACC_ALIGN
11239 #elif XXH_VECTOR == XXH_SVE
11240 # define XXH_SEC_ALIGN XXH_ACC_ALIGN
11241 #else
11242 # define XXH_SEC_ALIGN 8
11243 #endif
11245 #if defined(__GNUC__) || defined(__clang__)
11246 # define XXH_ALIASING __attribute__((may_alias))
11247 #else
11248 # define XXH_ALIASING /* nothing */
11249 #endif
11252 * UGLY HACK:
11253 * GCC usually generates the best code with -O3 for xxHash.
11255 * However, when targeting AVX2, it is overzealous in its unrolling resulting
11256 * in code roughly 3/4 the speed of Clang.
11258 * There are other issues, such as GCC splitting _mm256_loadu_si256 into
11259 * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
11260 * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
11262 * That is why when compiling the AVX2 version, it is recommended to use either
11263 * -O2 -mavx2 -march=haswell
11264 * or
11265 * -O2 -mavx2 -mno-avx256-split-unaligned-load
11266 * for decent performance, or to use Clang instead.
11268 * Fortunately, we can control the first one with a pragma that forces GCC into
11269 * -O2, but the other one we can't control without "failed to inline always
11270 * inline function due to target mismatch" warnings.
11272 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
11273 && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
11274 && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
11275 # pragma GCC push_options
11276 # pragma GCC optimize("-O2")
11277 #endif
11279 #if XXH_VECTOR == XXH_NEON
11282 * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
11283 * optimizes out the entire hashLong loop because of the aliasing violation.
11285 * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
11286 * so the only option is to mark it as aliasing.
11288 typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
11291 * @internal
11292 * @brief `vld1q_u64` but faster and alignment-safe.
11294 * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
11295 * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
11297 * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
11298 * prohibits load-store optimizations. Therefore, a direct dereference is used.
11300 * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
11301 * unaligned load.
11303 #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
11304 XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
11306 return *(xxh_aliasing_uint64x2_t const *)ptr;
11308 #else
11309 XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
11311 return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
11313 #endif
11316 * @internal
11317 * @brief `vmlal_u32` on low and high halves of a vector.
* This is a workaround for AArch64 GCC < 11, which implemented arm_neon.h with
* inline assembly and was therefore incapable of merging the `vget_{low, high}_u32`
* with `vmlal_u32`.
11323 #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
11324 XXH_FORCE_INLINE uint64x2_t
11325 XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
11327 /* Inline assembly is the only way */
11328 __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
11329 return acc;
11331 XXH_FORCE_INLINE uint64x2_t
11332 XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
11334 /* This intrinsic works as expected */
11335 return vmlal_high_u32(acc, lhs, rhs);
11337 #else
11338 /* Portable intrinsic versions */
11339 XXH_FORCE_INLINE uint64x2_t
11340 XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
11342 return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
11344 /*! @copydoc XXH_vmlal_low_u32
11345 * Assume the compiler converts this to vmlal_high_u32 on aarch64 */
11346 XXH_FORCE_INLINE uint64x2_t
11347 XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
11349 return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
11351 #endif
11354 * @ingroup tuning
11355 * @brief Controls the NEON to scalar ratio for XXH3
11357 * This can be set to 2, 4, 6, or 8.
11359 * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.
11361 * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those
11362 * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU
11363 * bandwidth.
11365 * This is even more noticeable on the more advanced cores like the Cortex-A76 which
11366 * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
11368 * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
11369 * and 2 scalar lanes, which is chosen by default.
11371 * This does not apply to Apple processors or 32-bit processors, which run better with
11372 * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
11374 * This change benefits CPUs with large micro-op buffers without negatively affecting
11375 * most other CPUs:
11377 * | Chipset | Dispatch type | NEON only | 6:2 hybrid | Diff. |
11378 * |:----------------------|:--------------------|----------:|-----------:|------:|
11379 * | Snapdragon 730 (A76) | 2 NEON/8 micro-ops | 8.8 GB/s | 10.1 GB/s | ~16% |
11380 * | Snapdragon 835 (A73) | 2 NEON/3 micro-ops | 5.1 GB/s | 5.3 GB/s | ~5% |
11381 * | Marvell PXA1928 (A53) | In-order dual-issue | 1.9 GB/s | 1.9 GB/s | 0% |
11382 * | Apple M1 | 4 NEON/8 micro-ops | 37.3 GB/s | 36.1 GB/s | ~-3% |
11384 * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
* When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes, meaning
* it effectively becomes a slower version of 4.
11389 * @see XXH3_accumulate_512_neon()
11391 # ifndef XXH3_NEON_LANES
11392 # if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
11393 && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
11394 # define XXH3_NEON_LANES 6
11395 # else
11396 # define XXH3_NEON_LANES XXH_ACC_NB
11397 # endif
11398 # endif
11399 #endif /* XXH_VECTOR == XXH_NEON */
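/* Likewise, XXH3_NEON_LANES can be overridden at build time, e.g. with
 * -DXXH3_NEON_LANES=8 to force full-NEON operation (whether that helps depends
 * on the core, as the table above shows). */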
11402 * VSX and Z Vector helpers.
11404 * This is very messy, and any pull requests to clean this up are welcome.
11406 * There are a lot of problems with supporting VSX and s390x, due to
11407 * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
11409 #if XXH_VECTOR == XXH_VSX
11410 /* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
11411 * and `pixel`. This is a problem for obvious reasons.
11413 * These keywords are unnecessary; the spec literally says they are
11414 * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
11415 * after including the header.
11417 * We use pragma push_macro/pop_macro to keep the namespace clean. */
11418 # pragma push_macro("bool")
11419 # pragma push_macro("vector")
11420 # pragma push_macro("pixel")
11421 /* silence potential macro redefined warnings */
11422 # undef bool
11423 # undef vector
11424 # undef pixel
11426 # if defined(__s390x__)
11427 # include <s390intrin.h>
11428 # else
11429 # include <altivec.h>
11430 # endif
11432 /* Restore the original macro values, if applicable. */
11433 # pragma pop_macro("pixel")
11434 # pragma pop_macro("vector")
11435 # pragma pop_macro("bool")
11437 typedef __vector unsigned long long xxh_u64x2;
11438 typedef __vector unsigned char xxh_u8x16;
11439 typedef __vector unsigned xxh_u32x4;
11442 * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
11444 typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
11446 # ifndef XXH_VSX_BE
11447 # if defined(__BIG_ENDIAN__) \
11448 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
11449 # define XXH_VSX_BE 1
11450 # elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
11451 # warning "-maltivec=be is not recommended. Please use native endianness."
11452 # define XXH_VSX_BE 1
11453 # else
11454 # define XXH_VSX_BE 0
11455 # endif
11456 # endif /* !defined(XXH_VSX_BE) */
11458 # if XXH_VSX_BE
11459 # if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
11460 # define XXH_vec_revb vec_revb
11461 # else
11463 * A polyfill for POWER9's vec_revb().
11465 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
11467 xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
11468 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
11469 return vec_perm(val, val, vByteSwap);
11471 # endif
11472 # endif /* XXH_VSX_BE */
11475 * Performs an unaligned vector load and byte swaps it on big endian.
11477 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
11479 xxh_u64x2 ret;
11480 XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
11481 # if XXH_VSX_BE
11482 ret = XXH_vec_revb(ret);
11483 # endif
11484 return ret;
11488 * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
* These intrinsics weren't added until GCC 8, despite existing for a while,
* and they are endian dependent. Also, their meanings swap depending on the version.
11492 * */
11493 # if defined(__s390x__)
11494 /* s390x is always big endian, no issue on this platform */
11495 # define XXH_vec_mulo vec_mulo
11496 # define XXH_vec_mule vec_mule
11497 # elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
/* Clang has a better way to control this: we can just use the builtin, which doesn't swap. */
/* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations. */
11500 # define XXH_vec_mulo __builtin_altivec_vmulouw
11501 # define XXH_vec_mule __builtin_altivec_vmuleuw
11502 # else
11503 /* gcc needs inline assembly */
11504 /* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
11505 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
11507 xxh_u64x2 result;
11508 __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
11509 return result;
11511 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
11513 xxh_u64x2 result;
11514 __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
11515 return result;
11517 # endif /* XXH_vec_mulo, XXH_vec_mule */
11518 #endif /* XXH_VECTOR == XXH_VSX */
11520 #if XXH_VECTOR == XXH_SVE
11521 #define ACCRND(acc, offset) \
11522 do { \
11523 svuint64_t input_vec = svld1_u64(mask, xinput + offset); \
11524 svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \
11525 svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
11526 svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
11527 svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
11528 svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
11529 svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
11530 acc = svadd_u64_x(mask, acc, mul); \
11531 } while (0)
11532 #endif /* XXH_VECTOR == XXH_SVE */
11534 /* prefetch
11535 * can be disabled, by declaring XXH_NO_PREFETCH build macro */
11536 #if defined(XXH_NO_PREFETCH)
11537 # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
11538 #else
11539 # if XXH_SIZE_OPT >= 1
11540 # define XXH_PREFETCH(ptr) (void)(ptr)
11541 # elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
11542 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
11543 # define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
11544 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
11545 # define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
11546 # else
11547 # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
11548 # endif
11549 #endif /* XXH_NO_PREFETCH */
11552 /* ==========================================
11553 * XXH3 default settings
11554 * ========================================== */
11556 #define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */
11558 #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
11559 # error "default keyset is not large enough"
11560 #endif
11562 /*! Pseudorandom secret taken directly from FARSH. */
11563 XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
11564 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
11565 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
11566 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
11567 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
11568 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
11569 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
11570 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
11571 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
11572 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
11573 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
11574 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
11575 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
11578 static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL; /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */
11579 static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */
11581 #ifdef XXH_OLD_NAMES
11582 # define kSecret XXH3_kSecret
11583 #endif
11585 #ifdef XXH_DOXYGEN
11587 * @brief Calculates a 32-bit to 64-bit long multiply.
11589 * Implemented as a macro.
11591 * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't
11592 * need to (but it shouldn't need to anyways, it is about 7 instructions to do
11593 * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we
11594 * use that instead of the normal method.
11596 * If you are compiling for platforms like Thumb-1 and don't have a better option,
11597 * you may also want to write your own long multiply routine here.
11599 * @param x, y Numbers to be multiplied
11600 * @return 64-bit product of the low 32 bits of @p x and @p y.
11602 XXH_FORCE_INLINE xxh_u64
11603 XXH_mult32to64(xxh_u64 x, xxh_u64 y)
11605 return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
11607 #elif defined(_MSC_VER) && defined(_M_IX86)
11608 # define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
11609 #else
11611 * Downcast + upcast is usually better than masking on older compilers like
11612 * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
11614 * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
11615 * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
11617 # define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
11618 #endif
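/* Worked example of the 32x32->64 multiply above:
 * \code
 * xxh_u64 const a = 0x1FFFFFFFFULL;   // only the low 32 bits (0xFFFFFFFF) are used
 * xxh_u64 const b = 3;
 * // XXH_mult32to64(a, b) == 0xFFFFFFFFULL * 3 == 0x2FFFFFFFDULL
 * \endcode
 */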
11621 * @brief Calculates a 64->128-bit long multiply.
11623 * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
11624 * version.
11626 * @param lhs , rhs The 64-bit integers to be multiplied
11627 * @return The 128-bit result represented in an @ref XXH128_hash_t.
11629 static XXH128_hash_t
11630 XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
11633 * GCC/Clang __uint128_t method.
11635 * On most 64-bit targets, GCC and Clang define a __uint128_t type.
* This is usually the best way, as it typically uses a native long 64-bit
* multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
11639 * Usually.
* However, on some 32-bit platforms (notably Emscripten/wasm), Clang defines
* this type despite not having the native arithmetic for it. This results in a
* slow compiler-builtin call which calculates a full 128-bit multiply.
* In that case it is best to use the portable one.
11645 * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
11647 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
11648 && defined(__SIZEOF_INT128__) \
11649 || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
11651 __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
11652 XXH128_hash_t r128;
11653 r128.low64 = (xxh_u64)(product);
11654 r128.high64 = (xxh_u64)(product >> 64);
11655 return r128;
11658 * MSVC for x64's _umul128 method.
11660 * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
11662 * This compiles to single operand MUL on x64.
11664 #elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
11666 #ifndef _MSC_VER
11667 # pragma intrinsic(_umul128)
11668 #endif
11669 xxh_u64 product_high;
11670 xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
11671 XXH128_hash_t r128;
11672 r128.low64 = product_low;
11673 r128.high64 = product_high;
11674 return r128;
11677 * MSVC for ARM64's __umulh method.
11679 * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
11681 #elif defined(_M_ARM64) || defined(_M_ARM64EC)
11683 #ifndef _MSC_VER
11684 # pragma intrinsic(__umulh)
11685 #endif
11686 XXH128_hash_t r128;
11687 r128.low64 = lhs * rhs;
11688 r128.high64 = __umulh(lhs, rhs);
11689 return r128;
11691 #else
11693 * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
11695 * This is a fast and simple grade school multiply, which is shown below
11696 * with base 10 arithmetic instead of base 0x100000000.
11698 * 9 3 // D2 lhs = 93
11699 * x 7 5 // D2 rhs = 75
11700 * ----------
11701 * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
11702 * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
11703 * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
11704 * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
11705 * ---------
11706 * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
11707 * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
11708 * ---------
11709 * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
11711 * The reasons for adding the products like this are:
11712 * 1. It avoids manual carry tracking. Just like how
11713 * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
11714 * This avoids a lot of complexity.
11716 * 2. It hints for, and on Clang, compiles to, the powerful UMAAL
11717 * instruction available in ARM's Digital Signal Processing extension
11718 * in 32-bit ARMv6 and later, which is shown below:
11720 * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
11722 * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
11723 * *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
11724 * *RdHi = (xxh_u32)(product >> 32);
11727 * This instruction was designed for efficient long multiplication, and
11728 * allows this to be calculated in only 4 instructions at speeds
11729 * comparable to some 64-bit ALUs.
11731 * 3. It isn't terrible on other platforms. Usually this will be a couple
11732 * of 32-bit ADD/ADCs.
11735 /* First calculate all of the cross products. */
11736 xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
11737 xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF);
11738 xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
11739 xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32);
11741 /* Now add the products together. These will never overflow. */
11742 xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
11743 xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
11744 xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
11746 XXH128_hash_t r128;
11747 r128.low64 = lower;
11748 r128.high64 = upper;
11749 return r128;
11750 #endif
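/* Worked example of the portable path above, in base 2^32 rather than base 10:
 *   lhs = 0x0000000100000002, rhs = 0x0000000300000004
 *   lo_lo = 2*4 = 8,  hi_lo = 1*4 = 4,  lo_hi = 2*3 = 6,  hi_hi = 1*3 = 3
 *   cross = 0 + 4 + 6 = 10,  upper = 0 + 0 + 3 = 3
 *   lower = (10 << 32) | 8 = 0x0000000A00000008
 * so the 128-bit product is { .low64 = 0x0000000A00000008, .high64 = 3 }. */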
11754 * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it.
11756 * The reason for the separate function is to prevent passing too many structs
11757 * around by value. This will hopefully inline the multiply, but we don't force it.
11759 * @param lhs , rhs The 64-bit integers to multiply
11760 * @return The low 64 bits of the product XOR'd by the high 64 bits.
11761 * @see XXH_mult64to128()
11763 static xxh_u64
11764 XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
11766 XXH128_hash_t product = XXH_mult64to128(lhs, rhs);
11767 return product.low64 ^ product.high64;
11770 /*! Seems to produce slightly better code on GCC for some reason. */
11771 XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
11773 XXH_ASSERT(0 <= shift && shift < 64);
11774 return v64 ^ (v64 >> shift);
11778 * This is a fast avalanche stage,
11779 * suitable when input bits are already partially mixed
11781 static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
11783 h64 = XXH_xorshift64(h64, 37);
11784 h64 *= PRIME_MX1;
11785 h64 = XXH_xorshift64(h64, 32);
11786 return h64;
11790 * This is a stronger avalanche,
11791 * inspired by Pelle Evensen's rrmxmx
11792 * preferable when input has not been previously mixed
11794 static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
11796 /* this mix is inspired by Pelle Evensen's rrmxmx */
11797 h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
11798 h64 *= PRIME_MX2;
11799 h64 ^= (h64 >> 35) + len ;
11800 h64 *= PRIME_MX2;
11801 return XXH_xorshift64(h64, 28);
11805 /* ==========================================
11806 * Short keys
11807 * ==========================================
11808 * One of the shortcomings of XXH32 and XXH64 was that their performance was
* sub-optimal on short lengths. They used an iterative algorithm which strongly
* favored lengths that were a multiple of 4 or 8.
11812 * Instead of iterating over individual inputs, we use a set of single shot
11813 * functions which piece together a range of lengths and operate in constant time.
11815 * Additionally, the number of multiplies has been significantly reduced. This
11816 * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
11818 * Depending on the platform, this may or may not be faster than XXH32, but it
11819 * is almost guaranteed to be faster than XXH64.
11823 * At very short lengths, there isn't enough input to fully hide secrets, or use
11824 * the entire secret.
11826 * There is also only a limited amount of mixing we can do before significantly
11827 * impacting performance.
11829 * Therefore, we use different sections of the secret and always mix two secret
11830 * samples with an XOR. This should have no effect on performance on the
11831 * seedless or withSeed variants because everything _should_ be constant folded
11832 * by modern compilers.
11834 * The XOR mixing hides individual parts of the secret and increases entropy.
11836 * This adds an extra layer of strength for custom secrets.
11838 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
11839 XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
11841 XXH_ASSERT(input != NULL);
11842 XXH_ASSERT(1 <= len && len <= 3);
11843 XXH_ASSERT(secret != NULL);
11845 * len = 1: combined = { input[0], 0x01, input[0], input[0] }
11846 * len = 2: combined = { input[1], 0x02, input[0], input[1] }
11847 * len = 3: combined = { input[2], 0x03, input[0], input[1] }
11849 { xxh_u8 const c1 = input[0];
11850 xxh_u8 const c2 = input[len >> 1];
11851 xxh_u8 const c3 = input[len - 1];
11852 xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24)
11853 | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
11854 xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
11855 xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;
11856 return XXH64_avalanche(keyed);
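/* Worked example of the packing above: for input = "AB" (len = 2),
 * c1 = 0x41 and c2 = c3 = 0x42, so
 *   combined = (0x41<<16) | (0x42<<24) | 0x42 | (2<<8) = 0x42410242,
 * whose bytes, from low to high, are { 0x42, 0x02, 0x41, 0x42 },
 * i.e. { input[1], 0x02, input[0], input[1] } as documented. */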
11860 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
11861 XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
11863 XXH_ASSERT(input != NULL);
11864 XXH_ASSERT(secret != NULL);
11865 XXH_ASSERT(4 <= len && len <= 8);
11866 seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
11867 { xxh_u32 const input1 = XXH_readLE32(input);
11868 xxh_u32 const input2 = XXH_readLE32(input + len - 4);
11869 xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
11870 xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);
11871 xxh_u64 const keyed = input64 ^ bitflip;
11872 return XXH3_rrmxmx(keyed, len);
11876 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
11877 XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
11879 XXH_ASSERT(input != NULL);
11880 XXH_ASSERT(secret != NULL);
11881 XXH_ASSERT(9 <= len && len <= 16);
11882 { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
11883 xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
11884 xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1;
11885 xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;
11886 xxh_u64 const acc = len
11887 + XXH_swap64(input_lo) + input_hi
11888 + XXH3_mul128_fold64(input_lo, input_hi);
11889 return XXH3_avalanche(acc);
11893 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
11894 XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
11896 XXH_ASSERT(len <= 16);
11897 { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed);
11898 if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
11899 if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
11900 return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));
11905 * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
11906 * multiplication by zero, affecting hashes of lengths 17 to 240.
11908 * However, they are very unlikely.
11910 * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
11911 * unseeded non-cryptographic hashes, it does not attempt to defend itself
11912 * against specially crafted inputs, only random inputs.
11914 * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
11915 * cancelling out the secret is taken an arbitrary number of times (addressed
11916 * in XXH3_accumulate_512), this collision is very unlikely with random inputs
11917 * and/or proper seeding:
11919 * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
11920 * function that is only called up to 16 times per hash with up to 240 bytes of
11921 * input.
11923 * This is not too bad for a non-cryptographic hash function, especially with
11924 * only 64 bit outputs.
11926 * The 128-bit variant (which trades some speed for strength) is NOT affected
11927 * by this, although it is always a good idea to use a proper seed if you care
11928 * about strength.
11930 XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
11931 const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
11933 #if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
11934 && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \
11935 && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */
11937 * UGLY HACK:
11938 * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
11939 * slower code.
11941 * By forcing seed64 into a register, we disrupt the cost model and
11942 * cause it to scalarize. See `XXH32_round()`
11944 * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
11945 * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
11946 * GCC 9.2, despite both emitting scalar code.
11948 * GCC generates much better scalar code than Clang for the rest of XXH3,
* which is why finding a more optimal codepath is of interest.
11951 XXH_COMPILER_GUARD(seed64);
11952 #endif
11953 { xxh_u64 const input_lo = XXH_readLE64(input);
11954 xxh_u64 const input_hi = XXH_readLE64(input+8);
11955 return XXH3_mul128_fold64(
11956 input_lo ^ (XXH_readLE64(secret) + seed64),
11957 input_hi ^ (XXH_readLE64(secret+8) - seed64)
11962 /* For mid range keys, XXH3 uses a Mum-hash variant. */
11963 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
11964 XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
11965 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
11966 XXH64_hash_t seed)
11968 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
11969 XXH_ASSERT(16 < len && len <= 128);
11971 { xxh_u64 acc = len * XXH_PRIME64_1;
11972 #if XXH_SIZE_OPT >= 1
11973 /* Smaller and cleaner, but slightly slower. */
11974 unsigned int i = (unsigned int)(len - 1) / 32;
11975 do {
11976 acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
11977 acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
11978 } while (i-- != 0);
11979 #else
11980 if (len > 32) {
11981 if (len > 64) {
11982 if (len > 96) {
11983 acc += XXH3_mix16B(input+48, secret+96, seed);
11984 acc += XXH3_mix16B(input+len-64, secret+112, seed);
11986 acc += XXH3_mix16B(input+32, secret+64, seed);
11987 acc += XXH3_mix16B(input+len-48, secret+80, seed);
11989 acc += XXH3_mix16B(input+16, secret+32, seed);
11990 acc += XXH3_mix16B(input+len-32, secret+48, seed);
11992 acc += XXH3_mix16B(input+0, secret+0, seed);
11993 acc += XXH3_mix16B(input+len-16, secret+16, seed);
11994 #endif
11995 return XXH3_avalanche(acc);
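/* For example, with len == 100 the branch above mixes the front ranges
 * [0,16) [16,32) [32,48) [48,64) and the back ranges [36,52) [52,68) [68,84)
 * [84,100), so every input byte is covered (with overlap in the middle), and
 * each 16-byte block is paired with a distinct 16-byte slice of the secret. */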
12000 * @brief Maximum size of "short" key in bytes.
12002 #define XXH3_MIDSIZE_MAX 240
12004 XXH_NO_INLINE XXH_PUREF XXH64_hash_t
12005 XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
12006 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
12007 XXH64_hash_t seed)
12009 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
12010 XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
12012 #define XXH3_MIDSIZE_STARTOFFSET 3
12013 #define XXH3_MIDSIZE_LASTOFFSET 17
12015 { xxh_u64 acc = len * XXH_PRIME64_1;
12016 xxh_u64 acc_end;
12017 unsigned int const nbRounds = (unsigned int)len / 16;
12018 unsigned int i;
12019 XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
12020 for (i=0; i<8; i++) {
12021 acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
12023 /* last bytes */
12024 acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
12025 XXH_ASSERT(nbRounds >= 8);
12026 acc = XXH3_avalanche(acc);
12027 #if defined(__clang__) /* Clang */ \
12028 && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
12029 && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
12031 * UGLY HACK:
12032 * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
* Everywhere else, it uses scalar code.
12035 * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
12036 * would still be slower than UMAAL (see XXH_mult64to128).
12038 * Unfortunately, Clang doesn't handle the long multiplies properly and
12039 * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
12040 * scalarized into an ugly mess of VMOV.32 instructions.
* This mess is difficult to avoid without turning autovectorization
* off completely, but these issues are usually relatively minor and/or not
* worth fixing.
12046 * This loop is the easiest to fix, as unlike XXH32, this pragma
12047 * _actually works_ because it is a loop vectorization instead of an
12048 * SLP vectorization.
12050 #pragma clang loop vectorize(disable)
12051 #endif
12052 for (i=8 ; i < nbRounds; i++) {
* Prevents clang from unrolling the acc loop and interleaving it with this one.
12056 XXH_COMPILER_GUARD(acc);
12057 acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
12059 return XXH3_avalanche(acc + acc_end);
12064 /* ======= Long Keys ======= */
12066 #define XXH_STRIPE_LEN 64
12067 #define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */
12068 #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
12070 #ifdef XXH_OLD_NAMES
12071 # define STRIPE_LEN XXH_STRIPE_LEN
12072 # define ACC_NB XXH_ACC_NB
12073 #endif
12075 #ifndef XXH_PREFETCH_DIST
12076 # ifdef __clang__
12077 # define XXH_PREFETCH_DIST 320
12078 # else
12079 # if (XXH_VECTOR == XXH_AVX512)
12080 # define XXH_PREFETCH_DIST 512
12081 # else
12082 # define XXH_PREFETCH_DIST 384
12083 # endif
12084 # endif /* __clang__ */
12085 #endif /* XXH_PREFETCH_DIST */
12088 * These macros are to generate an XXH3_accumulate() function.
12089 * The two arguments select the name suffix and target attribute.
12091 * The name of this symbol is XXH3_accumulate_<name>() and it calls
12092 * XXH3_accumulate_512_<name>().
12094 * It may be useful to hand implement this function if the compiler fails to
12095 * optimize the inline function.
12097 #define XXH3_ACCUMULATE_TEMPLATE(name) \
12098 void \
12099 XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \
12100 const xxh_u8* XXH_RESTRICT input, \
12101 const xxh_u8* XXH_RESTRICT secret, \
12102 size_t nbStripes) \
12104 size_t n; \
12105 for (n = 0; n < nbStripes; n++ ) { \
12106 const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \
12107 XXH_PREFETCH(in + XXH_PREFETCH_DIST); \
12108 XXH3_accumulate_512_##name( \
12109 acc, \
12110 in, \
12111 secret + n*XXH_SECRET_CONSUME_RATE); \
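/*
 * For instance, XXH3_ACCUMULATE_TEMPLATE(scalar) expands to roughly the
 * following (XXH_RESTRICT qualifiers omitted for brevity):
 * \code
 * void XXH3_accumulate_scalar(xxh_u64* acc, const xxh_u8* input,
 *                             const xxh_u8* secret, size_t nbStripes)
 * {
 *     size_t n;
 *     for (n = 0; n < nbStripes; n++) {
 *         const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
 *         XXH_PREFETCH(in + XXH_PREFETCH_DIST);
 *         XXH3_accumulate_512_scalar(acc, in, secret + n*XXH_SECRET_CONSUME_RATE);
 *     }
 * }
 * \endcode
 */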
12116 XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
12118 if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
12119 XXH_memcpy(dst, &v64, sizeof(v64));
12122 /* Several intrinsic functions below are supposed to accept __int64 as argument,
12123 * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
12124 * However, several environments do not define __int64 type,
12125 * requiring a workaround.
12127 #if !defined (__VMS) \
12128 && (defined (__cplusplus) \
12129 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
12130 typedef int64_t xxh_i64;
12131 #else
12132 /* the following type must have a width of 64-bit */
12133 typedef long long xxh_i64;
12134 #endif
12138 * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
12140 * It is a hardened version of UMAC, based off of FARSH's implementation.
12142 * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
12143 * implementations, and it is ridiculously fast.
12145 * We harden it by mixing the original input to the accumulators as well as the product.
12147 * This means that in the (relatively likely) case of a multiply by zero, the
12148 * original input is preserved.
12150 * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
12151 * cross-pollination, as otherwise the upper and lower halves would be
12152 * essentially independent.
12154 * This doesn't matter on 64-bit hashes since they all get merged together in
12155 * the end, so we skip the extra step.
12157 * Both XXH3_64bits and XXH3_128bits use this subroutine.
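/*
 * A scalar sketch of the per-lane update described above (the SIMD variants
 * below compute the same thing, several lanes at a time; `lane`, `input`,
 * `secret` and `acc` are assumed locals):
 * \code
 * xxh_u64 const data_val = XXH_readLE64(input  + 8*lane);
 * xxh_u64 const data_key = data_val ^ XXH_readLE64(secret + 8*lane);
 * acc[lane ^ 1] += data_val;                              // add the pair-swapped input
 * acc[lane]     += XXH_mult32to64(data_key & 0xFFFFFFFF,  // 32x32->64 product
 *                                 data_key >> 32);
 * \endcode
 */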
12160 #if (XXH_VECTOR == XXH_AVX512) \
12161 || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
12163 #ifndef XXH_TARGET_AVX512
12164 # define XXH_TARGET_AVX512 /* disable attribute target */
12165 #endif
12167 XXH_FORCE_INLINE XXH_TARGET_AVX512 void
12168 XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
12169 const void* XXH_RESTRICT input,
12170 const void* XXH_RESTRICT secret)
12172 __m512i* const xacc = (__m512i *) acc;
12173 XXH_ASSERT((((size_t)acc) & 63) == 0);
12174 XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
12177 /* data_vec = input[0]; */
12178 __m512i const data_vec = _mm512_loadu_si512 (input);
12179 /* key_vec = secret[0]; */
12180 __m512i const key_vec = _mm512_loadu_si512 (secret);
12181 /* data_key = data_vec ^ key_vec; */
12182 __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);
12183 /* data_key_lo = data_key >> 32; */
12184 __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
12185 /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
12186 __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo);
12187 /* xacc[0] += swap(data_vec); */
12188 __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
12189 __m512i const sum = _mm512_add_epi64(*xacc, data_swap);
12190 /* xacc[0] += product; */
12191 *xacc = _mm512_add_epi64(product, sum);
12194 XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
12197 * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
12199 * Multiplication isn't perfect, as explained by Google in HighwayHash:
12201 * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
12202 * // varying degrees. In descending order of goodness, bytes
12203 * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
12204 * // As expected, the upper and lower bytes are much worse.
12206 * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
12208 * Since our algorithm uses a pseudorandom secret to add some variance into the
12209 * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
12211 * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
12212 * extraction.
12214 * Both XXH3_64bits and XXH3_128bits use this subroutine.
12217 XXH_FORCE_INLINE XXH_TARGET_AVX512 void
12218 XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
12220 XXH_ASSERT((((size_t)acc) & 63) == 0);
12221 XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
12222 { __m512i* const xacc = (__m512i*) acc;
12223 const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
12225 /* xacc[0] ^= (xacc[0] >> 47) */
12226 __m512i const acc_vec = *xacc;
12227 __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47);
12228 /* xacc[0] ^= secret; */
12229 __m512i const key_vec = _mm512_loadu_si512 (secret);
12230 __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
12232 /* xacc[0] *= XXH_PRIME32_1; */
12233 __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
12234 __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32);
12235 __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32);
12236 *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
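/*
 * A scalar sketch of the same scramble, applied to one accumulator lane
 * (`lane`, `acc` and `secret` are assumed locals):
 * \code
 * acc[lane] ^= acc[lane] >> 47;                 // xorshift
 * acc[lane] ^= XXH_readLE64(secret + 8*lane);   // mix in the secret
 * acc[lane] *= XXH_PRIME32_1;                   // multiply by a 32-bit prime
 * \endcode
 */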
12240 XXH_FORCE_INLINE XXH_TARGET_AVX512 void
12241 XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
12243 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
12244 XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
12245 XXH_ASSERT(((size_t)customSecret & 63) == 0);
12246 (void)(&XXH_writeLE64);
12247 { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
12248 __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
12249 __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
12251 const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret);
12252 __m512i* const dest = ( __m512i*) customSecret;
12253 int i;
12254 XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
12255 XXH_ASSERT(((size_t)dest & 63) == 0);
12256 for (i=0; i < nbRounds; ++i) {
12257 dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
12261 #endif
12263 #if (XXH_VECTOR == XXH_AVX2) \
12264 || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
12266 #ifndef XXH_TARGET_AVX2
12267 # define XXH_TARGET_AVX2 /* disable attribute target */
12268 #endif
12270 XXH_FORCE_INLINE XXH_TARGET_AVX2 void
12271 XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
12272 const void* XXH_RESTRICT input,
12273 const void* XXH_RESTRICT secret)
12275 XXH_ASSERT((((size_t)acc) & 31) == 0);
12276 { __m256i* const xacc = (__m256i *) acc;
12277 /* Unaligned. This is mainly for pointer arithmetic, and because
12278 * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
12279 const __m256i* const xinput = (const __m256i *) input;
12280 /* Unaligned. This is mainly for pointer arithmetic, and because
12281 * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
12282 const __m256i* const xsecret = (const __m256i *) secret;
12284 size_t i;
12285 for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
12286 /* data_vec = xinput[i]; */
12287 __m256i const data_vec = _mm256_loadu_si256 (xinput+i);
12288 /* key_vec = xsecret[i]; */
12289 __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
12290 /* data_key = data_vec ^ key_vec; */
12291 __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
12292 /* data_key_lo = data_key >> 32; */
12293 __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
12294 /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
12295 __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo);
12296 /* xacc[i] += swap(data_vec); */
12297 __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
12298 __m256i const sum = _mm256_add_epi64(xacc[i], data_swap);
12299 /* xacc[i] += product; */
12300 xacc[i] = _mm256_add_epi64(product, sum);
12303 XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
12305 XXH_FORCE_INLINE XXH_TARGET_AVX2 void
12306 XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
12308 XXH_ASSERT((((size_t)acc) & 31) == 0);
12309 { __m256i* const xacc = (__m256i*) acc;
12310 /* Unaligned. This is mainly for pointer arithmetic, and because
12311 * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
12312 const __m256i* const xsecret = (const __m256i *) secret;
12313 const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
12315 size_t i;
12316 for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
12317 /* xacc[i] ^= (xacc[i] >> 47) */
12318 __m256i const acc_vec = xacc[i];
12319 __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47);
12320 __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted);
12321 /* xacc[i] ^= xsecret; */
12322 __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
12323 __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
12325 /* xacc[i] *= XXH_PRIME32_1; */
12326 __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
12327 __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32);
12328 __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32);
12329 xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
12334 XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
12336 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
12337 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
12338 XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
12339 (void)(&XXH_writeLE64);
12340 XXH_PREFETCH(customSecret);
12341 { __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
12343 const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret);
12344 __m256i* dest = ( __m256i*) customSecret;
12346 # if defined(__GNUC__) || defined(__clang__)
12348 * On GCC & Clang, marking 'dest' as modified causes the compiler to:
12349 * - not extract the secret from SSE registers in the internal loop
12350 * - use fewer registers, and avoid pushing these registers onto the stack
12352 XXH_COMPILER_GUARD(dest);
12353 # endif
12354 XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
12355 XXH_ASSERT(((size_t)dest & 31) == 0);
12357 /* GCC -O2 needs the loop unrolled manually */
12358 dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
12359 dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
12360 dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
12361 dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
12362 dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
12363 dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
12367 #endif
12369 /* x86dispatch always generates SSE2 */
12370 #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
12372 #ifndef XXH_TARGET_SSE2
12373 # define XXH_TARGET_SSE2 /* disable attribute target */
12374 #endif
12376 XXH_FORCE_INLINE XXH_TARGET_SSE2 void
12377 XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
12378 const void* XXH_RESTRICT input,
12379 const void* XXH_RESTRICT secret)
12381 /* SSE2 is just a half-scale version of the AVX2 version. */
12382 XXH_ASSERT((((size_t)acc) & 15) == 0);
12383 { __m128i* const xacc = (__m128i *) acc;
12384 /* Unaligned. This is mainly for pointer arithmetic, and because
12385 * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
12386 const __m128i* const xinput = (const __m128i *) input;
12387 /* Unaligned. This is mainly for pointer arithmetic, and because
12388 * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
12389 const __m128i* const xsecret = (const __m128i *) secret;
12391 size_t i;
12392 for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
12393 /* data_vec = xinput[i]; */
12394 __m128i const data_vec = _mm_loadu_si128 (xinput+i);
12395 /* key_vec = xsecret[i]; */
12396 __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
12397 /* data_key = data_vec ^ key_vec; */
12398 __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
12399 /* data_key_lo = data_key >> 32; */
12400 __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
12401 /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
12402 __m128i const product = _mm_mul_epu32 (data_key, data_key_lo);
12403 /* xacc[i] += swap(data_vec); */
12404 __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
12405 __m128i const sum = _mm_add_epi64(xacc[i], data_swap);
12406 /* xacc[i] += product; */
12407 xacc[i] = _mm_add_epi64(product, sum);
12410 XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
12412 XXH_FORCE_INLINE XXH_TARGET_SSE2 void
12413 XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
12415 XXH_ASSERT((((size_t)acc) & 15) == 0);
12416 { __m128i* const xacc = (__m128i*) acc;
12417 /* Unaligned. This is mainly for pointer arithmetic, and because
12418 * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
12419 const __m128i* const xsecret = (const __m128i *) secret;
12420 const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
12422 size_t i;
12423 for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
12424 /* xacc[i] ^= (xacc[i] >> 47) */
12425 __m128i const acc_vec = xacc[i];
12426 __m128i const shifted = _mm_srli_epi64 (acc_vec, 47);
12427 __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted);
12428 /* xacc[i] ^= xsecret[i]; */
12429 __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
12430 __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
12432 /* xacc[i] *= XXH_PRIME32_1; */
12433 __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
12434 __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32);
12435 __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32);
12436 xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
12441 XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
12443 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
12444 (void)(&XXH_writeLE64);
12445 { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
12447 # if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
12448 /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
12449 XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
12450 __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
12451 # else
12452 __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
12453 # endif
12454 int i;
12456 const void* const src16 = XXH3_kSecret;
12457 __m128i* dst16 = (__m128i*) customSecret;
12458 # if defined(__GNUC__) || defined(__clang__)
12460 * On GCC & Clang, marking 'dst16' as modified causes the compiler to:
12461 * - not extract the secret from SSE registers in the internal loop
12462 * - use fewer registers, and avoid pushing these registers onto the stack
12464 XXH_COMPILER_GUARD(dst16);
12465 # endif
12466 XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
12467 XXH_ASSERT(((size_t)dst16 & 15) == 0);
12469 for (i=0; i < nbRounds; ++i) {
12470 dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
12474 #endif
12476 #if (XXH_VECTOR == XXH_NEON)
12478 /* forward declarations for the scalar routines */
12479 XXH_FORCE_INLINE void
12480 XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
12481 void const* XXH_RESTRICT secret, size_t lane);
12483 XXH_FORCE_INLINE void
12484 XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
12485 void const* XXH_RESTRICT secret, size_t lane);
12488 * @internal
12489 * @brief The bulk processing loop for NEON and WASM SIMD128.
12491 * The NEON code path is actually partially scalar when running on AArch64. This
12492 * is to optimize the pipelining and can give up to a 15% speedup depending on
12493 * the CPU, and it also mitigates some GCC codegen issues.
12495 * @see XXH3_NEON_LANES for configuring this and details about this optimization.
12497 * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
12498 * integers instead of the other platforms which mask full 64-bit vectors,
12499 * so the setup is more complicated than just shifting right.
12501 * Additionally, there is an optimization for 4 lanes at once noted below.
12503 * Since, as stated, the optimal number of lanes for Cortexes is 6,
12504 * there need to be *three* versions of the accumulate operation: the 4-lane
12505 * loop, a 2-lane loop for the remaining NEON pair, and the scalar rounds.
12507 * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap
12508 * nearly perfectly.
12511 XXH_FORCE_INLINE void
12512 XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
12513 const void* XXH_RESTRICT input,
12514 const void* XXH_RESTRICT secret)
12516 XXH_ASSERT((((size_t)acc) & 15) == 0);
12517 XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
12518 { /* GCC for darwin arm64 does not like aliasing here */
12519 xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
12520 /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
12521 uint8_t const* xinput = (const uint8_t *) input;
12522 uint8_t const* xsecret = (const uint8_t *) secret;
12524 size_t i;
12525 #ifdef __wasm_simd128__
12527 * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret
12528 * is constant propagated, which results in it converting it to this
12529 * inside the loop:
12531 * a = v128.load(XXH3_kSecret + 0 + $secret_offset, offset = 0)
12532 * b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0)
12533 * ...
12535 * This requires a full 32-bit address immediate (and therefore a 6 byte
12536 * instruction) as well as an add for each offset.
12538 * Putting an asm guard prevents it from folding (at the cost of losing
12539 * the alignment hint), and uses the free offset in `v128.load` instead
12540 * of adding secret_offset each time which overall reduces code size by
12541 * about a kilobyte and improves performance.
12543 XXH_COMPILER_GUARD(xsecret);
12544 #endif
12545 /* Scalar lanes use the normal scalarRound routine */
12546 for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
12547 XXH3_scalarRound(acc, input, secret, i);
12549 i = 0;
12550 /* 4 NEON lanes at a time. */
12551 for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
12552 /* data_vec = xinput[i]; */
12553 uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16));
12554 uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16));
12555 /* key_vec = xsecret[i]; */
12556 uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16));
12557 uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16));
12558 /* data_swap = swap(data_vec) */
12559 uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
12560 uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
12561 /* data_key = data_vec ^ key_vec; */
12562 uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
12563 uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
12566 * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
12567 * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
12568 * get one vector with the low 32 bits of each lane, and one vector
12569 * with the high 32 bits of each lane.
12571 * The intrinsic returns a double vector because the original ARMv7-a
12572 * instruction modified both arguments in place. AArch64 and SIMD128 emit
12573 * two instructions from this intrinsic.
12575 * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
12576 * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
12578 uint32x4x2_t unzipped = vuzpq_u32(
12579 vreinterpretq_u32_u64(data_key_1),
12580 vreinterpretq_u32_u64(data_key_2)
12582 /* data_key_lo = data_key & 0xFFFFFFFF */
12583 uint32x4_t data_key_lo = unzipped.val[0];
12584 /* data_key_hi = data_key >> 32 */
12585 uint32x4_t data_key_hi = unzipped.val[1];
12587 * Then, we can split the vectors horizontally and multiply. Like most
12588 * widening intrinsics, the multiply has a high-half variant that comes
12589 * for free on AArch64. A similar instruction is available on SIMD128.
12591 * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
12593 uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
12594 uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
12596 * Clang reorders
12597 * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s
12598 * c += a; // add acc.2d, acc.2d, swap.2d
12599 * to
12600 * c += a; // add acc.2d, acc.2d, swap.2d
12601 * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s
12603 * While it would make sense in theory since the addition is faster,
12604 * for reasons likely related to umlal being limited to certain NEON
12605 * pipelines, this is worse. A compiler guard fixes this.
12607 XXH_COMPILER_GUARD_CLANG_NEON(sum_1);
12608 XXH_COMPILER_GUARD_CLANG_NEON(sum_2);
12609 /* xacc[i] = acc_vec + sum; */
12610 xacc[i] = vaddq_u64(xacc[i], sum_1);
12611 xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
12613 /* Operate on the remaining NEON lanes 2 at a time. */
12614 for (; i < XXH3_NEON_LANES / 2; i++) {
12615 /* data_vec = xinput[i]; */
12616 uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
12617 /* key_vec = xsecret[i]; */
12618 uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
12619 /* acc_vec_2 = swap(data_vec) */
12620 uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
12621 /* data_key = data_vec ^ key_vec; */
12622 uint64x2_t data_key = veorq_u64(data_vec, key_vec);
12623 /* For two lanes, just use VMOVN and VSHRN. */
12624 /* data_key_lo = data_key & 0xFFFFFFFF; */
12625 uint32x2_t data_key_lo = vmovn_u64(data_key);
12626 /* data_key_hi = data_key >> 32; */
12627 uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
12628 /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
12629 uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
12630 /* Same Clang workaround as before */
12631 XXH_COMPILER_GUARD_CLANG_NEON(sum);
12632 /* xacc[i] = acc_vec + sum; */
12633 xacc[i] = vaddq_u64 (xacc[i], sum);
12637 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
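/*
 * How the loops above partition the 8 accumulator lanes when
 * XXH3_NEON_LANES == 6 (commonly the AArch64 default); a sketch for
 * illustration only:
 *
 * \code
 * // scalar loop, i = 6,7 : lanes 6-7 via XXH3_scalarRound
 * // paired loop, i = 0   : lanes 0-3 (two uint64x2_t per iteration)
 * // single loop, i = 2   : lanes 4-5 (one uint64x2_t)
 * \endcode
 */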
12639 XXH_FORCE_INLINE void
12640 XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
12642 XXH_ASSERT((((size_t)acc) & 15) == 0);
12644 { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc;
12645 uint8_t const* xsecret = (uint8_t const*) secret;
12647 size_t i;
12648 /* WASM uses operator overloads and doesn't need these. */
12649 #ifndef __wasm_simd128__
12650 /* { prime32_1, prime32_1 } */
12651 uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1);
12652 /* { 0, prime32_1, 0, prime32_1 } */
12653 uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32));
12654 #endif
12656 /* AArch64 uses both scalar and neon at the same time */
12657 for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
12658 XXH3_scalarScrambleRound(acc, secret, i);
12660 for (i=0; i < XXH3_NEON_LANES / 2; i++) {
12661 /* xacc[i] ^= (xacc[i] >> 47); */
12662 uint64x2_t acc_vec = xacc[i];
12663 uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);
12664 uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
12666 /* xacc[i] ^= xsecret[i]; */
12667 uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
12668 uint64x2_t data_key = veorq_u64(data_vec, key_vec);
12669 /* xacc[i] *= XXH_PRIME32_1 */
12670 #ifdef __wasm_simd128__
12671 /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */
12672 xacc[i] = data_key * XXH_PRIME32_1;
12673 #else
12675 * Expanded version with portable NEON intrinsics
12677 * lo(x) * lo(y) + (hi(x) * lo(y) << 32)
12679 * prod_hi = hi(data_key) * lo(prime) << 32
12681 * Since we only need 32 bits of this multiply, a trick can be used: reinterpret the vector
12682 * as a uint32x4_t and multiply by { 0, prime, 0, prime } to cancel out the unwanted bits
12683 * and avoid the shift.
12685 uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi);
12686 /* Extract low bits for vmlal_u32 */
12687 uint32x2_t data_key_lo = vmovn_u64(data_key);
12688 /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */
12689 xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);
12690 #endif
12694 #endif
12696 #if (XXH_VECTOR == XXH_VSX)
12698 XXH_FORCE_INLINE void
12699 XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
12700 const void* XXH_RESTRICT input,
12701 const void* XXH_RESTRICT secret)
12703 /* presumed aligned */
12704 xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
12705 xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */
12706 xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */
12707 xxh_u64x2 const v32 = { 32, 32 };
12708 size_t i;
12709 for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
12710 /* data_vec = xinput[i]; */
12711 xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
12712 /* key_vec = xsecret[i]; */
12713 xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
12714 xxh_u64x2 const data_key = data_vec ^ key_vec;
12715 /* shuffled = (data_key << 32) | (data_key >> 32); */
12716 xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
12717 /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
12718 xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
12719 /* acc_vec = xacc[i]; */
12720 xxh_u64x2 acc_vec = xacc[i];
12721 acc_vec += product;
12723 /* swap high and low halves */
12724 #ifdef __s390x__
12725 acc_vec += vec_permi(data_vec, data_vec, 2);
12726 #else
12727 acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
12728 #endif
12729 xacc[i] = acc_vec;
12732 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
12734 XXH_FORCE_INLINE void
12735 XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
12737 XXH_ASSERT((((size_t)acc) & 15) == 0);
12739 { xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
12740 const xxh_u8* const xsecret = (const xxh_u8*) secret;
12741 /* constants */
12742 xxh_u64x2 const v32 = { 32, 32 };
12743 xxh_u64x2 const v47 = { 47, 47 };
12744 xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
12745 size_t i;
12746 for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
12747 /* xacc[i] ^= (xacc[i] >> 47); */
12748 xxh_u64x2 const acc_vec = xacc[i];
12749 xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
12751 /* xacc[i] ^= xsecret[i]; */
12752 xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
12753 xxh_u64x2 const data_key = data_vec ^ key_vec;
12755 /* xacc[i] *= XXH_PRIME32_1 */
12756 /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */
12757 xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);
12758 /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */
12759 xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime);
12760 xacc[i] = prod_odd + (prod_even << v32);
12764 #endif
12766 #if (XXH_VECTOR == XXH_SVE)
12768 XXH_FORCE_INLINE void
12769 XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
12770 const void* XXH_RESTRICT input,
12771 const void* XXH_RESTRICT secret)
12773 uint64_t *xacc = (uint64_t *)acc;
12774 const uint64_t *xinput = (const uint64_t *)(const void *)input;
12775 const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
12776 svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
12777 uint64_t element_count = svcntd();
12778 if (element_count >= 8) {
12779 svbool_t mask = svptrue_pat_b64(SV_VL8);
12780 svuint64_t vacc = svld1_u64(mask, xacc);
12781 ACCRND(vacc, 0);
12782 svst1_u64(mask, xacc, vacc);
12783 } else if (element_count == 2) { /* sve128 */
12784 svbool_t mask = svptrue_pat_b64(SV_VL2);
12785 svuint64_t acc0 = svld1_u64(mask, xacc + 0);
12786 svuint64_t acc1 = svld1_u64(mask, xacc + 2);
12787 svuint64_t acc2 = svld1_u64(mask, xacc + 4);
12788 svuint64_t acc3 = svld1_u64(mask, xacc + 6);
12789 ACCRND(acc0, 0);
12790 ACCRND(acc1, 2);
12791 ACCRND(acc2, 4);
12792 ACCRND(acc3, 6);
12793 svst1_u64(mask, xacc + 0, acc0);
12794 svst1_u64(mask, xacc + 2, acc1);
12795 svst1_u64(mask, xacc + 4, acc2);
12796 svst1_u64(mask, xacc + 6, acc3);
12797 } else {
12798 svbool_t mask = svptrue_pat_b64(SV_VL4);
12799 svuint64_t acc0 = svld1_u64(mask, xacc + 0);
12800 svuint64_t acc1 = svld1_u64(mask, xacc + 4);
12801 ACCRND(acc0, 0);
12802 ACCRND(acc1, 4);
12803 svst1_u64(mask, xacc + 0, acc0);
12804 svst1_u64(mask, xacc + 4, acc1);
12808 XXH_FORCE_INLINE void
12809 XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
12810 const xxh_u8* XXH_RESTRICT input,
12811 const xxh_u8* XXH_RESTRICT secret,
12812 size_t nbStripes)
12814 if (nbStripes != 0) {
12815 uint64_t *xacc = (uint64_t *)acc;
12816 const uint64_t *xinput = (const uint64_t *)(const void *)input;
12817 const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
12818 svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
12819 uint64_t element_count = svcntd();
12820 if (element_count >= 8) {
12821 svbool_t mask = svptrue_pat_b64(SV_VL8);
12822 svuint64_t vacc = svld1_u64(mask, xacc + 0);
12823 do {
12824 /* svprfd(svbool_t, void *, enum svfprop); */
12825 svprfd(mask, xinput + 128, SV_PLDL1STRM);
12826 ACCRND(vacc, 0);
12827 xinput += 8;
12828 xsecret += 1;
12829 nbStripes--;
12830 } while (nbStripes != 0);
12832 svst1_u64(mask, xacc + 0, vacc);
12833 } else if (element_count == 2) { /* sve128 */
12834 svbool_t mask = svptrue_pat_b64(SV_VL2);
12835 svuint64_t acc0 = svld1_u64(mask, xacc + 0);
12836 svuint64_t acc1 = svld1_u64(mask, xacc + 2);
12837 svuint64_t acc2 = svld1_u64(mask, xacc + 4);
12838 svuint64_t acc3 = svld1_u64(mask, xacc + 6);
12839 do {
12840 svprfd(mask, xinput + 128, SV_PLDL1STRM);
12841 ACCRND(acc0, 0);
12842 ACCRND(acc1, 2);
12843 ACCRND(acc2, 4);
12844 ACCRND(acc3, 6);
12845 xinput += 8;
12846 xsecret += 1;
12847 nbStripes--;
12848 } while (nbStripes != 0);
12850 svst1_u64(mask, xacc + 0, acc0);
12851 svst1_u64(mask, xacc + 2, acc1);
12852 svst1_u64(mask, xacc + 4, acc2);
12853 svst1_u64(mask, xacc + 6, acc3);
12854 } else {
12855 svbool_t mask = svptrue_pat_b64(SV_VL4);
12856 svuint64_t acc0 = svld1_u64(mask, xacc + 0);
12857 svuint64_t acc1 = svld1_u64(mask, xacc + 4);
12858 do {
12859 svprfd(mask, xinput + 128, SV_PLDL1STRM);
12860 ACCRND(acc0, 0);
12861 ACCRND(acc1, 4);
12862 xinput += 8;
12863 xsecret += 1;
12864 nbStripes--;
12865 } while (nbStripes != 0);
12867 svst1_u64(mask, xacc + 0, acc0);
12868 svst1_u64(mask, xacc + 4, acc1);
12873 #endif
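/*
 * Both SVE routines above pick a layout from the vector length reported by
 * svcntd(), i.e. the number of 64-bit elements per SVE vector. A sketch of
 * the mapping (illustration only):
 *
 * \code
 * // svcntd() >= 8 : 512-bit SVE or wider, one 8-lane accumulator (SV_VL8)
 * // svcntd() == 2 : 128-bit SVE, four 2-lane accumulators (SV_VL2)
 * // otherwise     : e.g. 256-bit SVE, two 4-lane accumulators (SV_VL4)
 * \endcode
 */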
12875 /* scalar variants - universal */
12877 #if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
12879 * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
12880 * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
12882 * While this might not seem like much, as AArch64 is a 64-bit architecture, only
12883 * big Cortex designs have a full 64-bit multiplier.
12885 * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
12886 * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
12887 * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
12889 * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
12890 * not have this penalty and does the mask automatically.
12892 XXH_FORCE_INLINE xxh_u64
12893 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
12895 xxh_u64 ret;
12896 /* note: %x = 64-bit register, %w = 32-bit register */
12897 __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
12898 return ret;
12900 #else
12901 XXH_FORCE_INLINE xxh_u64
12902 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
12904 return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
12906 #endif
12909 * @internal
12910 * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
12912 * This is extracted to its own function because the NEON path uses a combination
12913 * of NEON and scalar.
12915 XXH_FORCE_INLINE void
12916 XXH3_scalarRound(void* XXH_RESTRICT acc,
12917 void const* XXH_RESTRICT input,
12918 void const* XXH_RESTRICT secret,
12919 size_t lane)
12921 xxh_u64* xacc = (xxh_u64*) acc;
12922 xxh_u8 const* xinput = (xxh_u8 const*) input;
12923 xxh_u8 const* xsecret = (xxh_u8 const*) secret;
12924 XXH_ASSERT(lane < XXH_ACC_NB);
12925 XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
12927 xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
12928 xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
12929 xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
12930 xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
12935 * @internal
12936 * @brief Processes a 64 byte block of data using the scalar path.
12938 XXH_FORCE_INLINE void
12939 XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
12940 const void* XXH_RESTRICT input,
12941 const void* XXH_RESTRICT secret)
12943 size_t i;
12944 /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
12945 #if defined(__GNUC__) && !defined(__clang__) \
12946 && (defined(__arm__) || defined(__thumb2__)) \
12947 && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
12948 && XXH_SIZE_OPT <= 0
12949 # pragma GCC unroll 8
12950 #endif
12951 for (i=0; i < XXH_ACC_NB; i++) {
12952 XXH3_scalarRound(acc, input, secret, i);
12955 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
12958 * @internal
12959 * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().
12961 * This is extracted to its own function because the NEON path uses a combination
12962 * of NEON and scalar.
12964 XXH_FORCE_INLINE void
12965 XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
12966 void const* XXH_RESTRICT secret,
12967 size_t lane)
12969 xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
12970 const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
12971 XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
12972 XXH_ASSERT(lane < XXH_ACC_NB);
12974 xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
12975 xxh_u64 acc64 = xacc[lane];
12976 acc64 = XXH_xorshift64(acc64, 47);
12977 acc64 ^= key64;
12978 acc64 *= XXH_PRIME32_1;
12979 xacc[lane] = acc64;
12984 * @internal
12985 * @brief Scrambles the accumulators after a large chunk has been read
12987 XXH_FORCE_INLINE void
12988 XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
12990 size_t i;
12991 for (i=0; i < XXH_ACC_NB; i++) {
12992 XXH3_scalarScrambleRound(acc, secret, i);
12996 XXH_FORCE_INLINE void
12997 XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
13000 * We need a separate pointer for the hack below,
13001 * which requires a non-const pointer.
13002 * Any decent compiler will optimize this out otherwise.
13004 const xxh_u8* kSecretPtr = XXH3_kSecret;
13005 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
13007 #if defined(__GNUC__) && defined(__aarch64__)
13009 * UGLY HACK:
13010 * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
13011 * placed sequentially, in order, at the top of the unrolled loop.
13013 * While MOVK is great for generating constants (2 cycles for a 64-bit
13014 * constant compared to 4 cycles for LDR), it fights for bandwidth with
13015 * the arithmetic instructions.
13017 * I L S
13018 * MOVK
13019 * MOVK
13020 * MOVK
13021 * MOVK
13022 * ADD
13023 * SUB STR
13024 * STR
13025 * By forcing loads from memory (as the asm line causes the compiler to assume
13026 * that kSecretPtr has been changed), the pipelines are used more
13027 * efficiently:
13028 * I L S
13029 * LDR
13030 * ADD LDR
13031 * SUB STR
13032 * STR
13034 * See XXH3_NEON_LANES for details on the pipeline.
13036 * XXH3_64bits_withSeed, len == 256, Snapdragon 835
13037 * without hack: 2654.4 MB/s
13038 * with hack: 3202.9 MB/s
13040 XXH_COMPILER_GUARD(kSecretPtr);
13041 #endif
13042 { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
13043 int i;
13044 for (i=0; i < nbRounds; i++) {
13046 * The asm hack causes the compiler to assume that kSecretPtr aliases with
13047 * customSecret, and on aarch64, this prevented LDP from merging two
13048 * loads together for free. Putting the loads together before the stores
13049 * properly generates LDP.
13051 xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64;
13052 xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
13053 XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo);
13054 XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
13059 typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
13060 typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
13061 typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
13064 #if (XXH_VECTOR == XXH_AVX512)
13066 #define XXH3_accumulate_512 XXH3_accumulate_512_avx512
13067 #define XXH3_accumulate XXH3_accumulate_avx512
13068 #define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
13069 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
13071 #elif (XXH_VECTOR == XXH_AVX2)
13073 #define XXH3_accumulate_512 XXH3_accumulate_512_avx2
13074 #define XXH3_accumulate XXH3_accumulate_avx2
13075 #define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
13076 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
13078 #elif (XXH_VECTOR == XXH_SSE2)
13080 #define XXH3_accumulate_512 XXH3_accumulate_512_sse2
13081 #define XXH3_accumulate XXH3_accumulate_sse2
13082 #define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
13083 #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
13085 #elif (XXH_VECTOR == XXH_NEON)
13087 #define XXH3_accumulate_512 XXH3_accumulate_512_neon
13088 #define XXH3_accumulate XXH3_accumulate_neon
13089 #define XXH3_scrambleAcc XXH3_scrambleAcc_neon
13090 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
13092 #elif (XXH_VECTOR == XXH_VSX)
13094 #define XXH3_accumulate_512 XXH3_accumulate_512_vsx
13095 #define XXH3_accumulate XXH3_accumulate_vsx
13096 #define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
13097 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
13099 #elif (XXH_VECTOR == XXH_SVE)
13100 #define XXH3_accumulate_512 XXH3_accumulate_512_sve
13101 #define XXH3_accumulate XXH3_accumulate_sve
13102 #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
13103 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
13105 #else /* scalar */
13107 #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
13108 #define XXH3_accumulate XXH3_accumulate_scalar
13109 #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
13110 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
13112 #endif
13114 #if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
13115 # undef XXH3_initCustomSecret
13116 # define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
13117 #endif
13119 XXH_FORCE_INLINE void
13120 XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
13121 const xxh_u8* XXH_RESTRICT input, size_t len,
13122 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
13123 XXH3_f_accumulate f_acc,
13124 XXH3_f_scrambleAcc f_scramble)
13126 size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
13127 size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
13128 size_t const nb_blocks = (len - 1) / block_len;
13130 size_t n;
13132 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
13134 for (n = 0; n < nb_blocks; n++) {
13135 f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
13136 f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
13139 /* last partial block */
13140 XXH_ASSERT(len > XXH_STRIPE_LEN);
13141 { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
13142 XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
13143 f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
13145 /* last stripe */
13146 { const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
13147 #define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */
13148 XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
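/*
 * Worked example of the block/stripe arithmetic above, assuming the default
 * 192-byte secret with XXH_STRIPE_LEN == 64 and XXH_SECRET_CONSUME_RATE == 8:
 *
 * \code
 * // nbStripesPerBlock = (192 - 64) / 8 = 16 stripes
 * // block_len         = 64 * 16        = 1024 bytes
 * // for len = 2500:
 * //   nb_blocks = (2500 - 1) / 1024 = 2 full blocks, each followed by a scramble
 * //   last partial block: (2499 - 2048) / 64 = 7 stripes
 * //   the final 64 bytes are then re-read as the last stripe with the offset secret
 * \endcode
 */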
13152 XXH_FORCE_INLINE xxh_u64
13153 XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
13155 return XXH3_mul128_fold64(
13156 acc[0] ^ XXH_readLE64(secret),
13157 acc[1] ^ XXH_readLE64(secret+8) );
13160 static XXH64_hash_t
13161 XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
13163 xxh_u64 result64 = start;
13164 size_t i = 0;
13166 for (i = 0; i < 4; i++) {
13167 result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
13168 #if defined(__clang__) /* Clang */ \
13169 && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \
13170 && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
13171 && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
13173 * UGLY HACK:
13174 * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
13175 * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
13176 * XXH3_64bits, len == 256, Snapdragon 835:
13177 * without hack: 2063.7 MB/s
13178 * with hack: 2560.7 MB/s
13180 XXH_COMPILER_GUARD(result64);
13181 #endif
13184 return XXH3_avalanche(result64);
13187 #define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
13188 XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
13190 XXH_FORCE_INLINE XXH64_hash_t
13191 XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
13192 const void* XXH_RESTRICT secret, size_t secretSize,
13193 XXH3_f_accumulate f_acc,
13194 XXH3_f_scrambleAcc f_scramble)
13196 XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
13198 XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
13200 /* converge into final hash */
13201 XXH_STATIC_ASSERT(sizeof(acc) == 64);
13202 /* do not align on 8, so that the secret is different from the accumulator */
13203 #define XXH_SECRET_MERGEACCS_START 11
13204 XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
13205 return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
13209 * It's important for performance to transmit the secret's size (when it's static)
13210 * so that the compiler can properly optimize the vectorized loop.
13211 * This makes a big performance difference for "medium" keys (<1 KB) when using the AVX instruction set.
13212 * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
13213 * breaks -Og, this is XXH_NO_INLINE.
13215 XXH3_WITH_SECRET_INLINE XXH64_hash_t
13216 XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
13217 XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
13219 (void)seed64;
13220 return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
13224 * It's preferable for performance that XXH3_hashLong is not inlined,
13225 * as it results in a smaller function for small data, easier on the instruction cache.
13226 * Note that inside this no_inline function, we do inline the internal loop,
13227 * and provide a statically defined secret size to allow optimization of the vector loop.
13229 XXH_NO_INLINE XXH_PUREF XXH64_hash_t
13230 XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
13231 XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
13233 (void)seed64; (void)secret; (void)secretLen;
13234 return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
13238 * XXH3_hashLong_64b_withSeed():
13239 * Generate a custom key based on alteration of default XXH3_kSecret with the seed,
13240 * and then use this key for long mode hashing.
13242 * This operation is decently fast but nonetheless costs a little bit of time.
13243 * Try to avoid it whenever possible (typically when seed==0).
13245 * It's important for performance that XXH3_hashLong is not inlined. Not sure
13246 * why (uop cache maybe?), but the difference is large and easily measurable.
13248 XXH_FORCE_INLINE XXH64_hash_t
13249 XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
13250 XXH64_hash_t seed,
13251 XXH3_f_accumulate f_acc,
13252 XXH3_f_scrambleAcc f_scramble,
13253 XXH3_f_initCustomSecret f_initSec)
13255 #if XXH_SIZE_OPT <= 0
13256 if (seed == 0)
13257 return XXH3_hashLong_64b_internal(input, len,
13258 XXH3_kSecret, sizeof(XXH3_kSecret),
13259 f_acc, f_scramble);
13260 #endif
13261 { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
13262 f_initSec(secret, seed);
13263 return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
13264 f_acc, f_scramble);
13269 * It's important for performance that XXH3_hashLong is not inlined.
13271 XXH_NO_INLINE XXH64_hash_t
13272 XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
13273 XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
13275 (void)secret; (void)secretLen;
13276 return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
13277 XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
13281 typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
13282 XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
13284 XXH_FORCE_INLINE XXH64_hash_t
13285 XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
13286 XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
13287 XXH3_hashLong64_f f_hashLong)
13289 XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
13291 * If an action is to be taken if `secretLen` condition is not respected,
13292 * it should be done here.
13293 * For now, it's a contract pre-condition.
13294 * Adding a check and a branch here would cost performance at every hash.
13295 * Also, note that function signature doesn't offer room to return an error.
13297 if (len <= 16)
13298 return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
13299 if (len <= 128)
13300 return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
13301 if (len <= XXH3_MIDSIZE_MAX)
13302 return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
13303 return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
13307 /* === Public entry point === */
13309 /*! @ingroup XXH3_family */
13310 XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
13312 return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
13315 /*! @ingroup XXH3_family */
13316 XXH_PUBLIC_API XXH64_hash_t
13317 XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
13319 return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
13322 /*! @ingroup XXH3_family */
13323 XXH_PUBLIC_API XXH64_hash_t
13324 XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
13326 return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
13329 XXH_PUBLIC_API XXH64_hash_t
13330 XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
13332 if (length <= XXH3_MIDSIZE_MAX)
13333 return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
13334 return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
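/*
 * Typical one-shot usage of the entry points above (a minimal sketch; the
 * buffer and the seed value are placeholders):
 *
 * \code
 * const char data[] = "example payload";
 * XXH64_hash_t const h1 = XXH3_64bits(data, sizeof(data) - 1);
 * XXH64_hash_t const h2 = XXH3_64bits_withSeed(data, sizeof(data) - 1, 12345);
 * \endcode
 */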
13338 /* === XXH3 streaming === */
13339 #ifndef XXH_NO_STREAM
13341 * Allocates a pointer that is always aligned to align.
13343 * This must be freed with `XXH_alignedFree()`.
13345 * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
13346 * alignment on 32-bit. This isn't enough for the 32-byte aligned loads in AVX2,
13347 * or, on 32-bit, for the 16-byte aligned loads in SSE2 and NEON.
13349 * This underalignment previously caused a rather obvious crash which went
13350 * completely unnoticed due to XXH3_createState() not actually being tested.
13351 * Credit to RedSpah for noticing this bug.
13353 * The alignment is done manually: functions like posix_memalign or _mm_malloc
13354 * are avoided because, to maintain portability, we would have to write a
13355 * fallback like this anyway, and besides, testing for the existence of library
13356 * functions without relying on external build tools is impossible.
13358 * The method is simple: Overallocate, manually align, and store the offset
13359 * to the original behind the returned pointer.
13361 * Align must be a power of 2 and 8 <= align <= 128.
13363 static void* XXH_alignedMalloc(size_t s, size_t align)
13365 XXH_ASSERT(align <= 128 && align >= 8); /* range check */
13366 XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */
13367 XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */
13368 { /* Overallocate to make room for manual realignment and an offset byte */
13369 xxh_u8* base = (xxh_u8*)VG_(malloc)("zstddeclib.XXH_alignedMalloc.1",s + align);
13370 if (base != NULL) {
13372 * Get the offset needed to align this pointer.
13374 * Even if the returned pointer is aligned, there will always be
13375 * at least one byte to store the offset to the original pointer.
13377 size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
13378 /* Add the offset for the now-aligned pointer */
13379 xxh_u8* ptr = base + offset;
13381 XXH_ASSERT((size_t)ptr % align == 0);
13383 /* Store the offset immediately before the returned pointer. */
13384 ptr[-1] = (xxh_u8)offset;
13385 return ptr;
13387 return NULL;
13391 * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
13392 * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
13394 static void XXH_alignedFree(void* p)
13396 if (p != NULL) {
13397 xxh_u8* ptr = (xxh_u8*)p;
13398 /* Get the offset byte we stored in XXH_alignedMalloc(). */
13399 xxh_u8 offset = ptr[-1];
13400 /* Free the original malloc'd pointer */
13401 xxh_u8* base = ptr - offset;
13402 XXH_free(base);
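/*
 * Worked example of the offset bookkeeping in XXH_alignedMalloc() and
 * XXH_alignedFree() above (addresses made up for illustration):
 *
 * \code
 * // align = 64, VG_(malloc) returns base = 0x1028
 * // offset  = 64 - (0x1028 & 63) = 64 - 40 = 24
 * // ptr     = base + 24 = 0x1040            (0x1040 % 64 == 0)
 * // ptr[-1] = 24                            (stored just below ptr)
 * // XXH_alignedFree(ptr) reads the 24 back and frees ptr - 24 == base
 * \endcode
 */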
13405 /*! @ingroup XXH3_family */
13407 * @brief Allocate an @ref XXH3_state_t.
13409 * @return An allocated pointer of @ref XXH3_state_t on success.
13410 * @return `NULL` on failure.
13412 * @note Must be freed with XXH3_freeState().
13414 XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
13416 XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
13417 if (state==NULL) return NULL;
13418 XXH3_INITSTATE(state);
13419 return state;
13422 /*! @ingroup XXH3_family */
13424 * @brief Frees an @ref XXH3_state_t.
13426 * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
13428 * @return @ref XXH_OK.
13430 * @note Must be allocated with XXH3_createState().
13432 XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
13434 XXH_alignedFree(statePtr);
13435 return XXH_OK;
13438 /*! @ingroup XXH3_family */
13439 XXH_PUBLIC_API void
13440 XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
13442 XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
13445 static void
13446 XXH3_reset_internal(XXH3_state_t* statePtr,
13447 XXH64_hash_t seed,
13448 const void* secret, size_t secretSize)
13450 size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
13451 size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
13452 XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
13453 XXH_ASSERT(statePtr != NULL);
13454 /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
13455 VG_(memset)((char*)statePtr + initStart, 0, initLength);
13456 statePtr->acc[0] = XXH_PRIME32_3;
13457 statePtr->acc[1] = XXH_PRIME64_1;
13458 statePtr->acc[2] = XXH_PRIME64_2;
13459 statePtr->acc[3] = XXH_PRIME64_3;
13460 statePtr->acc[4] = XXH_PRIME64_4;
13461 statePtr->acc[5] = XXH_PRIME32_2;
13462 statePtr->acc[6] = XXH_PRIME64_5;
13463 statePtr->acc[7] = XXH_PRIME32_1;
13464 statePtr->seed = seed;
13465 statePtr->useSeed = (seed != 0);
13466 statePtr->extSecret = (const unsigned char*)secret;
13467 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
13468 statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
13469 statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
13472 /*! @ingroup XXH3_family */
13473 XXH_PUBLIC_API XXH_errorcode
13474 XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
13476 if (statePtr == NULL) return XXH_ERROR;
13477 XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
13478 return XXH_OK;
13481 /*! @ingroup XXH3_family */
13482 XXH_PUBLIC_API XXH_errorcode
13483 XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
13485 if (statePtr == NULL) return XXH_ERROR;
13486 XXH3_reset_internal(statePtr, 0, secret, secretSize);
13487 if (secret == NULL) return XXH_ERROR;
13488 if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
13489 return XXH_OK;
13492 /*! @ingroup XXH3_family */
13493 XXH_PUBLIC_API XXH_errorcode
13494 XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
13496 if (statePtr == NULL) return XXH_ERROR;
13497 if (seed==0) return XXH3_64bits_reset(statePtr);
13498 if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
13499 XXH3_initCustomSecret(statePtr->customSecret, seed);
13500 XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
13501 return XXH_OK;
13504 /*! @ingroup XXH3_family */
13505 XXH_PUBLIC_API XXH_errorcode
13506 XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
13508 if (statePtr == NULL) return XXH_ERROR;
13509 if (secret == NULL) return XXH_ERROR;
13510 if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
13511 XXH3_reset_internal(statePtr, seed64, secret, secretSize);
13512 statePtr->useSeed = 1; /* always, even if seed64==0 */
13513 return XXH_OK;
13517 * @internal
13518 * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
13520 * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
13522 * @param acc Pointer to the 8 accumulator lanes
13523 * @param nbStripesSoFarPtr In/out pointer to the number of leftover stripes in the block
13524 * @param nbStripesPerBlock Number of stripes in a block
13525 * @param input Input pointer
13526 * @param nbStripes Number of stripes to process
13527 * @param secret Secret pointer
13528 * @param secretLimit Offset of the last block in @p secret
13529 * @param f_acc Pointer to an XXH3_accumulate implementation
13530 * @param f_scramble Pointer to an XXH3_scrambleAcc implementation
13531 * @return Pointer past the end of @p input after processing
13533 XXH_FORCE_INLINE const xxh_u8 *
13534 XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
13535 size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
13536 const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
13537 const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
13538 XXH3_f_accumulate f_acc,
13539 XXH3_f_scrambleAcc f_scramble)
13541 const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
13542 /* Process full blocks */
13543 if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
13544 /* Process the initial partial block... */
13545 size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
13547 do {
13548 /* Accumulate and scramble */
13549 f_acc(acc, input, initialSecret, nbStripesThisIter);
13550 f_scramble(acc, secret + secretLimit);
13551 input += nbStripesThisIter * XXH_STRIPE_LEN;
13552 nbStripes -= nbStripesThisIter;
13553 /* Then continue the loop with the full block size */
13554 nbStripesThisIter = nbStripesPerBlock;
13555 initialSecret = secret;
13556 } while (nbStripes >= nbStripesPerBlock);
13557 *nbStripesSoFarPtr = 0;
13559 /* Process a partial block */
13560 if (nbStripes > 0) {
13561 f_acc(acc, input, initialSecret, nbStripes);
13562 input += nbStripes * XXH_STRIPE_LEN;
13563 *nbStripesSoFarPtr += nbStripes;
13565 /* Return end pointer */
13566 return input;
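/*
 * Example of the nbStripesSoFar bookkeeping above (numbers chosen for
 * illustration; nbStripesPerBlock == 16 matches the default secret):
 *
 * \code
 * // *nbStripesSoFarPtr = 10, nbStripes = 20, nbStripesPerBlock = 16
 * // 1) finish the current block: accumulate 6 stripes, then scramble
 * // 2) 14 stripes remain (< 16): accumulate them from the start of the secret
 * // 3) *nbStripesSoFarPtr = 14; return input advanced by 20 * XXH_STRIPE_LEN
 * \endcode
 */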
13569 #ifndef XXH3_STREAM_USE_STACK
13570 # if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
13571 # define XXH3_STREAM_USE_STACK 1
13572 # endif
13573 #endif
13575 * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
13577 XXH_FORCE_INLINE XXH_errorcode
13578 XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
13579 const xxh_u8* XXH_RESTRICT input, size_t len,
13580 XXH3_f_accumulate f_acc,
13581 XXH3_f_scrambleAcc f_scramble)
13583 if (input==NULL) {
13584 XXH_ASSERT(len == 0);
13585 return XXH_OK;
13588 XXH_ASSERT(state != NULL);
13589 { const xxh_u8* const bEnd = input + len;
13590 const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
13591 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
13592 /* For some reason, gcc and MSVC seem to suffer greatly
13593 * when operating accumulators directly into state.
13594 * Operating into stack space seems to enable proper optimization.
13595 * clang, on the other hand, doesn't seem to need this trick */
13596 XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
13597 XXH_memcpy(acc, state->acc, sizeof(acc));
13598 #else
13599 xxh_u64* XXH_RESTRICT const acc = state->acc;
13600 #endif
13601 state->totalLen += len;
13602 XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
13604 /* small input : just fill in tmp buffer */
13605 if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
13606 XXH_memcpy(state->buffer + state->bufferedSize, input, len);
13607 state->bufferedSize += (XXH32_hash_t)len;
13608 return XXH_OK;
13611 /* total input is now > XXH3_INTERNALBUFFER_SIZE */
13612 #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
13613 XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */
13616 * Internal buffer is partially filled (always, except at beginning)
13617 * Complete it, then consume it.
13619 if (state->bufferedSize) {
13620 size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
13621 XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
13622 input += loadSize;
13623 XXH3_consumeStripes(acc,
13624 &state->nbStripesSoFar, state->nbStripesPerBlock,
13625 state->buffer, XXH3_INTERNALBUFFER_STRIPES,
13626 secret, state->secretLimit,
13627 f_acc, f_scramble);
13628 state->bufferedSize = 0;
13630 XXH_ASSERT(input < bEnd);
13631 if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
13632 size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
13633 input = XXH3_consumeStripes(acc,
13634 &state->nbStripesSoFar, state->nbStripesPerBlock,
13635 input, nbStripes,
13636 secret, state->secretLimit,
13637 f_acc, f_scramble);
13638 XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
13641 /* Some remaining input (always) : buffer it */
13642 XXH_ASSERT(input < bEnd);
13643 XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
13644 XXH_ASSERT(state->bufferedSize == 0);
13645 XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
13646 state->bufferedSize = (XXH32_hash_t)(bEnd-input);
13647 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
13648 /* save stack accumulators into state */
13649 XXH_memcpy(state->acc, acc, sizeof(acc));
13650 #endif
13653 return XXH_OK;
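/*
 * Sketch of the buffering strategy implemented above, for illustration
 * (the internal buffer is 256 bytes, i.e. 4 stripes):
 *
 * \code
 * // 1) small input: append to the internal buffer and return
 * // 2) otherwise, top up the buffer and consume it (XXH3_INTERNALBUFFER_STRIPES)
 * // 3) consume input directly, always leaving at least 1 byte unconsumed
 * // 4) buffer the tail so that digest always has a final stripe available
 * \endcode
 */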
13656 /*! @ingroup XXH3_family */
13657 XXH_PUBLIC_API XXH_errorcode
13658 XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
13660 return XXH3_update(state, (const xxh_u8*)input, len,
13661 XXH3_accumulate, XXH3_scrambleAcc);
13665 XXH_FORCE_INLINE void
13666 XXH3_digest_long (XXH64_hash_t* acc,
13667 const XXH3_state_t* state,
13668 const unsigned char* secret)
13670 xxh_u8 lastStripe[XXH_STRIPE_LEN];
13671 const xxh_u8* lastStripePtr;
13674 * Digest on a local copy. This way, the state remains unaltered, and it can
13675 * continue ingesting more input afterwards.
13677 XXH_memcpy(acc, state->acc, sizeof(state->acc));
13678 if (state->bufferedSize >= XXH_STRIPE_LEN) {
13679 /* Consume remaining stripes then point to remaining data in buffer */
13680 size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
13681 size_t nbStripesSoFar = state->nbStripesSoFar;
13682 XXH3_consumeStripes(acc,
13683 &nbStripesSoFar, state->nbStripesPerBlock,
13684 state->buffer, nbStripes,
13685 secret, state->secretLimit,
13686 XXH3_accumulate, XXH3_scrambleAcc);
13687 lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
13688 } else { /* bufferedSize < XXH_STRIPE_LEN */
13689 /* Copy to temp buffer */
13690 size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
13691 XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
13692 XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
13693 XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
13694 lastStripePtr = lastStripe;
13696 /* Last stripe */
13697 XXH3_accumulate_512(acc,
13698 lastStripePtr,
13699 secret + state->secretLimit - XXH_SECRET_LASTACC_START);
13702 /*! @ingroup XXH3_family */
13703 XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
13705 const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
13706 if (state->totalLen > XXH3_MIDSIZE_MAX) {
13707 XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
13708 XXH3_digest_long(acc, state, secret);
13709 return XXH3_mergeAccs(acc,
13710 secret + XXH_SECRET_MERGEACCS_START,
13711 (xxh_u64)state->totalLen * XXH_PRIME64_1);
13713 /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
13714 if (state->useSeed)
13715 return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
13716 return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
13717 secret, state->secretLimit + XXH_STRIPE_LEN);
13719 #endif /* !XXH_NO_STREAM */
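/*
 * Typical streaming usage of the 64-bit API above (a minimal sketch; error
 * handling and the input chunks are placeholders):
 *
 * \code
 * XXH3_state_t* const st = XXH3_createState();
 * if (st != NULL && XXH3_64bits_reset(st) == XXH_OK) {
 *     XXH3_64bits_update(st, chunk1, chunk1_len);
 *     XXH3_64bits_update(st, chunk2, chunk2_len);
 *     { XXH64_hash_t const h = XXH3_64bits_digest(st); (void)h; }
 * }
 * XXH3_freeState(st);
 * \endcode
 */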
13722 /* ==========================================
13723 * XXH3 128 bits (a.k.a XXH128)
13724 * ==========================================
13725 * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
13726 * even without counting the significantly larger output size.
13728 * For example, extra steps are taken to avoid the seed-dependent collisions
13729 * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
13731 * This strength naturally comes at the cost of some speed, especially on short
13732 * lengths. Note that longer hashes are about as fast as the 64-bit version
13733 * due to it using only a slight modification of the 64-bit loop.
13735 * XXH128 is also more oriented towards 64-bit machines. It is still extremely
13736 * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
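/*
 * A minimal one-shot usage sketch of the public 128-bit API (illustrative
 * values; assumes xxhash.h is available as a standalone header -- in this
 * amalgamation the same declarations are inlined above):
 *
 *   #include <stdio.h>
 *   #include <string.h>
 *   #include "xxhash.h"
 *
 *   int main(void)
 *   {
 *       const char msg[] = "hello, xxh3";
 *       XXH128_hash_t h  = XXH3_128bits(msg, strlen(msg));    // unseeded
 *       XXH128_hash_t hs = XXH128(msg, strlen(msg), 42);      // seeded wrapper
 *       printf("unseeded: %016llx%016llx\n",
 *              (unsigned long long)h.high64,  (unsigned long long)h.low64);
 *       printf("seeded  : %016llx%016llx\n",
 *              (unsigned long long)hs.high64, (unsigned long long)hs.low64);
 *       return 0;
 *   }
 */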
13739 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
13740 XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
13742 /* A doubled version of 1to3_64b with different constants. */
13743 XXH_ASSERT(input != NULL);
13744 XXH_ASSERT(1 <= len && len <= 3);
13745 XXH_ASSERT(secret != NULL);
13747 * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
13748 * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
13749 * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
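/*
 * Worked example (illustrative): for input = { 0x41, 0x42 } and len = 2,
 * c1 = 0x41, c2 = c3 = 0x42, so
 * combinedl = (0x41<<16) | (0x42<<24) | 0x42 | (2<<8) = 0x42410242.
 */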
13751 { xxh_u8 const c1 = input[0];
13752 xxh_u8 const c2 = input[len >> 1];
13753 xxh_u8 const c3 = input[len - 1];
13754 xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)
13755 | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
13756 xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
13757 xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
13758 xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
13759 xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
13760 xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
13761 XXH128_hash_t h128;
13762 h128.low64 = XXH64_avalanche(keyed_lo);
13763 h128.high64 = XXH64_avalanche(keyed_hi);
13764 return h128;
13768 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
13769 XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
13771 XXH_ASSERT(input != NULL);
13772 XXH_ASSERT(secret != NULL);
13773 XXH_ASSERT(4 <= len && len <= 8);
13774 seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
13775 { xxh_u32 const input_lo = XXH_readLE32(input);
13776 xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
13777 xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
13778 xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
13779 xxh_u64 const keyed = input_64 ^ bitflip;
13781 /* Shift len to the left to ensure it is even; this avoids even multiplies. */
13782 XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));
13784 m128.high64 += (m128.low64 << 1);
13785 m128.low64 ^= (m128.high64 >> 3);
13787 m128.low64 = XXH_xorshift64(m128.low64, 35);
13788 m128.low64 *= PRIME_MX2;
13789 m128.low64 = XXH_xorshift64(m128.low64, 28);
13790 m128.high64 = XXH3_avalanche(m128.high64);
13791 return m128;
13795 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
13796 XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
13798 XXH_ASSERT(input != NULL);
13799 XXH_ASSERT(secret != NULL);
13800 XXH_ASSERT(9 <= len && len <= 16);
13801 { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
13802 xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
13803 xxh_u64 const input_lo = XXH_readLE64(input);
13804 xxh_u64 input_hi = XXH_readLE64(input + len - 8);
13805 XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
13807 * Put len in the middle of m128 to ensure that the length gets mixed to
13808 * both the low and high bits in the 128x64 multiply below.
13810 m128.low64 += (xxh_u64)(len - 1) << 54;
13811 input_hi ^= bitfliph;
13813 * Add the high 32 bits of input_hi to the high 32 bits of m128, then
13814 * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
13815 * the high 64 bits of m128.
13817 * The best approach to this operation is different on 32-bit and 64-bit.
13819 if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
13821 * 32-bit optimized version, which is more readable.
13823 * On 32-bit, it removes an ADC and delays a dependency between the two
13824 * halves of m128.high64, but it generates an extra mask on 64-bit.
13826 m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
13827 } else {
13829 * 64-bit optimized (albeit more confusing) version.
13831 * Uses some properties of addition and multiplication to remove the mask:
13833 * Let:
13834 * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
13835 * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
13836 * c = XXH_PRIME32_2
13838 * a + (b * c)
13839 * Inverse Property: x + y - x == y
13840 * a + (b * (1 + c - 1))
13841 * Distributive Property: x * (y + z) == (x * y) + (x * z)
13842 * a + (b * 1) + (b * (c - 1))
13843 * Identity Property: x * 1 == x
13844 * a + b + (b * (c - 1))
13846 * Substitute a, b, and c:
13847 * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
13849 * Since input_hi.hi + input_hi.lo == input_hi, we get this:
13850 * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
13852 m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
13854 /* m128 ^= XXH_swap64(m128 >> 64); */
13855 m128.low64 ^= XXH_swap64(m128.high64);
13857 { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
13858 XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
13859 h128.high64 += m128.high64 * XXH_PRIME64_2;
13861 h128.low64 = XXH3_avalanche(h128.low64);
13862 h128.high64 = XXH3_avalanche(h128.high64);
13863 return h128;
13868 * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
13870 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
13871 XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
13873 XXH_ASSERT(len <= 16);
13874 { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);
13875 if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
13876 if (len) return XXH3_len_1to3_128b(input, len, secret, seed);
13877 { XXH128_hash_t h128;
13878 xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
13879 xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
13880 h128.low64 = XXH64_avalanche(seed ^ bitflipl);
13881 h128.high64 = XXH64_avalanche( seed ^ bitfliph);
13882 return h128;
13887 * A bit slower than XXH3_mix16B, but handles multiply by zero better.
13889 XXH_FORCE_INLINE XXH128_hash_t
13890 XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
13891 const xxh_u8* secret, XXH64_hash_t seed)
13893 acc.low64 += XXH3_mix16B (input_1, secret+0, seed);
13894 acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
13895 acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
13896 acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
13897 return acc;
13901 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
13902 XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
13903 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
13904 XXH64_hash_t seed)
13906 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
13907 XXH_ASSERT(16 < len && len <= 128);
13909 { XXH128_hash_t acc;
13910 acc.low64 = len * XXH_PRIME64_1;
13911 acc.high64 = 0;
13913 #if XXH_SIZE_OPT >= 1
13915 /* Smaller, but slightly slower. */
13916 unsigned int i = (unsigned int)(len - 1) / 32;
13917 do {
13918 acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
13919 } while (i-- != 0);
13921 #else
13922 if (len > 32) {
13923 if (len > 64) {
13924 if (len > 96) {
13925 acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
13927 acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
13929 acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
13931 acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
13932 #endif
13933 { XXH128_hash_t h128;
13934 h128.low64 = acc.low64 + acc.high64;
13935 h128.high64 = (acc.low64 * XXH_PRIME64_1)
13936 + (acc.high64 * XXH_PRIME64_4)
13937 + ((len - seed) * XXH_PRIME64_2);
13938 h128.low64 = XXH3_avalanche(h128.low64);
13939 h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
13940 return h128;
13945 XXH_NO_INLINE XXH_PUREF XXH128_hash_t
13946 XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
13947 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
13948 XXH64_hash_t seed)
13950 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
13951 XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
13953 { XXH128_hash_t acc;
13954 unsigned i;
13955 acc.low64 = len * XXH_PRIME64_1;
13956 acc.high64 = 0;
13958 * We set `i` to offset + 32. We do this so that unchanged
13959 * `len` can be used as upper bound. This reaches a sweet spot
13960 * where both x86 and aarch64 get simple agen and good codegen
13961 * for the loop.
13963 for (i = 32; i < 160; i += 32) {
13964 acc = XXH128_mix32B(acc,
13965 input + i - 32,
13966 input + i - 16,
13967 secret + i - 32,
13968 seed);
13970 acc.low64 = XXH3_avalanche(acc.low64);
13971 acc.high64 = XXH3_avalanche(acc.high64);
13973 * NB: `i <= len` will duplicate the last 32 bytes if
13974 * len % 32 was zero. This is an unfortunate necessity to keep
13975 * the hash result stable.
13977 for (i=160; i <= len; i += 32) {
13978 acc = XXH128_mix32B(acc,
13979 input + i - 32,
13980 input + i - 16,
13981 secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
13982 seed);
13984 /* last bytes */
13985 acc = XXH128_mix32B(acc,
13986 input + len - 16,
13987 input + len - 32,
13988 secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
13989 (XXH64_hash_t)0 - seed);
13991 { XXH128_hash_t h128;
13992 h128.low64 = acc.low64 + acc.high64;
13993 h128.high64 = (acc.low64 * XXH_PRIME64_1)
13994 + (acc.high64 * XXH_PRIME64_4)
13995 + ((len - seed) * XXH_PRIME64_2);
13996 h128.low64 = XXH3_avalanche(h128.low64);
13997 h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
13998 return h128;
14003 XXH_FORCE_INLINE XXH128_hash_t
14004 XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
14005 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
14006 XXH3_f_accumulate f_acc,
14007 XXH3_f_scrambleAcc f_scramble)
14009 XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
14011 XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
14013 /* converge into final hash */
14014 XXH_STATIC_ASSERT(sizeof(acc) == 64);
14015 XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
14016 { XXH128_hash_t h128;
14017 h128.low64 = XXH3_mergeAccs(acc,
14018 secret + XXH_SECRET_MERGEACCS_START,
14019 (xxh_u64)len * XXH_PRIME64_1);
14020 h128.high64 = XXH3_mergeAccs(acc,
14021 secret + secretSize
14022 - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
14023 ~((xxh_u64)len * XXH_PRIME64_2));
14024 return h128;
14029 * It's important for performance that XXH3_hashLong() is not inlined.
14031 XXH_NO_INLINE XXH_PUREF XXH128_hash_t
14032 XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
14033 XXH64_hash_t seed64,
14034 const void* XXH_RESTRICT secret, size_t secretLen)
14036 (void)seed64; (void)secret; (void)secretLen;
14037 return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
14038 XXH3_accumulate, XXH3_scrambleAcc);
14042 * It's important for performance to pass @p secretLen (when it's static)
14043 * to the compiler, so that it can properly optimize the vectorized loop.
14045 * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
14046 * breaks -Og, this is XXH_NO_INLINE.
14048 XXH3_WITH_SECRET_INLINE XXH128_hash_t
14049 XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
14050 XXH64_hash_t seed64,
14051 const void* XXH_RESTRICT secret, size_t secretLen)
14053 (void)seed64;
14054 return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
14055 XXH3_accumulate, XXH3_scrambleAcc);
14058 XXH_FORCE_INLINE XXH128_hash_t
14059 XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
14060 XXH64_hash_t seed64,
14061 XXH3_f_accumulate f_acc,
14062 XXH3_f_scrambleAcc f_scramble,
14063 XXH3_f_initCustomSecret f_initSec)
14065 if (seed64 == 0)
14066 return XXH3_hashLong_128b_internal(input, len,
14067 XXH3_kSecret, sizeof(XXH3_kSecret),
14068 f_acc, f_scramble);
14069 { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
14070 f_initSec(secret, seed64);
14071 return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
14072 f_acc, f_scramble);
14077 * It's important for performance that XXH3_hashLong is not inlined.
14079 XXH_NO_INLINE XXH128_hash_t
14080 XXH3_hashLong_128b_withSeed(const void* input, size_t len,
14081 XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
14083 (void)secret; (void)secretLen;
14084 return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
14085 XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
14088 typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
14089 XXH64_hash_t, const void* XXH_RESTRICT, size_t);
14091 XXH_FORCE_INLINE XXH128_hash_t
14092 XXH3_128bits_internal(const void* input, size_t len,
14093 XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
14094 XXH3_hashLong128_f f_hl128)
14096 XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
14098 * If an action is to be taken if `secret` conditions are not respected,
14099 * it should be done here.
14100 * For now, it's a contract pre-condition.
14101 * Adding a check and a branch here would cost performance at every hash.
14103 if (len <= 16)
14104 return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
14105 if (len <= 128)
14106 return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
14107 if (len <= XXH3_MIDSIZE_MAX)
14108 return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
14109 return f_hl128(input, len, seed64, secret, secretLen);
14113 /* === Public XXH128 API === */
14115 /*! @ingroup XXH3_family */
14116 XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
14118 return XXH3_128bits_internal(input, len, 0,
14119 XXH3_kSecret, sizeof(XXH3_kSecret),
14120 XXH3_hashLong_128b_default);
14123 /*! @ingroup XXH3_family */
14124 XXH_PUBLIC_API XXH128_hash_t
14125 XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
14127 return XXH3_128bits_internal(input, len, 0,
14128 (const xxh_u8*)secret, secretSize,
14129 XXH3_hashLong_128b_withSecret);
14132 /*! @ingroup XXH3_family */
14133 XXH_PUBLIC_API XXH128_hash_t
14134 XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
14136 return XXH3_128bits_internal(input, len, seed,
14137 XXH3_kSecret, sizeof(XXH3_kSecret),
14138 XXH3_hashLong_128b_withSeed);
14141 /*! @ingroup XXH3_family */
14142 XXH_PUBLIC_API XXH128_hash_t
14143 XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
14145 if (len <= XXH3_MIDSIZE_MAX)
14146 return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
14147 return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
14150 /*! @ingroup XXH3_family */
14151 XXH_PUBLIC_API XXH128_hash_t
14152 XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
14154 return XXH3_128bits_withSeed(input, len, seed);
14158 /* === XXH3 128-bit streaming === */
14159 #ifndef XXH_NO_STREAM
14161 * All initialization and update functions are identical to the 64-bit streaming variant.
14162 * The only difference is the finalization routine.
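/*
 * A streaming usage sketch (illustrative; assumes dynamic state allocation
 * via XXH3_createState(), and collapses all failures into a zero hash):
 *
 *   #include <stddef.h>
 *   #include "xxhash.h"
 *
 *   XXH128_hash_t hash_in_two_parts(const void* a, size_t na,
 *                                   const void* b, size_t nb)
 *   {
 *       XXH128_hash_t h = { 0, 0 };
 *       XXH3_state_t* st = XXH3_createState();
 *       if (st == NULL) return h;
 *       if (   XXH3_128bits_reset(st) == XXH_OK
 *           && XXH3_128bits_update(st, a, na) == XXH_OK
 *           && XXH3_128bits_update(st, b, nb) == XXH_OK)
 *           h = XXH3_128bits_digest(st);   // same result as one-shot over a||b
 *       XXH3_freeState(st);
 *       return h;
 *   }
 */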
14165 /*! @ingroup XXH3_family */
14166 XXH_PUBLIC_API XXH_errorcode
14167 XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
14169 return XXH3_64bits_reset(statePtr);
14172 /*! @ingroup XXH3_family */
14173 XXH_PUBLIC_API XXH_errorcode
14174 XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
14176 return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
14179 /*! @ingroup XXH3_family */
14180 XXH_PUBLIC_API XXH_errorcode
14181 XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
14183 return XXH3_64bits_reset_withSeed(statePtr, seed);
14186 /*! @ingroup XXH3_family */
14187 XXH_PUBLIC_API XXH_errorcode
14188 XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
14190 return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
14193 /*! @ingroup XXH3_family */
14194 XXH_PUBLIC_API XXH_errorcode
14195 XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
14197 return XXH3_64bits_update(state, input, len);
14200 /*! @ingroup XXH3_family */
14201 XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
14203 const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
14204 if (state->totalLen > XXH3_MIDSIZE_MAX) {
14205 XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
14206 XXH3_digest_long(acc, state, secret);
14207 XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
14208 { XXH128_hash_t h128;
14209 h128.low64 = XXH3_mergeAccs(acc,
14210 secret + XXH_SECRET_MERGEACCS_START,
14211 (xxh_u64)state->totalLen * XXH_PRIME64_1);
14212 h128.high64 = XXH3_mergeAccs(acc,
14213 secret + state->secretLimit + XXH_STRIPE_LEN
14214 - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
14215 ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
14216 return h128;
14219 /* len <= XXH3_MIDSIZE_MAX : short code */
14220 if (state->seed)
14221 return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
14222 return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
14223 secret, state->secretLimit + XXH_STRIPE_LEN);
14225 #endif /* !XXH_NO_STREAM */
14226 /* 128-bit utility functions */
14228 #include <string.h> /* memcmp, memcpy */
14230 /* return : 1 if equal, 0 if different */
14231 /*! @ingroup XXH3_family */
14232 XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
14234 /* note : XXH128_hash_t is compact, it has no padding byte */
14235 return !(memcmp(&h1, &h2, sizeof(h1)));
14238 /* This prototype is compatible with stdlib's qsort().
14239 * @return : >0 if *h128_1 > *h128_2
14240 * <0 if *h128_1 < *h128_2
14241 * =0 if *h128_1 == *h128_2 */
14242 /*! @ingroup XXH3_family */
14243 XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
14245 XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
14246 XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
14247 int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
14248 /* note : bets that, in most cases, hash values are different */
14249 if (hcmp) return hcmp;
14250 return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
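/*
 * Since the prototype matches the qsort() comparator, an array of hashes can
 * be sorted directly (sketch; `hashes` and `count` are caller-provided):
 *
 *   #include <stdlib.h>
 *   #include "xxhash.h"
 *
 *   void sort_hashes(XXH128_hash_t* hashes, size_t count)
 *   {
 *       qsort(hashes, count, sizeof(hashes[0]), XXH128_cmp);
 *   }
 */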
14254 /*====== Canonical representation ======*/
14255 /*! @ingroup XXH3_family */
14256 XXH_PUBLIC_API void
14257 XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
14259 XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
14260 if (XXH_CPU_LITTLE_ENDIAN) {
14261 hash.high64 = XXH_swap64(hash.high64);
14262 hash.low64 = XXH_swap64(hash.low64);
14264 XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
14265 XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
14268 /*! @ingroup XXH3_family */
14269 XXH_PUBLIC_API XXH128_hash_t
14270 XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
14272 XXH128_hash_t h;
14273 h.high64 = XXH_readBE64(src);
14274 h.low64 = XXH_readBE64(src->digest + 8);
14275 return h;
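/*
 * Round-trip sketch (illustrative): the canonical form is a fixed big-endian
 * layout (high64 first, low64 second), so it is safe to store or transmit:
 *
 *   XXH128_canonical_t canon;
 *   XXH128_hash_t const h = XXH3_128bits("abc", 3);
 *   XXH128_canonicalFromHash(&canon, h);
 *   // canon.digest (16 bytes) can be written out and read back verbatim
 *   XXH128_hash_t const back = XXH128_hashFromCanonical(&canon);
 *   // XXH128_isEqual(h, back) == 1
 */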
14280 /* ==========================================
14281 * Secret generators
14282 * ==========================================
14284 #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
14286 XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
14288 XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
14289 XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
14292 /*! @ingroup XXH3_family */
14293 XXH_PUBLIC_API XXH_errorcode
14294 XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
14296 #if (XXH_DEBUGLEVEL >= 1)
14297 XXH_ASSERT(secretBuffer != NULL);
14298 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
14299 #else
14300 /* production mode, assert() are disabled */
14301 if (secretBuffer == NULL) return XXH_ERROR;
14302 if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
14303 #endif
14305 if (customSeedSize == 0) {
14306 customSeed = XXH3_kSecret;
14307 customSeedSize = XXH_SECRET_DEFAULT_SIZE;
14309 #if (XXH_DEBUGLEVEL >= 1)
14310 XXH_ASSERT(customSeed != NULL);
14311 #else
14312 if (customSeed == NULL) return XXH_ERROR;
14313 #endif
14315 /* Fill secretBuffer with a copy of customSeed - repeat as needed */
14316 { size_t pos = 0;
14317 while (pos < secretSize) {
14318 size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
14319 memcpy((char*)secretBuffer + pos, customSeed, toCopy);
14320 pos += toCopy;
14323 { size_t const nbSeg16 = secretSize / 16;
14324 size_t n;
14325 XXH128_canonical_t scrambler;
14326 XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
14327 for (n=0; n<nbSeg16; n++) {
14328 XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
14329 XXH3_combine16((char*)secretBuffer + n*16, h128);
14331 /* last segment */
14332 XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
14334 return XXH_OK;
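/*
 * Usage sketch (illustrative seed string and buffer size; any secretSize
 * >= XXH3_SECRET_SIZE_MIN is accepted):
 *
 *   #include <string.h>
 *   #include "xxhash.h"
 *
 *   XXH128_hash_t hash_with_custom_secret(const void* data, size_t len)
 *   {
 *       unsigned char secret[XXH3_SECRET_SIZE_MIN];
 *       const char kSeed[] = "my application key";
 *       XXH128_hash_t h = { 0, 0 };
 *       if (XXH3_generateSecret(secret, sizeof(secret),
 *                               kSeed, strlen(kSeed)) == XXH_OK)
 *           h = XXH3_128bits_withSecret(data, len, secret, sizeof(secret));
 *       return h;
 *   }
 */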
14337 /*! @ingroup XXH3_family */
14338 XXH_PUBLIC_API void
14339 XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
14341 XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
14342 XXH3_initCustomSecret(secret, seed);
14343 XXH_ASSERT(secretBuffer != NULL);
14344 memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
14349 /* Pop our optimization override from above */
14350 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
14351 && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
14352 && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
14353 # pragma GCC pop_options
14354 #endif
14356 #endif /* XXH_NO_LONG_LONG */
14358 #endif /* XXH_NO_XXH3 */
14361 * @}
14363 #endif /* XXH_IMPLEMENTATION */
14366 #if defined (__cplusplus)
14367 } /* extern "C" */
14368 #endif
14369 /**** ended inlining xxhash.h ****/
14370 #ifndef ZSTD_NO_TRACE
14371 /**** start inlining zstd_trace.h ****/
14373 * Copyright (c) Meta Platforms, Inc. and affiliates.
14374 * All rights reserved.
14376 * This source code is licensed under both the BSD-style license (found in the
14377 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
14378 * in the COPYING file in the root directory of this source tree).
14379 * You may select, at your option, one of the above-listed licenses.
14382 #ifndef ZSTD_TRACE_H
14383 #define ZSTD_TRACE_H
14385 #if defined (__cplusplus)
14386 extern "C" {
14387 #endif
14389 #include <stddef.h>
14391 /* weak symbol support
14392 * For now, enable conservatively:
14393 * - Only GNUC
14394 * - Only ELF
14395 * - Only x86-64, i386 and aarch64
14396 * Also, explicitly disable on platforms known not to work so they aren't
14397 * forgotten in the future.
14399 #if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
14400 defined(__GNUC__) && defined(__ELF__) && \
14401 (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) || defined(__aarch64__)) && \
14402 !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
14403 !defined(__CYGWIN__) && !defined(_AIX)
14404 # define ZSTD_HAVE_WEAK_SYMBOLS 1
14405 #else
14406 # define ZSTD_HAVE_WEAK_SYMBOLS 0
14407 #endif
14408 #if ZSTD_HAVE_WEAK_SYMBOLS
14409 # define ZSTD_WEAK_ATTR __attribute__((__weak__))
14410 #else
14411 # define ZSTD_WEAK_ATTR
14412 #endif
14414 /* Only enable tracing when weak symbols are available. */
14415 #ifndef ZSTD_TRACE
14416 # define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS
14417 #endif
14419 #if ZSTD_TRACE
14421 struct ZSTD_CCtx_s;
14422 struct ZSTD_DCtx_s;
14423 struct ZSTD_CCtx_params_s;
14425 typedef struct {
14427 * ZSTD_VERSION_NUMBER
14429 * This is guaranteed to be the first member of ZSTD_Trace.
14430 * Otherwise, this struct is not stable between versions. If
14431 * the version number does not match your expectation, you
14432 * should not interpret the rest of the struct.
14434 unsigned version;
14436 * Non-zero if streaming (de)compression is used.
14438 unsigned streaming;
14440 * The dictionary ID.
14442 unsigned dictionaryID;
14444 * Is the dictionary cold?
14445 * Only set on decompression.
14447 unsigned dictionaryIsCold;
14449 * The dictionary size or zero if no dictionary.
14451 size_t dictionarySize;
14453 * The uncompressed size of the data.
14455 size_t uncompressedSize;
14457 * The compressed size of the data.
14459 size_t compressedSize;
14461 * The fully resolved CCtx parameters (NULL on decompression).
14463 struct ZSTD_CCtx_params_s const* params;
14465 * The ZSTD_CCtx pointer (NULL on decompression).
14467 struct ZSTD_CCtx_s const* cctx;
14469 * The ZSTD_DCtx pointer (NULL on compression).
14471 struct ZSTD_DCtx_s const* dctx;
14472 } ZSTD_Trace;
14475 * A tracing context. It must be 0 when tracing is disabled.
14476 * Otherwise, any non-zero value returned by a tracing begin()
14477 * function is presented to any subsequent calls to end().
14479 * Any non-zero value is treated as tracing is enabled and not
14480 * interpreted by the library.
14482 * Two possible uses are:
14483 * * A timestamp for when the begin() function was called.
14484 * * A unique key identifying the (de)compression, like the
14485 * address of the [dc]ctx pointer if you need to track
14486 * more information than just a timestamp.
14488 typedef unsigned long long ZSTD_TraceCtx;
14491 * Trace the beginning of a compression call.
14492 * @param cctx The cctx pointer for the compression.
14493 * It can be used as a key to map begin() to end().
14494 * @returns Non-zero if tracing is enabled. The return value is
14495 * passed to ZSTD_trace_compress_end().
14497 ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(
14498 struct ZSTD_CCtx_s const* cctx);
14501 * Trace the end of a compression call.
14502 * @param ctx The return value of ZSTD_trace_compress_begin().
14503 * @param trace The zstd tracing info.
14505 ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(
14506 ZSTD_TraceCtx ctx,
14507 ZSTD_Trace const* trace);
14510 * Trace the beginning of a decompression call.
14511 * @param dctx The dctx pointer for the decompression.
14512 * It can be used as a key to map begin() to end().
14513 * @returns Non-zero if tracing is enabled. The return value is
14514 * passed to ZSTD_trace_decompress_end().
14516 ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(
14517 struct ZSTD_DCtx_s const* dctx);
14520 * Trace the end of a decompression call.
14521 * @param ctx The return value of ZSTD_trace_decompress_begin().
14522 * @param trace The zstd tracing info.
14524 ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
14525 ZSTD_TraceCtx ctx,
14526 ZSTD_Trace const* trace);
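/*
 * Because these hooks are weak symbols, an application can observe every
 * (de)compression by providing strong definitions in its own translation
 * unit. A minimal sketch (assumes an ELF/GNUC build where
 * ZSTD_HAVE_WEAK_SYMBOLS is 1, and that the declarations above are visible,
 * e.g. via the zstd source tree's zstd_trace.h; the logging is illustrative):
 *
 *   #include <stdio.h>
 *
 *   ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx)
 *   {
 *       (void)dctx;
 *       return 1;                      // any non-zero value keeps tracing on
 *   }
 *
 *   void ZSTD_trace_decompress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
 *   {
 *       (void)ctx;
 *       if (trace->version == ZSTD_VERSION_NUMBER)
 *           fprintf(stderr, "decompressed %zu -> %zu bytes\n",
 *                   trace->compressedSize, trace->uncompressedSize);
 *   }
 */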
14528 #endif /* ZSTD_TRACE */
14530 #if defined (__cplusplus)
14532 #endif
14534 #endif /* ZSTD_TRACE_H */
14535 /**** ended inlining zstd_trace.h ****/
14536 #else
14537 # define ZSTD_TRACE 0
14538 #endif
14540 #if defined (__cplusplus)
14541 extern "C" {
14542 #endif
14544 /* ---- static assert (debug) --- */
14545 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
14546 #define ZSTD_isError ERR_isError /* for inlining */
14547 #define FSE_isError ERR_isError
14548 #define HUF_isError ERR_isError
14551 /*-*************************************
14552 * shared macros
14553 ***************************************/
14554 #undef MIN
14555 #undef MAX
14556 #define MIN(a,b) ((a)<(b) ? (a) : (b))
14557 #define MAX(a,b) ((a)>(b) ? (a) : (b))
14558 #define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
14561 /*-*************************************
14562 * Common constants
14563 ***************************************/
14564 #define ZSTD_OPT_NUM (1<<12)
14566 #define ZSTD_REP_NUM 3 /* number of repcodes */
14567 static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
14569 #define KB *(1 <<10)
14570 #define MB *(1 <<20)
14571 #define GB *(1U<<30)
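/* Usage note: these expand to a multiplication, e.g. `128 KB` becomes
 * `128 *(1 <<10)` == 131072, so uses are typically parenthesized. */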
14573 #define BIT7 128
14574 #define BIT6 64
14575 #define BIT5 32
14576 #define BIT4 16
14577 #define BIT1 2
14578 #define BIT0 1
14580 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
14581 static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
14582 static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
14584 #define ZSTD_FRAMEIDSIZE 4 /* magic number size */
14586 #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow a `static const` variable to be initialized from another `static const` variable */
14587 static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
14588 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
14590 #define ZSTD_FRAMECHECKSUMSIZE 4
14592 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
14593 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */
14594 #define MIN_LITERALS_FOR_4_STREAMS 6
14596 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
14598 #define LONGNBSEQ 0x7F00
14600 #define MINMATCH 3
14602 #define Litbits 8
14603 #define LitHufLog 11
14604 #define MaxLit ((1<<Litbits) - 1)
14605 #define MaxML 52
14606 #define MaxLL 35
14607 #define DefaultMaxOff 28
14608 #define MaxOff 31
14609 #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
14610 #define MLFSELog 9
14611 #define LLFSELog 9
14612 #define OffFSELog 8
14613 #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
14614 #define MaxMLBits 16
14615 #define MaxLLBits 16
14617 #define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
14618 /* Each table cannot take more than #symbols * FSELog bits */
14619 #define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
14621 static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
14622 0, 0, 0, 0, 0, 0, 0, 0,
14623 0, 0, 0, 0, 0, 0, 0, 0,
14624 1, 1, 1, 1, 2, 2, 3, 3,
14625 4, 6, 7, 8, 9,10,11,12,
14626 13,14,15,16
14628 static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
14629 4, 3, 2, 2, 2, 2, 2, 2,
14630 2, 2, 2, 2, 2, 1, 1, 1,
14631 2, 2, 2, 2, 2, 2, 2, 2,
14632 2, 3, 2, 1, 1, 1, 1, 1,
14633 -1,-1,-1,-1
14635 #define LL_DEFAULTNORMLOG 6 /* for static allocation */
14636 static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
14638 static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
14639 0, 0, 0, 0, 0, 0, 0, 0,
14640 0, 0, 0, 0, 0, 0, 0, 0,
14641 0, 0, 0, 0, 0, 0, 0, 0,
14642 0, 0, 0, 0, 0, 0, 0, 0,
14643 1, 1, 1, 1, 2, 2, 3, 3,
14644 4, 4, 5, 7, 8, 9,10,11,
14645 12,13,14,15,16
14647 static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
14648 1, 4, 3, 2, 2, 2, 2, 2,
14649 2, 1, 1, 1, 1, 1, 1, 1,
14650 1, 1, 1, 1, 1, 1, 1, 1,
14651 1, 1, 1, 1, 1, 1, 1, 1,
14652 1, 1, 1, 1, 1, 1, 1, 1,
14653 1, 1, 1, 1, 1, 1,-1,-1,
14654 -1,-1,-1,-1,-1
14656 #define ML_DEFAULTNORMLOG 6 /* for static allocation */
14657 static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
14659 static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
14660 1, 1, 1, 1, 1, 1, 2, 2,
14661 2, 1, 1, 1, 1, 1, 1, 1,
14662 1, 1, 1, 1, 1, 1, 1, 1,
14663 -1,-1,-1,-1,-1
14665 #define OF_DEFAULTNORMLOG 5 /* for static allocation */
14666 static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
14669 /*-*******************************************
14670 * Shared functions to include for inlining
14671 *********************************************/
14672 static void ZSTD_copy8(void* dst, const void* src) {
14673 #if defined(ZSTD_ARCH_ARM_NEON)
14674 vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
14675 #else
14676 ZSTD_memcpy(dst, src, 8);
14677 #endif
14679 #define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0)
14681 /* Need to use memmove here since the literal buffer can now be located within
14682 the dst buffer. In circumstances where the op "catches up" to where the
14683 literal buffer is, there can be partial overlaps in this call on the final
14684 copy if the literal is being shifted by less than 16 bytes. */
14685 static void ZSTD_copy16(void* dst, const void* src) {
14686 #if defined(ZSTD_ARCH_ARM_NEON)
14687 vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
14688 #elif defined(ZSTD_ARCH_X86_SSE2)
14689 _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
14690 #elif defined(__clang__)
14691 ZSTD_memmove(dst, src, 16);
14692 #else
14693 /* ZSTD_memmove is not inlined properly by gcc */
14694 BYTE copy16_buf[16];
14695 ZSTD_memcpy(copy16_buf, src, 16);
14696 ZSTD_memcpy(dst, copy16_buf, 16);
14697 #endif
14699 #define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0)
14701 #define WILDCOPY_OVERLENGTH 32
14702 #define WILDCOPY_VECLEN 16
14704 typedef enum {
14705 ZSTD_no_overlap,
14706 ZSTD_overlap_src_before_dst
14707 /* ZSTD_overlap_dst_before_src, */
14708 } ZSTD_overlap_e;
14710 /*! ZSTD_wildcopy() :
14711 * Custom version of ZSTD_memcpy(); can over-read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
14712 * @param ovtype controls the overlap detection
14713 * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
14714 * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
14715 * The src buffer must be before the dst buffer.
14717 MEM_STATIC FORCE_INLINE_ATTR
14718 void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
14720 ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
14721 const BYTE* ip = (const BYTE*)src;
14722 BYTE* op = (BYTE*)dst;
14723 BYTE* const oend = op + length;
14725 if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
14726 /* Handle short offset copies. */
14727 do {
14728 COPY8(op, ip);
14729 } while (op < oend);
14730 } else {
14731 assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
14732 /* Separate out the first COPY16() call because the copy length is
14733 * almost certain to be short, so the branches have different
14734 * probabilities. Since it is almost certain to be short, only do
14735 * one COPY16() in the first call. Then, do two calls per loop since
14736 * at that point it is more likely to have a high trip count.
14738 ZSTD_copy16(op, ip);
14739 if (16 >= length) return;
14740 op += 16;
14741 ip += 16;
14742 do {
14743 COPY16(op, ip);
14744 COPY16(op, ip);
14746 while (op < oend);
14750 MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
14752 size_t const length = MIN(dstCapacity, srcSize);
14753 if (length > 0) {
14754 ZSTD_memcpy(dst, src, length);
14756 return length;
14759 /* define "workspace is too large" as this number of times larger than needed */
14760 #define ZSTD_WORKSPACETOOLARGE_FACTOR 3
14762 /* when the workspace has been too large
14763 * for at least this many consecutive times,
14764 * the context's memory usage is considered wasteful,
14765 * because it is sized to handle a worst-case scenario which rarely happens.
14766 * In that case, resize it down to free some memory */
14767 #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
14769 /* Controls whether the input/output buffer is buffered or stable. */
14770 typedef enum {
14771 ZSTD_bm_buffered = 0, /* Buffer the input/output */
14772 ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
14773 } ZSTD_bufferMode_e;
14776 /*-*******************************************
14777 * Private declarations
14778 *********************************************/
14779 typedef struct seqDef_s {
14780 U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
14781 U16 litLength;
14782 U16 mlBase; /* mlBase == matchLength - MINMATCH */
14783 } seqDef;
14785 /* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
14786 typedef enum {
14787 ZSTD_llt_none = 0, /* no longLengthType */
14788 ZSTD_llt_literalLength = 1, /* represents a long literal */
14789 ZSTD_llt_matchLength = 2 /* represents a long match */
14790 } ZSTD_longLengthType_e;
14792 typedef struct {
14793 seqDef* sequencesStart;
14794 seqDef* sequences; /* ptr to end of sequences */
14795 BYTE* litStart;
14796 BYTE* lit; /* ptr to end of literals */
14797 BYTE* llCode;
14798 BYTE* mlCode;
14799 BYTE* ofCode;
14800 size_t maxNbSeq;
14801 size_t maxNbLit;
14803 /* longLengthPos and longLengthType allow us to represent a single litLength or matchLength
14804 * in the seqStore whose value is larger than U16 (if such a value exists). To do so, we increment
14805 * the existing value of the litLength or matchLength by 0x10000.
14807 ZSTD_longLengthType_e longLengthType;
14808 U32 longLengthPos; /* Index of the sequence to apply long length modification to */
14809 } seqStore_t;
14811 typedef struct {
14812 U32 litLength;
14813 U32 matchLength;
14814 } ZSTD_sequenceLength;
14817 * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
14818 * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
14820 MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
14822 ZSTD_sequenceLength seqLen;
14823 seqLen.litLength = seq->litLength;
14824 seqLen.matchLength = seq->mlBase + MINMATCH;
14825 if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
14826 if (seqStore->longLengthType == ZSTD_llt_literalLength) {
14827 seqLen.litLength += 0x10000;
14829 if (seqStore->longLengthType == ZSTD_llt_matchLength) {
14830 seqLen.matchLength += 0x10000;
14833 return seqLen;
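/*
 * Worked example (illustrative): if this sequence sits at index longLengthPos
 * and longLengthType == ZSTD_llt_matchLength with seq->mlBase == 0x1234, the
 * returned matchLength is 0x1234 + MINMATCH + 0x10000.
 */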
14837 * Contains the compressed frame size and an upper bound for the decompressed frame size.
14838 * Note: before using `compressedSize`, check for errors using ZSTD_isError().
14839 * Similarly, before using `decompressedBound`, check for errors using:
14840 * `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
14842 typedef struct {
14843 size_t nbBlocks;
14844 size_t compressedSize;
14845 unsigned long long decompressedBound;
14846 } ZSTD_frameSizeInfo; /* decompress & legacy */
14848 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
14849 int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
14852 /* ZSTD_invalidateRepCodes() :
14853 * ensures next compression will not use repcodes from previous block.
14854 * Note : only works with regular variant;
14855 * do not use with extDict variant ! */
14856 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
14859 typedef struct {
14860 blockType_e blockType;
14861 U32 lastBlock;
14862 U32 origSize;
14863 } blockProperties_t; /* declared here for decompress and fullbench */
14865 /*! ZSTD_getcBlockSize() :
14866 * Provides the size of compressed block from block header `src` */
14867 /* Used by: decompress, fullbench */
14868 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
14869 blockProperties_t* bpPtr);
14871 /*! ZSTD_decodeSeqHeaders() :
14872 * decode sequence header from src */
14873 /* Used by: zstd_decompress_block, fullbench */
14874 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
14875 const void* src, size_t srcSize);
14878 * @returns true iff the CPU supports dynamic BMI2 dispatch.
14880 MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
14882 ZSTD_cpuid_t cpuid = ZSTD_cpuid();
14883 return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
14886 #if defined (__cplusplus)
14888 #endif
14890 #endif /* ZSTD_CCOMMON_H_MODULE */
14891 /**** ended inlining zstd_internal.h ****/
14894 /*-****************************************
14895 * Version
14896 ******************************************/
14897 unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; }
14899 const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
14902 /*-****************************************
14903 * ZSTD Error Management
14904 ******************************************/
14905 #undef ZSTD_isError /* defined within zstd_internal.h */
14906 /*! ZSTD_isError() :
14907 * tells if a return value is an error code
14908 * symbol is required for external callers */
14909 unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
14911 /*! ZSTD_getErrorName() :
14912 * provides error code string from function result (useful for debugging) */
14913 const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
14915 /*! ZSTD_getError() :
14916 * convert a `size_t` function result into a proper ZSTD_errorCode enum */
14917 ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
14919 /*! ZSTD_getErrorString() :
14920 * provides error code string from enum */
14921 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
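/*
 * Typical call-site pattern for these helpers (sketch; ZSTD_decompress()
 * stands in for any size_t-returning zstd entry point):
 *
 *   #include <stdio.h>
 *   #include "zstd.h"
 *
 *   int decompress_checked(void* dst, size_t dstCapacity,
 *                          const void* src, size_t srcSize)
 *   {
 *       size_t const r = ZSTD_decompress(dst, dstCapacity, src, srcSize);
 *       if (ZSTD_isError(r)) {
 *           fprintf(stderr, "zstd: %s\n", ZSTD_getErrorName(r));
 *           return -1;
 *       }
 *       return 0;   // on success, r holds the decompressed size
 *   }
 */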
14922 /**** ended inlining common/zstd_common.c ****/
14924 /**** start inlining decompress/huf_decompress.c ****/
14925 /* ******************************************************************
14926 * huff0 huffman decoder,
14927 * part of Finite State Entropy library
14928 * Copyright (c) Meta Platforms, Inc. and affiliates.
14930 * You can contact the author at :
14931 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
14933 * This source code is licensed under both the BSD-style license (found in the
14934 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
14935 * in the COPYING file in the root directory of this source tree).
14936 * You may select, at your option, one of the above-listed licenses.
14937 ****************************************************************** */
14939 /* **************************************************************
14940 * Dependencies
14941 ****************************************************************/
14942 /**** skipping file: ../common/zstd_deps.h ****/
14943 /**** skipping file: ../common/compiler.h ****/
14944 /**** skipping file: ../common/bitstream.h ****/
14945 /**** skipping file: ../common/fse.h ****/
14946 /**** skipping file: ../common/huf.h ****/
14947 /**** skipping file: ../common/error_private.h ****/
14948 /**** skipping file: ../common/zstd_internal.h ****/
14949 /**** skipping file: ../common/bits.h ****/
14951 /* **************************************************************
14952 * Constants
14953 ****************************************************************/
14955 #define HUF_DECODER_FAST_TABLELOG 11
14957 /* **************************************************************
14958 * Macros
14959 ****************************************************************/
14961 #ifdef HUF_DISABLE_FAST_DECODE
14962 # define HUF_ENABLE_FAST_DECODE 0
14963 #else
14964 # define HUF_ENABLE_FAST_DECODE 1
14965 #endif
14967 /* These two optional macros force the use of one or the other of the two
14968 * Huffman decompression implementations. You can't force in both directions
14969 * at the same time.
14971 #if defined(HUF_FORCE_DECOMPRESS_X1) && \
14972 defined(HUF_FORCE_DECOMPRESS_X2)
14973 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
14974 #endif
14976 /* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is
14977 * supported at runtime, so we can add the BMI2 target attribute.
14978 * When it is disabled, we will still get BMI2 if it is enabled statically.
14980 #if DYNAMIC_BMI2
14981 # define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
14982 #else
14983 # define HUF_FAST_BMI2_ATTRS
14984 #endif
14986 #ifdef __cplusplus
14987 # define HUF_EXTERN_C extern "C"
14988 #else
14989 # define HUF_EXTERN_C
14990 #endif
14991 #define HUF_ASM_DECL HUF_EXTERN_C
14993 #if DYNAMIC_BMI2
14994 # define HUF_NEED_BMI2_FUNCTION 1
14995 #else
14996 # define HUF_NEED_BMI2_FUNCTION 0
14997 #endif
14999 /* **************************************************************
15000 * Error Management
15001 ****************************************************************/
15002 #define HUF_isError ERR_isError
15005 /* **************************************************************
15006 * Byte alignment for workSpace management
15007 ****************************************************************/
15008 #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
15009 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
15012 /* **************************************************************
15013 * BMI2 Variant Wrappers
15014 ****************************************************************/
15015 typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize,
15016 const void *cSrc,
15017 size_t cSrcSize,
15018 const HUF_DTable *DTable);
15020 #if DYNAMIC_BMI2
15022 #define HUF_DGEN(fn) \
15024 static size_t fn##_default( \
15025 void* dst, size_t dstSize, \
15026 const void* cSrc, size_t cSrcSize, \
15027 const HUF_DTable* DTable) \
15029 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
15032 static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
15033 void* dst, size_t dstSize, \
15034 const void* cSrc, size_t cSrcSize, \
15035 const HUF_DTable* DTable) \
15037 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
15040 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
15041 size_t cSrcSize, HUF_DTable const* DTable, int flags) \
15043 if (flags & HUF_flags_bmi2) { \
15044 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
15046 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
15049 #else
15051 #define HUF_DGEN(fn) \
15052 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
15053 size_t cSrcSize, HUF_DTable const* DTable, int flags) \
15055 (void)flags; \
15056 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
15059 #endif
15062 /*-***************************/
15063 /* generic DTableDesc */
15064 /*-***************************/
15065 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
15067 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
15069 DTableDesc dtd;
15070 ZSTD_memcpy(&dtd, table, sizeof(dtd));
15071 return dtd;
15074 static size_t HUF_initFastDStream(BYTE const* ip) {
15075 BYTE const lastByte = ip[7];
15076 size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
15077 size_t const value = MEM_readLEST(ip) | 1;
15078 assert(bitsConsumed <= 8);
15079 assert(sizeof(size_t) == 8);
15080 return value << bitsConsumed;
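/*
 * Worked example (illustrative): if the last byte ip[7] is 0xB5 (0b10110101),
 * its highest set bit -- the stream's closing flag bit in the FSE/huff0
 * bitstream format -- is bit 7, so bitsConsumed = 8 - 7 = 1 and the container
 * is shifted left by 1 bit, discarding that flag. The `| 1` plants a new
 * sentinel at bit 0, which is what lets ZSTD_countTrailingZeros64() later
 * report how many bits have been consumed from this container.
 */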
15085 * The input/output arguments to the Huffman fast decoding loop:
15087 * ip [in/out] - The input pointers, must be updated to reflect what is consumed.
15088 * op [in/out] - The output pointers, must be updated to reflect what is written.
15089 * bits [in/out] - The bitstream containers, must be updated to reflect the current state.
15090 * dt [in] - The decoding table.
15091 * ilowest [in] - The beginning of the valid range of the input. Decoders may read
15092 * down to this pointer. It may be below iend[0].
15093 * oend [in] - The end of the output stream. op[3] must not cross oend.
15094 * iend [in] - The end of each input stream. ip[i] may cross iend[i],
15095 * as long as it is above ilowest, but that indicates corruption.
15097 typedef struct {
15098 BYTE const* ip[4];
15099 BYTE* op[4];
15100 U64 bits[4];
15101 void const* dt;
15102 BYTE const* ilowest;
15103 BYTE* oend;
15104 BYTE const* iend[4];
15105 } HUF_DecompressFastArgs;
15107 typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*);
15110 * Initializes args for the fast decoding loop.
15111 * @returns 1 on success
15112 * 0 if the fallback implementation should be used.
15113 * Or an error code on failure.
15115 static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
15117 void const* dt = DTable + 1;
15118 U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
15120 const BYTE* const istart = (const BYTE*)src;
15122 BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
15124 /* The fast decoding loop assumes 64-bit little-endian.
15125 * This condition is false on x32.
15127 if (!MEM_isLittleEndian() || MEM_32bits())
15128 return 0;
15130 /* Avoid nullptr addition */
15131 if (dstSize == 0)
15132 return 0;
15133 assert(dst != NULL);
15135 /* strict minimum : jump table + 1 byte per stream */
15136 if (srcSize < 10)
15137 return ERROR(corruption_detected);
15139 /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
15140 * If the table log is not correct at this point, fall back to the old decoder.
15141 * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
15143 if (dtLog != HUF_DECODER_FAST_TABLELOG)
15144 return 0;
15146 /* Read the jump table. */
15148 size_t const length1 = MEM_readLE16(istart);
15149 size_t const length2 = MEM_readLE16(istart+2);
15150 size_t const length3 = MEM_readLE16(istart+4);
15151 size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
15152 args->iend[0] = istart + 6; /* jumpTable */
15153 args->iend[1] = args->iend[0] + length1;
15154 args->iend[2] = args->iend[1] + length2;
15155 args->iend[3] = args->iend[2] + length3;
15157 /* HUF_initFastDStream() requires this, and this small of an input
15158 * won't benefit from the ASM loop anyway.
15160 if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
15161 return 0;
15162 if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
15164 /* ip[] contains the position that is currently loaded into bits[]. */
15165 args->ip[0] = args->iend[1] - sizeof(U64);
15166 args->ip[1] = args->iend[2] - sizeof(U64);
15167 args->ip[2] = args->iend[3] - sizeof(U64);
15168 args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
15170 /* op[] contains the output pointers. */
15171 args->op[0] = (BYTE*)dst;
15172 args->op[1] = args->op[0] + (dstSize+3)/4;
15173 args->op[2] = args->op[1] + (dstSize+3)/4;
15174 args->op[3] = args->op[2] + (dstSize+3)/4;
15176 /* No point in calling the ASM loop for tiny outputs. */
15177 if (args->op[3] >= oend)
15178 return 0;
15180 /* bits[] is the bit container.
15181 * It is read from the MSB down to the LSB.
15182 * It is shifted left as it is read, and zeros are
15183 * shifted in. After the lowest valid bit a 1 is
15184 * set, so that CountTrailingZeros(bits[]) can be used
15185 * to count how many bits we've consumed.
15187 args->bits[0] = HUF_initFastDStream(args->ip[0]);
15188 args->bits[1] = HUF_initFastDStream(args->ip[1]);
15189 args->bits[2] = HUF_initFastDStream(args->ip[2]);
15190 args->bits[3] = HUF_initFastDStream(args->ip[3]);
15192 /* The decoders must be sure to never read beyond ilowest.
15193 * This is lower than iend[0], but allowing decoders to read
15194 * down to ilowest can allow an extra iteration or two in the
15195 * fast loop.
15197 args->ilowest = istart;
15199 args->oend = oend;
15200 args->dt = dt;
15202 return 1;
15205 static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd)
15207 /* Validate that we haven't overwritten. */
15208 if (args->op[stream] > segmentEnd)
15209 return ERROR(corruption_detected);
15210 /* Validate that we haven't read beyond iend[].
15211 * Note that ip[] may be < iend[] because the MSB is
15212 * the next bit to read, and we may have consumed 100%
15213 * of the stream, so down to iend[i] - 8 is valid.
15215 if (args->ip[stream] < args->iend[stream] - 8)
15216 return ERROR(corruption_detected);
15218 /* Construct the BIT_DStream_t. */
15219 assert(sizeof(size_t) == 8);
15220 bit->bitContainer = MEM_readLEST(args->ip[stream]);
15221 bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
15222 bit->start = (const char*)args->ilowest;
15223 bit->limitPtr = bit->start + sizeof(size_t);
15224 bit->ptr = (const char*)args->ip[stream];
15226 return 0;
15229 /* Calls X(N) for each stream 0, 1, 2, 3. */
15230 #define HUF_4X_FOR_EACH_STREAM(X) \
15231 do { \
15232 X(0); \
15233 X(1); \
15234 X(2); \
15235 X(3); \
15236 } while (0)
15238 /* Calls X(N, var) for each stream 0, 1, 2, 3. */
15239 #define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
15240 do { \
15241 X(0, (var)); \
15242 X(1, (var)); \
15243 X(2, (var)); \
15244 X(3, (var)); \
15245 } while (0)
15248 #ifndef HUF_FORCE_DECOMPRESS_X2
15250 /*-***************************/
15251 /* single-symbol decoding */
15252 /*-***************************/
15253 typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */
15256 * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
15257 * a time.
15259 static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
15260 U64 D4;
15261 if (MEM_isLittleEndian()) {
15262 D4 = (U64)((symbol << 8) + nbBits);
15263 } else {
15264 D4 = (U64)(symbol + (nbBits << 8));
15266 assert(D4 < (1U << 16));
15267 D4 *= 0x0001000100010001ULL;
15268 return D4;
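/*
 * Worked example (little-endian, illustrative): for symbol 0x41 and nbBits 5,
 * D4 = (0x41 << 8) + 5 = 0x4105, and 0x4105 * 0x0001000100010001ULL =
 * 0x4105410541054105, i.e. four { nbBits = 5, byte = 0x41 } entries in memory.
 */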
15272 * Increases the tableLog to targetTableLog and rescales the stats.
15273 * If tableLog > targetTableLog this is a no-op.
15274 * @returns New tableLog
15276 static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
15278 if (tableLog > targetTableLog)
15279 return tableLog;
15280 if (tableLog < targetTableLog) {
15281 U32 const scale = targetTableLog - tableLog;
15282 U32 s;
15283 /* Increase the weight for all non-zero probability symbols by scale. */
15284 for (s = 0; s < nbSymbols; ++s) {
15285 huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
15287 /* Update rankVal to reflect the new weights.
15288 * All weights except 0 get moved to weight + scale.
15289 * Weights [1, scale] are empty.
15291 for (s = targetTableLog; s > scale; --s) {
15292 rankVal[s] = rankVal[s - scale];
15294 for (s = scale; s > 0; --s) {
15295 rankVal[s] = 0;
15298 return targetTableLog;
15301 typedef struct {
15302 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
15303 U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
15304 U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
15305 BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
15306 BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
15307 } HUF_ReadDTableX1_Workspace;
15309 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags)
15311 U32 tableLog = 0;
15312 U32 nbSymbols = 0;
15313 size_t iSize;
15314 void* const dtPtr = DTable + 1;
15315 HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
15316 HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
15318 DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
15319 if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
15321 DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
15322 /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
15324 iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags);
15325 if (HUF_isError(iSize)) return iSize;
15328 /* Table header */
15329 { DTableDesc dtd = HUF_getDTableDesc(DTable);
15330 U32 const maxTableLog = dtd.maxTableLog + 1;
15331 U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
15332 tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
15333 if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
15334 dtd.tableType = 0;
15335 dtd.tableLog = (BYTE)tableLog;
15336 ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
15339 /* Compute symbols and rankStart given rankVal:
15341 * rankVal already contains the number of values of each weight.
15343 * symbols contains the symbols ordered by weight. First are the rankVal[0]
15344 * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
15345 * symbols[0] is filled (but unused) to avoid a branch.
15347 * rankStart contains the offset where each rank belongs in the DTable.
15348 * rankStart[0] is not filled because there are no entries in the table for
15349 * weight 0.
15351 { int n;
15352 U32 nextRankStart = 0;
15353 int const unroll = 4;
15354 int const nLimit = (int)nbSymbols - unroll + 1;
15355 for (n=0; n<(int)tableLog+1; n++) {
15356 U32 const curr = nextRankStart;
15357 nextRankStart += wksp->rankVal[n];
15358 wksp->rankStart[n] = curr;
15360 for (n=0; n < nLimit; n += unroll) {
15361 int u;
15362 for (u=0; u < unroll; ++u) {
15363 size_t const w = wksp->huffWeight[n+u];
15364 wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
15367 for (; n < (int)nbSymbols; ++n) {
15368 size_t const w = wksp->huffWeight[n];
15369 wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
15373 /* fill DTable
15374 * We fill all entries of each weight in order.
15375 * That way length is a constant for each iteration of the outer loop.
15376 * We can switch based on the length to a different inner loop which is
15377 * optimized for that particular case.
15379 { U32 w;
15380 int symbol = wksp->rankVal[0];
15381 int rankStart = 0;
15382 for (w=1; w<tableLog+1; ++w) {
15383 int const symbolCount = wksp->rankVal[w];
15384 int const length = (1 << w) >> 1;
15385 int uStart = rankStart;
15386 BYTE const nbBits = (BYTE)(tableLog + 1 - w);
15387 int s;
15388 int u;
15389 switch (length) {
15390 case 1:
15391 for (s=0; s<symbolCount; ++s) {
15392 HUF_DEltX1 D;
15393 D.byte = wksp->symbols[symbol + s];
15394 D.nbBits = nbBits;
15395 dt[uStart] = D;
15396 uStart += 1;
15398 break;
15399 case 2:
15400 for (s=0; s<symbolCount; ++s) {
15401 HUF_DEltX1 D;
15402 D.byte = wksp->symbols[symbol + s];
15403 D.nbBits = nbBits;
15404 dt[uStart+0] = D;
15405 dt[uStart+1] = D;
15406 uStart += 2;
15408 break;
15409 case 4:
15410 for (s=0; s<symbolCount; ++s) {
15411 U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
15412 MEM_write64(dt + uStart, D4);
15413 uStart += 4;
15415 break;
15416 case 8:
15417 for (s=0; s<symbolCount; ++s) {
15418 U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
15419 MEM_write64(dt + uStart, D4);
15420 MEM_write64(dt + uStart + 4, D4);
15421 uStart += 8;
15423 break;
15424 default:
15425 for (s=0; s<symbolCount; ++s) {
15426 U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
15427 for (u=0; u < length; u += 16) {
15428 MEM_write64(dt + uStart + u + 0, D4);
15429 MEM_write64(dt + uStart + u + 4, D4);
15430 MEM_write64(dt + uStart + u + 8, D4);
15431 MEM_write64(dt + uStart + u + 12, D4);
15433 assert(u == length);
15434 uStart += length;
15436 break;
15438 symbol += symbolCount;
15439 rankStart += symbolCount * length;
15442 return iSize;
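/* Resulting table shape (illustrative): with tableLog=11 the table has
 * 2^11 entries; a symbol of weight w occupies 2^(w-1) consecutive entries,
 * each carrying nbBits = tableLog+1-w, e.g. a weight-4 symbol fills 8
 * entries of 8 bits. Keeping same-weight symbols contiguous is what lets
 * the switch above pick a fixed-length inner loop per weight. */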
15445 FORCE_INLINE_TEMPLATE BYTE
15446 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
15448 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
15449 BYTE const c = dt[val].byte;
15450 BIT_skipBits(Dstream, dt[val].nbBits);
15451 return c;
15454 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
15455 do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
15457 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
15458 do { \
15459 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
15460 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
15461 } while (0)
15463 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
15464 do { \
15465 if (MEM_64bits()) \
15466 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
15467 } while (0)
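/* These three tiers exist because the number of lookups that can safely be
 * made between two BIT_reloadDStream() calls is limited by the width of the
 * bit container: a freshly reloaded 64-bit container holds at least 57
 * usable bits, enough for four lookups of up to 12 bits each, while a
 * 32-bit container holds at least 25 bits, enough for only two (and just
 * one if HUF_TABLELOG_MAX were raised above 12). _0 always decodes, _1
 * decodes except on 32-bit builds with large tables, _2 on 64-bit only. */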
15469 HINT_INLINE size_t
15470 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
15472 BYTE* const pStart = p;
15474 /* up to 4 symbols at a time */
15475 if ((pEnd - p) > 3) {
15476 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
15477 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
15478 HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
15479 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
15480 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
15482 } else {
15483 BIT_reloadDStream(bitDPtr);
15486 /* [0-3] symbols remaining */
15487 if (MEM_32bits())
15488 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
15489 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
15491 /* no more data to retrieve from bitstream, no need to reload */
15492 while (p < pEnd)
15493 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
15495 return (size_t)(pEnd-pStart);
15498 FORCE_INLINE_TEMPLATE size_t
15499 HUF_decompress1X1_usingDTable_internal_body(
15500 void* dst, size_t dstSize,
15501 const void* cSrc, size_t cSrcSize,
15502 const HUF_DTable* DTable)
15504 BYTE* op = (BYTE*)dst;
15505 BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize);
15506 const void* dtPtr = DTable + 1;
15507 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
15508 BIT_DStream_t bitD;
15509 DTableDesc const dtd = HUF_getDTableDesc(DTable);
15510 U32 const dtLog = dtd.tableLog;
15512 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
15514 HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
15516 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
15518 return dstSize;
15521 /* HUF_decompress4X1_usingDTable_internal_body():
15522 * Conditions :
15523 * @dstSize >= 6
15525 FORCE_INLINE_TEMPLATE size_t
15526 HUF_decompress4X1_usingDTable_internal_body(
15527 void* dst, size_t dstSize,
15528 const void* cSrc, size_t cSrcSize,
15529 const HUF_DTable* DTable)
15531 /* Check */
15532 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
15533 if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
15535 { const BYTE* const istart = (const BYTE*) cSrc;
15536 BYTE* const ostart = (BYTE*) dst;
15537 BYTE* const oend = ostart + dstSize;
15538 BYTE* const olimit = oend - 3;
15539 const void* const dtPtr = DTable + 1;
15540 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
15542 /* Init */
15543 BIT_DStream_t bitD1;
15544 BIT_DStream_t bitD2;
15545 BIT_DStream_t bitD3;
15546 BIT_DStream_t bitD4;
15547 size_t const length1 = MEM_readLE16(istart);
15548 size_t const length2 = MEM_readLE16(istart+2);
15549 size_t const length3 = MEM_readLE16(istart+4);
15550 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
15551 const BYTE* const istart1 = istart + 6; /* jumpTable */
15552 const BYTE* const istart2 = istart1 + length1;
15553 const BYTE* const istart3 = istart2 + length2;
15554 const BYTE* const istart4 = istart3 + length3;
15555 const size_t segmentSize = (dstSize+3) / 4;
15556 BYTE* const opStart2 = ostart + segmentSize;
15557 BYTE* const opStart3 = opStart2 + segmentSize;
15558 BYTE* const opStart4 = opStart3 + segmentSize;
15559 BYTE* op1 = ostart;
15560 BYTE* op2 = opStart2;
15561 BYTE* op3 = opStart3;
15562 BYTE* op4 = opStart4;
15563 DTableDesc const dtd = HUF_getDTableDesc(DTable);
15564 U32 const dtLog = dtd.tableLog;
15565 U32 endSignal = 1;
15567 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
15568 if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
15569 assert(dstSize >= 6); /* validated above */
15570 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
15571 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
15572 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
15573 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
15575 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
15576 if ((size_t)(oend - op4) >= sizeof(size_t)) {
15577 for ( ; (endSignal) & (op4 < olimit) ; ) {
15578 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
15579 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
15580 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
15581 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
15582 HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
15583 HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
15584 HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
15585 HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
15586 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
15587 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
15588 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
15589 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
15590 HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
15591 HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
15592 HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
15593 HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
15594 endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
15595 endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
15596 endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
15597 endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
15601 /* check corruption */
15602 /* note : should not be necessary : op# advance in lock step, and we control op4.
15603 * but curiously, the binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when at least one test is present */
15604 if (op1 > opStart2) return ERROR(corruption_detected);
15605 if (op2 > opStart3) return ERROR(corruption_detected);
15606 if (op3 > opStart4) return ERROR(corruption_detected);
15607 /* note : op4 is already verified within the main loop */
15609 /* finish bitStreams one by one */
15610 HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
15611 HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
15612 HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
15613 HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
15615 /* check */
15616 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
15617 if (!endCheck) return ERROR(corruption_detected); }
15619 /* decoded size */
15620 return dstSize;
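/* Layout reminder with made-up numbers: the compressed block begins with a
 * 6-byte jump table holding the byte sizes of streams 1-3 as little-endian
 * 16-bit values; stream 4 gets whatever remains. E.g. cSrcSize=106 with
 * lengths 30/25/20 gives length4 = 106-(30+25+20+6) = 25. The output is cut
 * into 4 segments of segmentSize = (dstSize+3)/4, so for dstSize=103 each of
 * the first three streams produces 26 bytes and the last one 25. */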
15624 #if HUF_NEED_BMI2_FUNCTION
15625 static BMI2_TARGET_ATTRIBUTE
15626 size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
15627 size_t cSrcSize, HUF_DTable const* DTable) {
15628 return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
15630 #endif
15632 static
15633 size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
15634 size_t cSrcSize, HUF_DTable const* DTable) {
15635 return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
15638 #if ZSTD_ENABLE_ASM_X86_64_BMI2
15640 HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
15642 #endif
15644 static HUF_FAST_BMI2_ATTRS
15645 void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
15647 U64 bits[4];
15648 BYTE const* ip[4];
15649 BYTE* op[4];
15650 U16 const* const dtable = (U16 const*)args->dt;
15651 BYTE* const oend = args->oend;
15652 BYTE const* const ilowest = args->ilowest;
15654 /* Copy the arguments to local variables */
15655 ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
15656 ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
15657 ZSTD_memcpy(&op, &args->op, sizeof(op));
15659 assert(MEM_isLittleEndian());
15660 assert(!MEM_32bits());
15662 for (;;) {
15663 BYTE* olimit;
15664 int stream;
15666 /* Assert loop preconditions */
15667 #ifndef NDEBUG
15668 for (stream = 0; stream < 4; ++stream) {
15669 assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
15670 assert(ip[stream] >= ilowest);
15672 #endif
15673 /* Compute olimit */
15675 /* Each iteration produces 5 output symbols per stream */
15676 size_t const oiters = (size_t)(oend - op[3]) / 5;
15677 /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
15678 * per stream.
15680 size_t const iiters = (size_t)(ip[0] - ilowest) / 7;
15681 /* We can safely run iters iterations before running bounds checks */
15682 size_t const iters = MIN(oiters, iiters);
15683 size_t const symbols = iters * 5;
15685 /* We can simply check that op[3] < olimit, instead of checking all
15686 * of our bounds, since we can't hit the other bounds until we've run
15687 * iters iterations, which only happens when op[3] == olimit.
15689 olimit = op[3] + symbols;
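/* Example with made-up distances: if op[3] has 53 bytes of output room
 * (oiters = 10) but ip[0] is only 35 bytes above ilowest (iiters = 5),
 * then iters = 5 and olimit = op[3] + 25, so the unrolled loop below runs
 * 5 times before bounds are re-checked. */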
15691 /* Exit fast decoding loop once we reach the end. */
15692 if (op[3] == olimit)
15693 break;
15695 /* Exit the decoding loop if any input pointer has crossed the
15696 * previous one. This indicates corruption, and a precondition
15697 * to our loop is that ip[i] >= ip[0].
15699 for (stream = 1; stream < 4; ++stream) {
15700 if (ip[stream] < ip[stream - 1])
15701 goto _out;
15705 #ifndef NDEBUG
15706 for (stream = 1; stream < 4; ++stream) {
15707 assert(ip[stream] >= ip[stream - 1]);
15709 #endif
15711 #define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \
15712 do { \
15713 int const index = (int)(bits[(_stream)] >> 53); \
15714 int const entry = (int)dtable[index]; \
15715 bits[(_stream)] <<= (entry & 0x3F); \
15716 op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
15717 } while (0)
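/* In this fast path the X1 table is read as raw U16 entries: bits 0-5 hold
 * nbBits and bits 8-15 hold the symbol, i.e. the same little-endian layout
 * HUF_DEltX1_set4() writes. The top 11 bits of the 64-bit register
 * (the >> 53) index the table, and shifting left by nbBits consumes the
 * symbol from the stream. */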
15719 #define HUF_4X1_RELOAD_STREAM(_stream) \
15720 do { \
15721 int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
15722 int const nbBits = ctz & 7; \
15723 int const nbBytes = ctz >> 3; \
15724 op[(_stream)] += 5; \
15725 ip[(_stream)] -= nbBytes; \
15726 bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
15727 bits[(_stream)] <<= nbBits; \
15728 } while (0)
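/* Reload trick: the "| 1" plants a sentinel bit below the data before the
 * register is shifted, so ZSTD_countTrailingZeros64() later reveals exactly
 * how many bits have been shifted out since the last MEM_read64(). Whole
 * bytes (ctz >> 3) move ip backwards (streams are consumed from high to low
 * addresses) and the leftover ctz & 7 bits are re-applied as a shift after
 * the fresh read. op advances by 5 because each unrolled round below decodes
 * exactly 5 symbols per stream. */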
15730 /* Manually unroll the loop because compilers don't consistently
15731 * unroll the inner loops, which destroys performance.
15733 do {
15734 /* Decode 5 symbols in each of the 4 streams */
15735 HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0);
15736 HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1);
15737 HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2);
15738 HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3);
15739 HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4);
15741 /* Reload each of the 4 bitstreams */
15742 HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM);
15743 } while (op[3] < olimit);
15745 #undef HUF_4X1_DECODE_SYMBOL
15746 #undef HUF_4X1_RELOAD_STREAM
15749 _out:
15751 /* Save the final values of each of the state variables back to args. */
15752 ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
15753 ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
15754 ZSTD_memcpy(&args->op, &op, sizeof(op));
15758 * @returns @p dstSize on success (>= 6)
15759 * 0 if the fallback implementation should be used
15760 * An error code if an error occurred
15762 static HUF_FAST_BMI2_ATTRS
15763 size_t
15764 HUF_decompress4X1_usingDTable_internal_fast(
15765 void* dst, size_t dstSize,
15766 const void* cSrc, size_t cSrcSize,
15767 const HUF_DTable* DTable,
15768 HUF_DecompressFastLoopFn loopFn)
15770 void const* dt = DTable + 1;
15771 BYTE const* const ilowest = (BYTE const*)cSrc;
15772 BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
15773 HUF_DecompressFastArgs args;
15774 { size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
15775 FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
15776 if (ret == 0)
15777 return 0;
15780 assert(args.ip[0] >= args.ilowest);
15781 loopFn(&args);
15783 /* Our loop guarantees that ip[] >= ilowest and that we haven't
15784 * overwritten any op[].
15786 assert(args.ip[0] >= ilowest);
15787 assert(args.ip[0] >= ilowest);
15788 assert(args.ip[1] >= ilowest);
15789 assert(args.ip[2] >= ilowest);
15790 assert(args.ip[3] >= ilowest);
15791 assert(args.op[3] <= oend);
15793 assert(ilowest == args.ilowest);
15794 assert(ilowest + 6 == args.iend[0]);
15795 (void)ilowest;
15797 /* finish bit streams one by one. */
15798 { size_t const segmentSize = (dstSize+3) / 4;
15799 BYTE* segmentEnd = (BYTE*)dst;
15800 int i;
15801 for (i = 0; i < 4; ++i) {
15802 BIT_DStream_t bit;
15803 if (segmentSize <= (size_t)(oend - segmentEnd))
15804 segmentEnd += segmentSize;
15805 else
15806 segmentEnd = oend;
15807 FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
15808 /* Decompress and validate that we've produced exactly the expected length. */
15809 args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
15810 if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
15814 /* decoded size */
15815 assert(dstSize != 0);
15816 return dstSize;
15819 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
15821 static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
15822 size_t cSrcSize, HUF_DTable const* DTable, int flags)
15824 HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default;
15825 HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop;
15827 #if DYNAMIC_BMI2
15828 if (flags & HUF_flags_bmi2) {
15829 fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
15830 # if ZSTD_ENABLE_ASM_X86_64_BMI2
15831 if (!(flags & HUF_flags_disableAsm)) {
15832 loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
15834 # endif
15835 } else {
15836 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
15838 #endif
15840 #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
15841 if (!(flags & HUF_flags_disableAsm)) {
15842 loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
15844 #endif
15846 if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
15847 size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
15848 if (ret != 0)
15849 return ret;
15851 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
15854 static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
15855 const void* cSrc, size_t cSrcSize,
15856 void* workSpace, size_t wkspSize, int flags)
15858 const BYTE* ip = (const BYTE*) cSrc;
15860 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
15861 if (HUF_isError(hSize)) return hSize;
15862 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
15863 ip += hSize; cSrcSize -= hSize;
15865 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
15868 #endif /* HUF_FORCE_DECOMPRESS_X2 */
15871 #ifndef HUF_FORCE_DECOMPRESS_X1
15873 /* *************************/
15874 /* double-symbols decoding */
15875 /* *************************/
15877 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
15878 typedef struct { BYTE symbol; } sortedSymbol_t;
15879 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
15880 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
15883 * Constructs a HUF_DEltX2 in a U32.
15885 static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
15887 U32 seq;
15888 DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
15889 DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
15890 DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
15891 DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
15892 if (MEM_isLittleEndian()) {
15893 seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
15894 return seq + (nbBits << 16) + ((U32)level << 24);
15895 } else {
15896 seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
15897 return (seq << 16) + (nbBits << 8) + (U32)level;
15902 * Constructs a HUF_DEltX2.
15904 static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
15906 HUF_DEltX2 DElt;
15907 U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
15908 DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
15909 ZSTD_memcpy(&DElt, &val, sizeof(val));
15910 return DElt;
15914 * Constructs 2 HUF_DEltX2s and packs them into a U64.
15916 static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
15918 U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
15919 return (U64)DElt + ((U64)DElt << 32);
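/* An X2 entry packs up to two decoded bytes in `sequence`, the total bit
 * count of the pair in `nbBits`, and the number of output bytes (1 or 2) in
 * `length`. Duplicating one 4-byte entry into both halves of a U64 lets the
 * fill loops below write two adjacent table slots with a single 8-byte copy,
 * mirroring the HUF_DEltX1_set4() trick used for the single-symbol table. */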
15923 * Fills the DTable rank with all the symbols from [begin, end) that are each
15924 * nbBits long.
15926 * @param DTableRank The start of the rank in the DTable.
15927 * @param begin The first symbol to fill (inclusive).
15928 * @param end The last symbol to fill (exclusive).
15929 * @param nbBits Each symbol is nbBits long.
15930 * @param tableLog The table log.
15931 * @param baseSeq If level == 1 { 0 } else { the first level symbol }
15932 * @param level The level in the table. Must be 1 or 2.
15934 static void HUF_fillDTableX2ForWeight(
15935 HUF_DEltX2* DTableRank,
15936 sortedSymbol_t const* begin, sortedSymbol_t const* end,
15937 U32 nbBits, U32 tableLog,
15938 U16 baseSeq, int const level)
15940 U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
15941 const sortedSymbol_t* ptr;
15942 assert(level >= 1 && level <= 2);
15943 switch (length) {
15944 case 1:
15945 for (ptr = begin; ptr != end; ++ptr) {
15946 HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
15947 *DTableRank++ = DElt;
15949 break;
15950 case 2:
15951 for (ptr = begin; ptr != end; ++ptr) {
15952 HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
15953 DTableRank[0] = DElt;
15954 DTableRank[1] = DElt;
15955 DTableRank += 2;
15957 break;
15958 case 4:
15959 for (ptr = begin; ptr != end; ++ptr) {
15960 U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
15961 ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
15962 ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
15963 DTableRank += 4;
15965 break;
15966 case 8:
15967 for (ptr = begin; ptr != end; ++ptr) {
15968 U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
15969 ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
15970 ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
15971 ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
15972 ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
15973 DTableRank += 8;
15975 break;
15976 default:
15977 for (ptr = begin; ptr != end; ++ptr) {
15978 U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
15979 HUF_DEltX2* const DTableRankEnd = DTableRank + length;
15980 for (; DTableRank != DTableRankEnd; DTableRank += 8) {
15981 ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
15982 ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
15983 ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
15984 ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
15987 break;
15991 /* HUF_fillDTableX2Level2() :
15992 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
15993 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
15994 const U32* rankVal, const int minWeight, const int maxWeight1,
15995 const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
15996 U32 nbBitsBaseline, U16 baseSeq)
15998 /* Fill skipped values (all positions up to rankVal[minWeight]).
15999 * These positions only get a single symbol because the combined weight
16000 * is too large.
16002 if (minWeight>1) {
16003 U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
16004 U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
16005 int const skipSize = rankVal[minWeight];
16006 assert(length > 1);
16007 assert((U32)skipSize < length);
16008 switch (length) {
16009 case 2:
16010 assert(skipSize == 1);
16011 ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
16012 break;
16013 case 4:
16014 assert(skipSize <= 4);
16015 ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
16016 ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
16017 break;
16018 default:
16020 int i;
16021 for (i = 0; i < skipSize; i += 8) {
16022 ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
16023 ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
16024 ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
16025 ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
16031 /* Fill each of the second level symbols by weight. */
16033 int w;
16034 for (w = minWeight; w < maxWeight1; ++w) {
16035 int const begin = rankStart[w];
16036 int const end = rankStart[w+1];
16037 U32 const nbBits = nbBitsBaseline - w;
16038 U32 const totalBits = nbBits + consumedBits;
16039 HUF_fillDTableX2ForWeight(
16040 DTable + rankVal[w],
16041 sortedSymbols + begin, sortedSymbols + end,
16042 totalBits, targetLog,
16043 baseSeq, /* level */ 2);
16048 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
16049 const sortedSymbol_t* sortedList,
16050 const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight,
16051 const U32 nbBitsBaseline)
16053 U32* const rankVal = rankValOrigin[0];
16054 const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
16055 const U32 minBits = nbBitsBaseline - maxWeight;
16056 int w;
16057 int const wEnd = (int)maxWeight + 1;
16059 /* Fill DTable in order of weight. */
16060 for (w = 1; w < wEnd; ++w) {
16061 int const begin = (int)rankStart[w];
16062 int const end = (int)rankStart[w+1];
16063 U32 const nbBits = nbBitsBaseline - w;
16065 if (targetLog-nbBits >= minBits) {
16066 /* Enough room for a second symbol. */
16067 int start = rankVal[w];
16068 U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
16069 int minWeight = nbBits + scaleLog;
16070 int s;
16071 if (minWeight < 1) minWeight = 1;
16072 /* Fill the DTable for every symbol of weight w.
16073 * These symbols get at least 1 second symbol.
16075 for (s = begin; s != end; ++s) {
16076 HUF_fillDTableX2Level2(
16077 DTable + start, targetLog, nbBits,
16078 rankValOrigin[nbBits], minWeight, wEnd,
16079 sortedList, rankStart,
16080 nbBitsBaseline, sortedList[s].symbol);
16081 start += length;
16083 } else {
16084 /* Only a single symbol. */
16085 HUF_fillDTableX2ForWeight(
16086 DTable + rankVal[w],
16087 sortedList + begin, sortedList + end,
16088 nbBits, targetLog,
16089 /* baseSeq */ 0, /* level */ 1);
16094 typedef struct {
16095 rankValCol_t rankVal[HUF_TABLELOG_MAX];
16096 U32 rankStats[HUF_TABLELOG_MAX + 1];
16097 U32 rankStart0[HUF_TABLELOG_MAX + 3];
16098 sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
16099 BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
16100 U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
16101 } HUF_ReadDTableX2_Workspace;
16103 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
16104 const void* src, size_t srcSize,
16105 void* workSpace, size_t wkspSize, int flags)
16107 U32 tableLog, maxW, nbSymbols;
16108 DTableDesc dtd = HUF_getDTableDesc(DTable);
16109 U32 maxTableLog = dtd.maxTableLog;
16110 size_t iSize;
16111 void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
16112 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
16113 U32 *rankStart;
16115 HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
16117 if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
16119 rankStart = wksp->rankStart0 + 1;
16120 ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
16121 ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
16123 DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
16124 if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
16125 /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
16127 iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags);
16128 if (HUF_isError(iSize)) return iSize;
16130 /* check result */
16131 if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
16132 if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
16134 /* find maxWeight */
16135 for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
16137 /* Get start index of each weight */
16138 { U32 w, nextRankStart = 0;
16139 for (w=1; w<maxW+1; w++) {
16140 U32 curr = nextRankStart;
16141 nextRankStart += wksp->rankStats[w];
16142 rankStart[w] = curr;
16144 rankStart[0] = nextRankStart; /* put all 0w symbols at the end of the sorted list */
16145 rankStart[maxW+1] = nextRankStart;
16148 /* sort symbols by weight */
16149 { U32 s;
16150 for (s=0; s<nbSymbols; s++) {
16151 U32 const w = wksp->weightList[s];
16152 U32 const r = rankStart[w]++;
16153 wksp->sortedSymbol[r].symbol = (BYTE)s;
16155 rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
16158 /* Build rankVal */
16159 { U32* const rankVal0 = wksp->rankVal[0];
16160 { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
16161 U32 nextRankVal = 0;
16162 U32 w;
16163 for (w=1; w<maxW+1; w++) {
16164 U32 curr = nextRankVal;
16165 nextRankVal += wksp->rankStats[w] << (w+rescale);
16166 rankVal0[w] = curr;
16168 { U32 const minBits = tableLog+1 - maxW;
16169 U32 consumed;
16170 for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
16171 U32* const rankValPtr = wksp->rankVal[consumed];
16172 U32 w;
16173 for (w = 1; w < maxW+1; w++) {
16174 rankValPtr[w] = rankVal0[w] >> consumed;
16175 } } } }
16177 HUF_fillDTableX2(dt, maxTableLog,
16178 wksp->sortedSymbol,
16179 wksp->rankStart0, wksp->rankVal, maxW,
16180 tableLog+1);
16182 dtd.tableLog = (BYTE)maxTableLog;
16183 dtd.tableType = 1;
16184 ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
16185 return iSize;
16189 FORCE_INLINE_TEMPLATE U32
16190 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
16192 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
16193 ZSTD_memcpy(op, &dt[val].sequence, 2);
16194 BIT_skipBits(DStream, dt[val].nbBits);
16195 return dt[val].length;
16198 FORCE_INLINE_TEMPLATE U32
16199 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
16201 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
16202 ZSTD_memcpy(op, &dt[val].sequence, 1);
16203 if (dt[val].length==1) {
16204 BIT_skipBits(DStream, dt[val].nbBits);
16205 } else {
16206 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
16207 BIT_skipBits(DStream, dt[val].nbBits);
16208 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
16209 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
16210 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
16213 return 1;
16216 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
16217 do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)
16219 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
16220 do { \
16221 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
16222 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
16223 } while (0)
16225 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
16226 do { \
16227 if (MEM_64bits()) \
16228 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
16229 } while (0)
16231 HINT_INLINE size_t
16232 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
16233 const HUF_DEltX2* const dt, const U32 dtLog)
16235 BYTE* const pStart = p;
16237 /* up to 8 symbols at a time */
16238 if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
16239 if (dtLog <= 11 && MEM_64bits()) {
16240 /* up to 10 symbols at a time */
16241 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
16242 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
16243 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
16244 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
16245 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
16246 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
16248 } else {
16249 /* up to 8 symbols at a time */
16250 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
16251 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
16252 HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
16253 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
16254 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
16257 } else {
16258 BIT_reloadDStream(bitDPtr);
16261 /* closer to end : up to 2 symbols at a time */
16262 if ((size_t)(pEnd - p) >= 2) {
16263 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
16264 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
16266 while (p <= pEnd-2)
16267 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
16270 if (p < pEnd)
16271 p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
16273 return p-pStart;
16276 FORCE_INLINE_TEMPLATE size_t
16277 HUF_decompress1X2_usingDTable_internal_body(
16278 void* dst, size_t dstSize,
16279 const void* cSrc, size_t cSrcSize,
16280 const HUF_DTable* DTable)
16282 BIT_DStream_t bitD;
16284 /* Init */
16285 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
16287 /* decode */
16288 { BYTE* const ostart = (BYTE*) dst;
16289 BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize);
16290 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
16291 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
16292 DTableDesc const dtd = HUF_getDTableDesc(DTable);
16293 HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
16296 /* check */
16297 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
16299 /* decoded size */
16300 return dstSize;
16303 /* HUF_decompress4X2_usingDTable_internal_body():
16304 * Conditions:
16305 * @dstSize >= 6
16307 FORCE_INLINE_TEMPLATE size_t
16308 HUF_decompress4X2_usingDTable_internal_body(
16309 void* dst, size_t dstSize,
16310 const void* cSrc, size_t cSrcSize,
16311 const HUF_DTable* DTable)
16313 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
16314 if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
16316 { const BYTE* const istart = (const BYTE*) cSrc;
16317 BYTE* const ostart = (BYTE*) dst;
16318 BYTE* const oend = ostart + dstSize;
16319 BYTE* const olimit = oend - (sizeof(size_t)-1);
16320 const void* const dtPtr = DTable+1;
16321 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
16323 /* Init */
16324 BIT_DStream_t bitD1;
16325 BIT_DStream_t bitD2;
16326 BIT_DStream_t bitD3;
16327 BIT_DStream_t bitD4;
16328 size_t const length1 = MEM_readLE16(istart);
16329 size_t const length2 = MEM_readLE16(istart+2);
16330 size_t const length3 = MEM_readLE16(istart+4);
16331 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
16332 const BYTE* const istart1 = istart + 6; /* jumpTable */
16333 const BYTE* const istart2 = istart1 + length1;
16334 const BYTE* const istart3 = istart2 + length2;
16335 const BYTE* const istart4 = istart3 + length3;
16336 size_t const segmentSize = (dstSize+3) / 4;
16337 BYTE* const opStart2 = ostart + segmentSize;
16338 BYTE* const opStart3 = opStart2 + segmentSize;
16339 BYTE* const opStart4 = opStart3 + segmentSize;
16340 BYTE* op1 = ostart;
16341 BYTE* op2 = opStart2;
16342 BYTE* op3 = opStart3;
16343 BYTE* op4 = opStart4;
16344 U32 endSignal = 1;
16345 DTableDesc const dtd = HUF_getDTableDesc(DTable);
16346 U32 const dtLog = dtd.tableLog;
16348 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
16349 if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
16350 assert(dstSize >= 6 /* validated above */);
16351 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
16352 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
16353 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
16354 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
16356 /* 16-32 symbols per loop (4-8 symbols per stream) */
16357 if ((size_t)(oend - op4) >= sizeof(size_t)) {
16358 for ( ; (endSignal) & (op4 < olimit); ) {
16359 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
16360 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
16361 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
16362 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
16363 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
16364 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
16365 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
16366 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
16367 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
16368 endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
16369 endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
16370 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
16371 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
16372 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
16373 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
16374 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
16375 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
16376 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
16377 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
16378 endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
16379 endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
16380 #else
16381 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
16382 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
16383 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
16384 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
16385 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
16386 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
16387 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
16388 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
16389 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
16390 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
16391 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
16392 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
16393 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
16394 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
16395 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
16396 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
16397 endSignal = (U32)LIKELY((U32)
16398 (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
16399 & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
16400 & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
16401 & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
16402 #endif
16406 /* check corruption */
16407 if (op1 > opStart2) return ERROR(corruption_detected);
16408 if (op2 > opStart3) return ERROR(corruption_detected);
16409 if (op3 > opStart4) return ERROR(corruption_detected);
16410 /* note : op4 already verified within main loop */
16412 /* finish bitStreams one by one */
16413 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
16414 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
16415 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
16416 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
16418 /* check */
16419 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
16420 if (!endCheck) return ERROR(corruption_detected); }
16422 /* decoded size */
16423 return dstSize;
16427 #if HUF_NEED_BMI2_FUNCTION
16428 static BMI2_TARGET_ATTRIBUTE
16429 size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
16430 size_t cSrcSize, HUF_DTable const* DTable) {
16431 return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
16433 #endif
16435 static
16436 size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
16437 size_t cSrcSize, HUF_DTable const* DTable) {
16438 return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
16441 #if ZSTD_ENABLE_ASM_X86_64_BMI2
16443 HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
16445 #endif
16447 static HUF_FAST_BMI2_ATTRS
16448 void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
16450 U64 bits[4];
16451 BYTE const* ip[4];
16452 BYTE* op[4];
16453 BYTE* oend[4];
16454 HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
16455 BYTE const* const ilowest = args->ilowest;
16457 /* Copy the arguments to local registers. */
16458 ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
16459 ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
16460 ZSTD_memcpy(&op, &args->op, sizeof(op));
16462 oend[0] = op[1];
16463 oend[1] = op[2];
16464 oend[2] = op[3];
16465 oend[3] = args->oend;
16467 assert(MEM_isLittleEndian());
16468 assert(!MEM_32bits());
16470 for (;;) {
16471 BYTE* olimit;
16472 int stream;
16474 /* Assert loop preconditions */
16475 #ifndef NDEBUG
16476 for (stream = 0; stream < 4; ++stream) {
16477 assert(op[stream] <= oend[stream]);
16478 assert(ip[stream] >= ilowest);
16480 #endif
16481 /* Compute olimit */
16483 /* Each loop does 5 table lookups for each of the 4 streams.
16484 * Each table lookup consumes up to 11 bits of input, and produces
16485 * up to 2 bytes of output.
16487 /* We can consume up to 7 bytes of input per iteration per stream.
16488 * We also know that each input pointer is >= ip[0]. So we can run
16489 * iters loops before running out of input.
16491 size_t iters = (size_t)(ip[0] - ilowest) / 7;
16492 /* Each iteration can produce up to 10 bytes of output per stream.
16493 * Each output stream may advance at a different rate. So take the
16494 * minimum number of safe iterations among all the output streams.
16496 for (stream = 0; stream < 4; ++stream) {
16497 size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
16498 iters = MIN(iters, oiters);
16501 /* Each iteration produces at least 5 output symbols. So until
16502 * op[3] crosses olimit, we know we haven't executed iters
16503 * iterations yet. This saves us maintaining an iters counter,
16504 * at the expense of computing the remaining # of iterations
16505 * more frequently.
16507 olimit = op[3] + (iters * 5);
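/* Divisors above: each of the 5 lookups per stream consumes at most 11 bits
 * (55 bits < 7 bytes of input) and emits at most 2 bytes (10 bytes of
 * output), hence the /7 and /10 bounds on safe iterations. */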
16509 /* Exit the fast decoding loop once we reach the end. */
16510 if (op[3] == olimit)
16511 break;
16513 /* Exit the decoding loop if any input pointer has crossed the
16514 * previous one. This indicates corruption, and a precondition
16515 * to our loop is that ip[i] >= ip[0].
16517 for (stream = 1; stream < 4; ++stream) {
16518 if (ip[stream] < ip[stream - 1])
16519 goto _out;
16523 #ifndef NDEBUG
16524 for (stream = 1; stream < 4; ++stream) {
16525 assert(ip[stream] >= ip[stream - 1]);
16527 #endif
16529 #define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \
16530 do { \
16531 if ((_decode3) || (_stream) != 3) { \
16532 int const index = (int)(bits[(_stream)] >> 53); \
16533 HUF_DEltX2 const entry = dtable[index]; \
16534 MEM_write16(op[(_stream)], entry.sequence); \
16535 bits[(_stream)] <<= (entry.nbBits) & 0x3F; \
16536 op[(_stream)] += (entry.length); \
16538 } while (0)
16540 #define HUF_4X2_RELOAD_STREAM(_stream) \
16541 do { \
16542 HUF_4X2_DECODE_SYMBOL(3, 1); \
16544 int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
16545 int const nbBits = ctz & 7; \
16546 int const nbBytes = ctz >> 3; \
16547 ip[(_stream)] -= nbBytes; \
16548 bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \
16549 bits[(_stream)] <<= nbBits; \
16551 } while (0)
16553 /* Manually unroll the loop because compilers don't consistently
16554 * unroll the inner loops, which destroys performance.
16556 do {
16557 /* Decode 5 symbols from each of the first 3 streams.
16558 * The final stream will be decoded during the reload phase
16559 * to reduce register pressure.
16561 HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
16562 HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
16563 HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
16564 HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
16565 HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
16567 /* Decode one symbol from the final stream */
16568 HUF_4X2_DECODE_SYMBOL(3, 1);
16570 /* Decode 4 symbols from the final stream & reload bitstreams.
16571 * The final stream is reloaded last, meaning that all 5 symbols
16572 * are decoded from the final stream before it is reloaded.
16574 HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM);
16575 } while (op[3] < olimit);
16578 #undef HUF_4X2_DECODE_SYMBOL
16579 #undef HUF_4X2_RELOAD_STREAM
16581 _out:
16583 /* Save the final values of each of the state variables back to args. */
16584 ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
16585 ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
16586 ZSTD_memcpy(&args->op, &op, sizeof(op));
16590 static HUF_FAST_BMI2_ATTRS size_t
16591 HUF_decompress4X2_usingDTable_internal_fast(
16592 void* dst, size_t dstSize,
16593 const void* cSrc, size_t cSrcSize,
16594 const HUF_DTable* DTable,
16595 HUF_DecompressFastLoopFn loopFn) {
16596 void const* dt = DTable + 1;
16597 const BYTE* const ilowest = (const BYTE*)cSrc;
16598 BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
16599 HUF_DecompressFastArgs args;
16601 size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
16602 FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
16603 if (ret == 0)
16604 return 0;
16607 assert(args.ip[0] >= args.ilowest);
16608 loopFn(&args);
16610 /* note : op4 already verified within main loop */
16611 assert(args.ip[0] >= ilowest);
16612 assert(args.ip[1] >= ilowest);
16613 assert(args.ip[2] >= ilowest);
16614 assert(args.ip[3] >= ilowest);
16615 assert(args.op[3] <= oend);
16617 assert(ilowest == args.ilowest);
16618 assert(ilowest + 6 == args.iend[0]);
16619 (void)ilowest;
16621 /* finish bitStreams one by one */
16623 size_t const segmentSize = (dstSize+3) / 4;
16624 BYTE* segmentEnd = (BYTE*)dst;
16625 int i;
16626 for (i = 0; i < 4; ++i) {
16627 BIT_DStream_t bit;
16628 if (segmentSize <= (size_t)(oend - segmentEnd))
16629 segmentEnd += segmentSize;
16630 else
16631 segmentEnd = oend;
16632 FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
16633 args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
16634 if (args.op[i] != segmentEnd)
16635 return ERROR(corruption_detected);
16639 /* decoded size */
16640 return dstSize;
16643 static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
16644 size_t cSrcSize, HUF_DTable const* DTable, int flags)
16646 HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default;
16647 HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop;
16649 #if DYNAMIC_BMI2
16650 if (flags & HUF_flags_bmi2) {
16651 fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
16652 # if ZSTD_ENABLE_ASM_X86_64_BMI2
16653 if (!(flags & HUF_flags_disableAsm)) {
16654 loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
16656 # endif
16657 } else {
16658 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
16660 #endif
16662 #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
16663 if (!(flags & HUF_flags_disableAsm)) {
16664 loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
16666 #endif
16668 if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
16669 size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
16670 if (ret != 0)
16671 return ret;
16673 return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
16676 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
16678 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
16679 const void* cSrc, size_t cSrcSize,
16680 void* workSpace, size_t wkspSize, int flags)
16682 const BYTE* ip = (const BYTE*) cSrc;
16684 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
16685 workSpace, wkspSize, flags);
16686 if (HUF_isError(hSize)) return hSize;
16687 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
16688 ip += hSize; cSrcSize -= hSize;
16690 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags);
16693 static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
16694 const void* cSrc, size_t cSrcSize,
16695 void* workSpace, size_t wkspSize, int flags)
16697 const BYTE* ip = (const BYTE*) cSrc;
16699 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
16700 workSpace, wkspSize, flags);
16701 if (HUF_isError(hSize)) return hSize;
16702 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
16703 ip += hSize; cSrcSize -= hSize;
16705 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
16708 #endif /* HUF_FORCE_DECOMPRESS_X1 */
16711 /* ***********************************/
16712 /* Universal decompression selectors */
16713 /* ***********************************/
16716 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
16717 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
16718 static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
16720 /* single, double */
16721 {{0,0}, {1,1}}, /* Q==0 : impossible */
16722 {{0,0}, {1,1}}, /* Q==1 : impossible */
16723 {{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */
16724 {{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */
16725 {{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */
16726 {{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */
16727 {{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */
16728 {{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */
16729 {{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */
16730 {{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */
16731 {{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */
16732 {{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */
16733 {{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */
16734 {{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */
16735 {{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */
16736 {{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */
16738 #endif
16740 /** HUF_selectDecoder() :
16741 * Tells which decoder is likely to decode faster,
16742 * based on a set of pre-computed metrics.
16743 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
16744 * Assumption : 0 < dstSize <= 128 KB */
16745 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
16747 assert(dstSize > 0);
16748 assert(dstSize <= 128*1024);
16749 #if defined(HUF_FORCE_DECOMPRESS_X1)
16750 (void)dstSize;
16751 (void)cSrcSize;
16752 return 0;
16753 #elif defined(HUF_FORCE_DECOMPRESS_X2)
16754 (void)dstSize;
16755 (void)cSrcSize;
16756 return 1;
16757 #else
16758 /* decoder timing evaluation */
16759 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
16760 U32 const D256 = (U32)(dstSize >> 8);
16761 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
16762 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
16763 DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */
16764 return DTime1 < DTime0;
16766 #endif
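/* Worked example: dstSize = 64 KB and cSrcSize = 40000 give Q = 9 and
 * D256 = 256, so DTime0 = 582 + 187*256 = 48454 and
 * DTime1 = 1570 + 114*256 = 30754, plus the 1/32 penalty = 31715.
 * DTime1 < DTime0, so the double-symbol (X2) decoder is selected. */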
16769 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
16770 const void* cSrc, size_t cSrcSize,
16771 void* workSpace, size_t wkspSize, int flags)
16773 /* validation checks */
16774 if (dstSize == 0) return ERROR(dstSize_tooSmall);
16775 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
16776 if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
16777 if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
16779 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
16780 #if defined(HUF_FORCE_DECOMPRESS_X1)
16781 (void)algoNb;
16782 assert(algoNb == 0);
16783 return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
16784 cSrcSize, workSpace, wkspSize, flags);
16785 #elif defined(HUF_FORCE_DECOMPRESS_X2)
16786 (void)algoNb;
16787 assert(algoNb == 1);
16788 return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
16789 cSrcSize, workSpace, wkspSize, flags);
16790 #else
16791 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
16792 cSrcSize, workSpace, wkspSize, flags):
16793 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
16794 cSrcSize, workSpace, wkspSize, flags);
16795 #endif
16800 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
16802 DTableDesc const dtd = HUF_getDTableDesc(DTable);
16803 #if defined(HUF_FORCE_DECOMPRESS_X1)
16804 (void)dtd;
16805 assert(dtd.tableType == 0);
16806 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
16807 #elif defined(HUF_FORCE_DECOMPRESS_X2)
16808 (void)dtd;
16809 assert(dtd.tableType == 1);
16810 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
16811 #else
16812 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
16813 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
16814 #endif
16817 #ifndef HUF_FORCE_DECOMPRESS_X2
16818 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
16820 const BYTE* ip = (const BYTE*) cSrc;
16822 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags);
16823 if (HUF_isError(hSize)) return hSize;
16824 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
16825 ip += hSize; cSrcSize -= hSize;
16827 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
16829 #endif
16831 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags)
16833 DTableDesc const dtd = HUF_getDTableDesc(DTable);
16834 #if defined(HUF_FORCE_DECOMPRESS_X1)
16835 (void)dtd;
16836 assert(dtd.tableType == 0);
16837 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
16838 #elif defined(HUF_FORCE_DECOMPRESS_X2)
16839 (void)dtd;
16840 assert(dtd.tableType == 1);
16841 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
16842 #else
16843 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) :
16844 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags);
16845 #endif
16848 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags)
16850 /* validation checks */
16851 if (dstSize == 0) return ERROR(dstSize_tooSmall);
16852 if (cSrcSize == 0) return ERROR(corruption_detected);
16854 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
16855 #if defined(HUF_FORCE_DECOMPRESS_X1)
16856 (void)algoNb;
16857 assert(algoNb == 0);
16858 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
16859 #elif defined(HUF_FORCE_DECOMPRESS_X2)
16860 (void)algoNb;
16861 assert(algoNb == 1);
16862 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
16863 #else
16864 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) :
16865 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags);
16866 #endif
16869 /**** ended inlining decompress/huf_decompress.c ****/
16870 /**** start inlining decompress/zstd_ddict.c ****/
16872 * Copyright (c) Meta Platforms, Inc. and affiliates.
16873 * All rights reserved.
16875 * This source code is licensed under both the BSD-style license (found in the
16876 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
16877 * in the COPYING file in the root directory of this source tree).
16878 * You may select, at your option, one of the above-listed licenses.
16881 /* zstd_ddict.c :
16882 * concentrates all logic that needs to know the internals of ZSTD_DDict object */
16884 /*-*******************************************************
16885 * Dependencies
16886 *********************************************************/
16887 /**** start inlining ../common/allocations.h ****/
16889 * Copyright (c) Meta Platforms, Inc. and affiliates.
16890 * All rights reserved.
16892 * This source code is licensed under both the BSD-style license (found in the
16893 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
16894 * in the COPYING file in the root directory of this source tree).
16895 * You may select, at your option, one of the above-listed licenses.
16898 /* This file provides custom allocation primitives
16901 #define ZSTD_DEPS_NEED_MALLOC
16902 /**** skipping file: zstd_deps.h ****/
16904 /**** skipping file: compiler.h ****/
16905 #define ZSTD_STATIC_LINKING_ONLY
16906 /**** skipping file: ../zstd.h ****/
16908 #ifndef ZSTD_ALLOCATIONS_H
16909 #define ZSTD_ALLOCATIONS_H
16911 /* custom memory allocation functions */
16913 #endif /* ZSTD_ALLOCATIONS_H */
16914 /**** ended inlining ../common/allocations.h ****/
16915 /**** skipping file: ../common/zstd_deps.h ****/
16916 /**** skipping file: ../common/cpu.h ****/
16917 /**** skipping file: ../common/mem.h ****/
16918 #define FSE_STATIC_LINKING_ONLY
16919 /**** skipping file: ../common/fse.h ****/
16920 /**** skipping file: ../common/huf.h ****/
16921 /**** start inlining zstd_decompress_internal.h ****/
16923 * Copyright (c) Meta Platforms, Inc. and affiliates.
16924 * All rights reserved.
16926 * This source code is licensed under both the BSD-style license (found in the
16927 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
16928 * in the COPYING file in the root directory of this source tree).
16929 * You may select, at your option, one of the above-listed licenses.
16933 /* zstd_decompress_internal:
16934 * objects and definitions shared within lib/decompress modules */
16936 #ifndef ZSTD_DECOMPRESS_INTERNAL_H
16937 #define ZSTD_DECOMPRESS_INTERNAL_H
16940 /*-*******************************************************
16941 * Dependencies
16942 *********************************************************/
16943 /**** skipping file: ../common/mem.h ****/
16944 /**** skipping file: ../common/zstd_internal.h ****/
16948 /*-*******************************************************
16949 * Constants
16950 *********************************************************/
16951 static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
16952 0, 1, 2, 3, 4, 5, 6, 7,
16953 8, 9, 10, 11, 12, 13, 14, 15,
16954 16, 18, 20, 22, 24, 28, 32, 40,
16955 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
16956 0x2000, 0x4000, 0x8000, 0x10000 };
16958 static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
16959 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
16960 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
16961 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
16962 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
16964 static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
16965 0, 1, 2, 3, 4, 5, 6, 7,
16966 8, 9, 10, 11, 12, 13, 14, 15,
16967 16, 17, 18, 19, 20, 21, 22, 23,
16968 24, 25, 26, 27, 28, 29, 30, 31 };
16970 static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
16971 3, 4, 5, 6, 7, 8, 9, 10,
16972 11, 12, 13, 14, 15, 16, 17, 18,
16973 19, 20, 21, 22, 23, 24, 25, 26,
16974 27, 28, 29, 30, 31, 32, 33, 34,
16975 35, 37, 39, 41, 43, 47, 51, 59,
16976 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
16977 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
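/* The *_base tables above hold, per FSE symbol, the baseline value of the
 * decoded field (literal length, offset, match length), and OF_bits the number
 * of extra bits to read and add to that baseline, per the Zstandard format
 * (RFC 8878). For example, offset code 10 has base 0x3FD and reads 10 extra
 * bits. The matching LL_bits/ML_bits tables come from zstd_internal.h, inlined
 * earlier in this amalgamation. */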
16980 /*-*******************************************************
16981 * Decompression types
16982 *********************************************************/
16983 typedef struct {
16984 U32 fastMode;
16985 U32 tableLog;
16986 } ZSTD_seqSymbol_header;
16988 typedef struct {
16989 U16 nextState;
16990 BYTE nbAdditionalBits;
16991 BYTE nbBits;
16992 U32 baseValue;
16993 } ZSTD_seqSymbol;
16995 #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
16997 #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
16998 #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
16999 #define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
17001 typedef struct {
17002 ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
17003 ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
17004 ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
17005 HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
17006 U32 rep[ZSTD_REP_NUM];
17007 U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
17008 } ZSTD_entropyDTables_t;
17010 typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
17011 ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
17012 ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
17013 ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
17015 typedef enum { zdss_init=0, zdss_loadHeader,
17016 zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
17018 typedef enum {
17019 ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */
17020 ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */
17021 ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
17022 } ZSTD_dictUses_e;
17024 /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
17025 typedef struct {
17026 const ZSTD_DDict** ddictPtrTable;
17027 size_t ddictPtrTableSize;
17028 size_t ddictPtrCount;
17029 } ZSTD_DDictHashSet;
17031 #ifndef ZSTD_DECODER_INTERNAL_BUFFER
17032 # define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
17033 #endif
17035 #define ZSTD_LBMIN 64
17036 #define ZSTD_LBMAX (128 << 10)
17038 /* extra buffer, compensates when dst is not large enough to store litBuffer */
17039 #define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
17041 typedef enum {
17042 ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
17043 ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
17044 ZSTD_split = 2 /* Split between litExtraBuffer and dst */
17045 } ZSTD_litLocation_e;
17047 struct ZSTD_DCtx_s
17049 const ZSTD_seqSymbol* LLTptr;
17050 const ZSTD_seqSymbol* MLTptr;
17051 const ZSTD_seqSymbol* OFTptr;
17052 const HUF_DTable* HUFptr;
17053 ZSTD_entropyDTables_t entropy;
17054 U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
17055 const void* previousDstEnd; /* detect continuity */
17056 const void* prefixStart; /* start of current segment */
17057 const void* virtualStart; /* virtual start of previous segment if it was just before current one */
17058 const void* dictEnd; /* end of previous segment */
17059 size_t expected;
17060 ZSTD_frameHeader fParams;
17061 U64 processedCSize;
17062 U64 decodedSize;
17063 blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
17064 ZSTD_dStage stage;
17065 U32 litEntropy;
17066 U32 fseEntropy;
17067 XXH64_state_t xxhState;
17068 size_t headerSize;
17069 ZSTD_format_e format;
17070 ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
17071 U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
17072 const BYTE* litPtr;
17073 ZSTD_customMem customMem;
17074 size_t litSize;
17075 size_t rleSize;
17076 size_t staticSize;
17077 int isFrameDecompression;
17078 #if DYNAMIC_BMI2 != 0
17079 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
17080 #endif
17082 /* dictionary */
17083 ZSTD_DDict* ddictLocal;
17084 const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
17085 U32 dictID;
17086 int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
17087 ZSTD_dictUses_e dictUses;
17088 ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
17089 ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
17090 int disableHufAsm;
17091 int maxBlockSizeParam;
17093 /* streaming */
17094 ZSTD_dStreamStage streamStage;
17095 char* inBuff;
17096 size_t inBuffSize;
17097 size_t inPos;
17098 size_t maxWindowSize;
17099 char* outBuff;
17100 size_t outBuffSize;
17101 size_t outStart;
17102 size_t outEnd;
17103 size_t lhSize;
17104 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
17105 void* legacyContext;
17106 U32 previousLegacyVersion;
17107 U32 legacyVersion;
17108 #endif
17109 U32 hostageByte;
17110 int noForwardProgress;
17111 ZSTD_bufferMode_e outBufferMode;
17112 ZSTD_outBuffer expectedOutBuffer;
17114 /* workspace */
17115 BYTE* litBuffer;
17116 const BYTE* litBufferEnd;
17117 ZSTD_litLocation_e litBufferLocation;
17118 BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
17119 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
17121 size_t oversizedDuration;
17123 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
17124 void const* dictContentBeginForFuzzing;
17125 void const* dictContentEndForFuzzing;
17126 #endif
17128 /* Tracing */
17129 #if ZSTD_TRACE
17130 ZSTD_TraceCtx traceCtx;
17131 #endif
17132 }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
17134 MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
17135 #if DYNAMIC_BMI2 != 0
17136 return dctx->bmi2;
17137 #else
17138 (void)dctx;
17139 return 0;
17140 #endif
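/* With DYNAMIC_BMI2 enabled, the context caches a single runtime CPU probe
 * (dctx->bmi2 is set from ZSTD_cpuSupportsBmi2() in ZSTD_initDCtx_internal()
 * below), so hot entropy-decoding paths can pick BMI2 variants without
 * re-querying the CPU; otherwise this accessor simply reports 0. */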
17143 /*-*******************************************************
17144 * Shared internal functions
17145 *********************************************************/
17147 /*! ZSTD_loadDEntropy() :
17148 * dict : must point at beginning of a valid zstd dictionary.
17149 * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
17150 size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
17151 const void* const dict, size_t const dictSize);
17153 /*! ZSTD_checkContinuity() :
17154 * check if next `dst` follows previous position, where decompression ended.
17155 * If yes, do nothing (continue on current segment).
17156 * If not, classify previous segment as "external dictionary", and start a new segment.
17157 * This function cannot fail. */
17158 void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
17161 #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
17162 /**** ended inlining zstd_decompress_internal.h ****/
17163 /**** start inlining zstd_ddict.h ****/
17165 * Copyright (c) Meta Platforms, Inc. and affiliates.
17166 * All rights reserved.
17168 * This source code is licensed under both the BSD-style license (found in the
17169 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
17170 * in the COPYING file in the root directory of this source tree).
17171 * You may select, at your option, one of the above-listed licenses.
17175 #ifndef ZSTD_DDICT_H
17176 #define ZSTD_DDICT_H
17178 /*-*******************************************************
17179 * Dependencies
17180 *********************************************************/
17181 /**** skipping file: ../common/zstd_deps.h ****/
17182 /**** skipping file: ../zstd.h ****/
17185 /*-*******************************************************
17186 * Interface
17187 *********************************************************/
17189 /* note: several prototypes are already published in `zstd.h` :
17190 * ZSTD_createDDict()
17191 * ZSTD_createDDict_byReference()
17192 * ZSTD_createDDict_advanced()
17193 * ZSTD_freeDDict()
17194 * ZSTD_initStaticDDict()
17195 * ZSTD_sizeof_DDict()
17196 * ZSTD_estimateDDictSize()
17197 * ZSTD_getDictID_fromDict()
17198 */
17200 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
17201 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
17203 void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
17207 #endif /* ZSTD_DDICT_H */
17208 /**** ended inlining zstd_ddict.h ****/
17210 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
17211 #error Using excluded file: ../legacy/zstd_legacy.h (re-amalgamate source to fix)
17212 #endif
17216 /*-*******************************************************
17217 * Types
17218 *********************************************************/
17219 struct ZSTD_DDict_s {
17220 void* dictBuffer;
17221 const void* dictContent;
17222 size_t dictSize;
17223 ZSTD_entropyDTables_t entropy;
17224 U32 dictID;
17225 U32 entropyPresent;
17226 ZSTD_customMem cMem;
17227 }; /* typedef'd to ZSTD_DDict within "zstd.h" */
17229 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
17231 assert(ddict != NULL);
17232 return ddict->dictContent;
17235 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
17237 assert(ddict != NULL);
17238 return ddict->dictSize;
17241 void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
17243 DEBUGLOG(4, "ZSTD_copyDDictParameters");
17244 assert(dctx != NULL);
17245 assert(ddict != NULL);
17246 dctx->dictID = ddict->dictID;
17247 dctx->prefixStart = ddict->dictContent;
17248 dctx->virtualStart = ddict->dictContent;
17249 dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
17250 dctx->previousDstEnd = dctx->dictEnd;
17251 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
17252 dctx->dictContentBeginForFuzzing = dctx->prefixStart;
17253 dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
17254 #endif
17255 if (ddict->entropyPresent) {
17256 dctx->litEntropy = 1;
17257 dctx->fseEntropy = 1;
17258 dctx->LLTptr = ddict->entropy.LLTable;
17259 dctx->MLTptr = ddict->entropy.MLTable;
17260 dctx->OFTptr = ddict->entropy.OFTable;
17261 dctx->HUFptr = ddict->entropy.hufTable;
17262 dctx->entropy.rep[0] = ddict->entropy.rep[0];
17263 dctx->entropy.rep[1] = ddict->entropy.rep[1];
17264 dctx->entropy.rep[2] = ddict->entropy.rep[2];
17265 } else {
17266 dctx->litEntropy = 0;
17267 dctx->fseEntropy = 0;
17272 static size_t
17273 ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
17274 ZSTD_dictContentType_e dictContentType)
17276 ddict->dictID = 0;
17277 ddict->entropyPresent = 0;
17278 if (dictContentType == ZSTD_dct_rawContent) return 0;
17280 if (ddict->dictSize < 8) {
17281 if (dictContentType == ZSTD_dct_fullDict)
17282 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
17283 return 0; /* pure content mode */
17285 { U32 const magic = MEM_readLE32(ddict->dictContent);
17286 if (magic != ZSTD_MAGIC_DICTIONARY) {
17287 if (dictContentType == ZSTD_dct_fullDict)
17288 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
17289 return 0; /* pure content mode */
17292 ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
17294 /* load entropy tables */
17295 RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
17296 &ddict->entropy, ddict->dictContent, ddict->dictSize)),
17297 dictionary_corrupted, "");
17298 ddict->entropyPresent = 1;
17299 return 0;
17303 static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
17304 const void* dict, size_t dictSize,
17305 ZSTD_dictLoadMethod_e dictLoadMethod,
17306 ZSTD_dictContentType_e dictContentType)
17308 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
17309 ddict->dictBuffer = NULL;
17310 ddict->dictContent = dict;
17311 if (!dict) dictSize = 0;
17312 } else {
17313 void* const internalBuffer = VG_(malloc)("zstddeclib.ZSTD_initDDict_internal.1", dictSize);
17314 ddict->dictBuffer = internalBuffer;
17315 ddict->dictContent = internalBuffer;
17316 if (!internalBuffer) return ERROR(memory_allocation);
17317 ZSTD_memcpy(internalBuffer, dict, dictSize);
17319 ddict->dictSize = dictSize;
17320 ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
17322 /* parse dictionary content */
17323 FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
17325 return 0;
17328 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
17329 ZSTD_dictLoadMethod_e dictLoadMethod,
17330 ZSTD_dictContentType_e dictContentType,
17331 ZSTD_customMem customMem)
17333 if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
17335 { ZSTD_DDict* const ddict = (ZSTD_DDict*) VG_(malloc)("zstddeclib.ZSTD_createDDict_advanced.1", sizeof(ZSTD_DDict));
17336 if (ddict == NULL) return NULL;
17337 ddict->cMem = customMem;
17338 { size_t const initResult = ZSTD_initDDict_internal(ddict,
17339 dict, dictSize,
17340 dictLoadMethod, dictContentType);
17341 if (ZSTD_isError(initResult)) {
17342 ZSTD_freeDDict(ddict);
17343 return NULL;
17345 return ddict;
17349 /*! ZSTD_createDDict() :
17350 * Create a digested dictionary, to start decompression without startup delay.
17351 * `dict` content is copied inside DDict.
17352 * Consequently, `dict` can be released after `ZSTD_DDict` creation */
17353 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
17355 ZSTD_customMem const allocator = { NULL, NULL, NULL };
17356 return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
17359 /*! ZSTD_createDDict_byReference() :
17360 * Create a digested dictionary, to start decompression without startup delay.
17361 * Dictionary content is simply referenced, it will be accessed during decompression.
17362 * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
17363 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
17365 ZSTD_customMem const allocator = { NULL, NULL, NULL };
17366 return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
17370 const ZSTD_DDict* ZSTD_initStaticDDict(
17371 void* sBuffer, size_t sBufferSize,
17372 const void* dict, size_t dictSize,
17373 ZSTD_dictLoadMethod_e dictLoadMethod,
17374 ZSTD_dictContentType_e dictContentType)
17376 size_t const neededSpace = sizeof(ZSTD_DDict)
17377 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
17378 ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
17379 assert(sBuffer != NULL);
17380 assert(dict != NULL);
17381 if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
17382 if (sBufferSize < neededSpace) return NULL;
17383 if (dictLoadMethod == ZSTD_dlm_byCopy) {
17384 ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
17385 dict = ddict+1;
17387 if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
17388 dict, dictSize,
17389 ZSTD_dlm_byRef, dictContentType) ))
17390 return NULL;
17391 return ddict;
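/* Rough usage sketch for the static variant above (illustrative only; the
 * buffer names are hypothetical): the caller supplies one 8-byte-aligned
 * workspace of at least ZSTD_estimateDDictSize(dictSize, dictLoadMethod) bytes:
 *
 *     size_t const need = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy);
 *     const ZSTD_DDict* dd = ZSTD_initStaticDDict(wksp, need, dictBuf, dictSize,
 *                                                 ZSTD_dlm_byCopy, ZSTD_dct_auto);
 *
 * With ZSTD_dlm_byCopy the dictionary bytes are copied right after the struct
 * (ddict+1), so dictBuf may be released afterwards; with ZSTD_dlm_byRef the
 * workspace only needs sizeof(ZSTD_DDict) bytes but dictBuf must stay alive. */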
17395 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
17397 if (ddict==NULL) return 0; /* support free on NULL */
17399 VG_(free)(ddict->dictBuffer);
17400 VG_(free)(ddict);
17401 return 0;
17405 /*! ZSTD_estimateDDictSize() :
17406 * Estimate amount of memory that will be needed to create a dictionary for decompression.
17407 * Note : dictionaries created by reference using ZSTD_dlm_byRef are smaller */
17408 size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
17410 return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
17413 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
17415 if (ddict==NULL) return 0; /* support sizeof on NULL */
17416 return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
17419 /*! ZSTD_getDictID_fromDDict() :
17420 * Provides the dictID of the dictionary loaded into `ddict`.
17421 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
17422 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
17423 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
17425 if (ddict==NULL) return 0;
17426 return ddict->dictID;
17428 /**** ended inlining decompress/zstd_ddict.c ****/
17429 /**** start inlining decompress/zstd_decompress.c ****/
17431 * Copyright (c) Meta Platforms, Inc. and affiliates.
17432 * All rights reserved.
17434 * This source code is licensed under both the BSD-style license (found in the
17435 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
17436 * in the COPYING file in the root directory of this source tree).
17437 * You may select, at your option, one of the above-listed licenses.
17441 /* ***************************************************************
17442 * Tuning parameters
17443 *****************************************************************/
17444 /*!
17445 * HEAPMODE :
17446 * Select how default decompression function ZSTD_decompress() allocates its context,
17447 * on stack (0), or into heap (1, default; requires malloc()).
17448 * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
17449 */
17450 #ifndef ZSTD_HEAPMODE
17451 # define ZSTD_HEAPMODE 1
17452 #endif
17454 /*!
17455 * LEGACY_SUPPORT :
17456 * if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
17457 */
17458 #ifndef ZSTD_LEGACY_SUPPORT
17459 # define ZSTD_LEGACY_SUPPORT 0
17460 #endif
17462 /*!
17463 * MAXWINDOWSIZE_DEFAULT :
17464 * maximum window size accepted by DStream __by default__.
17465 * Frames requiring more memory will be rejected.
17466 * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
17467 */
17468 #ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
17469 # define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
17470 #endif
17472 /*!
17473 * NO_FORWARD_PROGRESS_MAX :
17474 * maximum allowed nb of calls to ZSTD_decompressStream()
17475 * without any forward progress
17476 * (defined as: no byte read from input, and no byte flushed to output)
17477 * before triggering an error.
17478 */
17479 #ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
17480 # define ZSTD_NO_FORWARD_PROGRESS_MAX 16
17481 #endif
17484 /*-*******************************************************
17485 * Dependencies
17486 *********************************************************/
17487 /**** skipping file: ../common/zstd_deps.h ****/
17488 /**** skipping file: ../common/allocations.h ****/
17489 /**** skipping file: ../common/error_private.h ****/
17490 /**** skipping file: ../common/zstd_internal.h ****/
17491 /**** skipping file: ../common/mem.h ****/
17492 /**** skipping file: ../common/bits.h ****/
17493 #define FSE_STATIC_LINKING_ONLY
17494 /**** skipping file: ../common/fse.h ****/
17495 /**** skipping file: ../common/huf.h ****/
17496 /**** skipping file: ../common/xxhash.h ****/
17497 /**** skipping file: zstd_decompress_internal.h ****/
17498 /**** skipping file: zstd_ddict.h ****/
17499 /**** start inlining zstd_decompress_block.h ****/
17501 * Copyright (c) Meta Platforms, Inc. and affiliates.
17502 * All rights reserved.
17504 * This source code is licensed under both the BSD-style license (found in the
17505 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
17506 * in the COPYING file in the root directory of this source tree).
17507 * You may select, at your option, one of the above-listed licenses.
17511 #ifndef ZSTD_DEC_BLOCK_H
17512 #define ZSTD_DEC_BLOCK_H
17514 /*-*******************************************************
17515 * Dependencies
17516 *********************************************************/
17517 /**** skipping file: ../common/zstd_deps.h ****/
17518 /**** skipping file: ../zstd.h ****/
17519 /**** skipping file: ../common/zstd_internal.h ****/
17520 /**** skipping file: zstd_decompress_internal.h ****/
17523 /* === Prototypes === */
17525 /* note: prototypes already published within `zstd.h` :
17526 * ZSTD_decompressBlock()
17527 */
17529 /* note: prototypes already published within `zstd_internal.h` :
17530 * ZSTD_getcBlockSize()
17531 * ZSTD_decodeSeqHeaders()
17532 */
17535 /* Streaming state is used to inform allocation of the literal buffer */
17536 typedef enum {
17537 not_streaming = 0,
17538 is_streaming = 1
17539 } streaming_operation;
17541 /* ZSTD_decompressBlock_internal() :
17542 * decompress block, starting at `src`,
17543 * into destination buffer `dst`.
17544 * @return : decompressed block size,
17545 * or an error code (which can be tested using ZSTD_isError())
17546 */
17547 size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
17548 void* dst, size_t dstCapacity,
17549 const void* src, size_t srcSize, const streaming_operation streaming);
17551 /* ZSTD_buildFSETable() :
17552 * generate FSE decoding table for one symbol (ll, ml or off)
17553 * this function must be called with valid parameters only
17554 * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
17555 * in which case it cannot fail.
17556 * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
17557 * defined in zstd_decompress_internal.h.
17558 * Internal use only.
17559 */
17560 void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
17561 const short* normalizedCounter, unsigned maxSymbolValue,
17562 const U32* baseValue, const U8* nbAdditionalBits,
17563 unsigned tableLog, void* wksp, size_t wkspSize,
17564 int bmi2);
17566 /* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
17567 size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
17568 void* dst, size_t dstCapacity,
17569 const void* src, size_t srcSize);
17572 #endif /* ZSTD_DEC_BLOCK_H */
17573 /**** ended inlining zstd_decompress_block.h ****/
17575 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
17576 #error Using excluded file: ../legacy/zstd_legacy.h (re-amalgamate source to fix)
17577 #endif
17581 /*************************************
17582 * Multiple DDicts Hashset internals *
17583 *************************************/
17585 #define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
17586 #define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
17587 * Currently, that means a 0.75 load factor.
17588 * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
17589 * the load factor of the ddict hash set.
17590 */
17592 #define DDICT_HASHSET_TABLE_BASE_SIZE 64
17593 #define DDICT_HASHSET_RESIZE_FACTOR 2
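/* The DDict hash set below is a small open-addressing table with linear
 * probing: the table size stays a power of two, the key is XXH64 of the 4-byte
 * dictID, and collisions simply walk to the next slot (wrapping via the
 * size-1 mask). Once the approximate load factor encoded by the two *_MULT
 * constants is exceeded, ZSTD_DDictHashSet_expand() doubles the table and
 * re-inserts every entry. */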
17595 /* Hash function to determine starting position of dict insertion within the table
17596 * Returns an index within [0, hashSet->ddictPtrTableSize)
17597 */
17598 static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
17599 const U64 hash = XXH64(&dictID, sizeof(U32), 0);
17600 /* DDict ptr table size is a power of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */
17601 return hash & (hashSet->ddictPtrTableSize - 1);
17604 /* Adds DDict to a hashset without resizing it.
17605 * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set.
17606 * Returns 0 if successful, or a zstd error code if something went wrong.
17607 */
17608 static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) {
17609 const U32 dictID = ZSTD_getDictID_fromDDict(ddict);
17610 size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
17611 const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
17612 RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
17613 DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
17614 while (hashSet->ddictPtrTable[idx] != NULL) {
17615 /* Replace existing ddict if inserting ddict with same dictID */
17616 if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) {
17617 DEBUGLOG(4, "DictID already exists, replacing rather than adding");
17618 hashSet->ddictPtrTable[idx] = ddict;
17619 return 0;
17621 idx &= idxRangeMask;
17622 idx++;
17624 DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
17625 hashSet->ddictPtrTable[idx] = ddict;
17626 hashSet->ddictPtrCount++;
17627 return 0;
17630 /* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and
17631 * rehashes all values, allocates new table, frees old table.
17632 * Returns 0 on success, otherwise a zstd error code.
17633 */
17634 static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
17635 size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR;
17636 const ZSTD_DDict** newTable = (const ZSTD_DDict**)VG_(calloc)("zstddeclib.ZSTD_DDictHashSet_expand.1", 1, sizeof(ZSTD_DDict*) * newTableSize);
17637 const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
17638 size_t oldTableSize = hashSet->ddictPtrTableSize;
17639 size_t i;
17641 DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
17642 RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
17643 hashSet->ddictPtrTable = newTable;
17644 hashSet->ddictPtrTableSize = newTableSize;
17645 hashSet->ddictPtrCount = 0;
17646 for (i = 0; i < oldTableSize; ++i) {
17647 if (oldTable[i] != NULL) {
17648 FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), "");
17651 VG_(free)((void*)oldTable);
17652 DEBUGLOG(4, "Finished re-hash");
17653 return 0;
17656 /* Fetches a DDict with the given dictID
17657 * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL.
17658 */
17659 static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) {
17660 size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
17661 const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
17662 DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
17663 for (;;) {
17664 size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
17665 if (currDictID == dictID || currDictID == 0) {
17666 /* currDictID == 0 implies a NULL ddict entry */
17667 break;
17668 } else {
17669 idx &= idxRangeMask; /* Goes to start of table when we reach the end */
17670 idx++;
17673 DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
17674 return hashSet->ddictPtrTable[idx];
17677 /* Allocates space for and returns a ddict hash set
17678 * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with.
17679 * Returns NULL if allocation failed.
17680 */
17681 static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
17682 ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)VG_(malloc)("zstddeclib.ZSTD_createDDictHashSet.1", sizeof(ZSTD_DDictHashSet));
17683 DEBUGLOG(4, "Allocating new hash set");
17684 if (!ret)
17685 return NULL;
17686 ret->ddictPtrTable = (const ZSTD_DDict**)VG_(calloc)("zstddeclib.ZSTD_createDDictHashSet.2", 1, DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*));
17687 if (!ret->ddictPtrTable) {
17688 VG_(free)(ret);
17689 return NULL;
17691 ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
17692 ret->ddictPtrCount = 0;
17693 return ret;
17696 /* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
17697 * Note: The ZSTD_DDict* within the table are NOT freed.
17698 */
17699 static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet) {
17700 DEBUGLOG(4, "Freeing ddict hash set");
17701 if (hashSet && hashSet->ddictPtrTable) {
17702 VG_(free)((void*)hashSet->ddictPtrTable);
17704 if (hashSet) {
17705 VG_(free)(hashSet);
17709 /* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
17710 * Returns 0 on success, or a ZSTD error.
17711 */
17712 static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) {
17713 DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize);
17714 if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) {
17715 FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), "");
17717 FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), "");
17718 return 0;
17721 /*-*************************************************************
17722 * Context management
17723 ***************************************************************/
17724 size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
17726 if (dctx==NULL) return 0; /* support sizeof NULL */
17727 return sizeof(*dctx)
17728 + ZSTD_sizeof_DDict(dctx->ddictLocal)
17729 + dctx->inBuffSize + dctx->outBuffSize;
17732 size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
17735 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
17737 size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
17738 /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
17739 assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
17740 return startingInputLength;
17743 static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
17745 assert(dctx->streamStage == zdss_init);
17746 dctx->format = ZSTD_f_zstd1;
17747 dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
17748 dctx->outBufferMode = ZSTD_bm_buffered;
17749 dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
17750 dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
17751 dctx->disableHufAsm = 0;
17752 dctx->maxBlockSizeParam = 0;
17755 static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
17757 dctx->staticSize = 0;
17758 dctx->ddict = NULL;
17759 dctx->ddictLocal = NULL;
17760 dctx->dictEnd = NULL;
17761 dctx->ddictIsCold = 0;
17762 dctx->dictUses = ZSTD_dont_use;
17763 dctx->inBuff = NULL;
17764 dctx->inBuffSize = 0;
17765 dctx->outBuffSize = 0;
17766 dctx->streamStage = zdss_init;
17767 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
17768 dctx->legacyContext = NULL;
17769 dctx->previousLegacyVersion = 0;
17770 #endif
17771 dctx->noForwardProgress = 0;
17772 dctx->oversizedDuration = 0;
17773 dctx->isFrameDecompression = 1;
17774 #if DYNAMIC_BMI2
17775 dctx->bmi2 = ZSTD_cpuSupportsBmi2();
17776 #endif
17777 dctx->ddictSet = NULL;
17778 ZSTD_DCtx_resetParameters(dctx);
17779 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
17780 dctx->dictContentEndForFuzzing = NULL;
17781 #endif
17784 ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
17786 ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace;
17788 if ((size_t)workspace & 7) return NULL; /* 8-aligned */
17789 if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */
17791 ZSTD_initDCtx_internal(dctx);
17792 dctx->staticSize = workspaceSize;
17793 dctx->inBuff = (char*)(dctx+1);
17794 return dctx;
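/* For the static DCtx above, a non-zero staticSize acts as a marker that the
 * context lives in caller-provided memory: ZSTD_freeDCtx() refuses to free it,
 * and the streaming input buffer is carved out right after the struct
 * (dctx->inBuff = (char*)(dctx+1)) instead of being heap-allocated. */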
17797 static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) {
17798 if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
17800 { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)VG_(malloc)("zstddeclib.ZSTD_createDCtx_internal.1", sizeof(*dctx));
17801 if (!dctx) return NULL;
17802 dctx->customMem = customMem;
17803 ZSTD_initDCtx_internal(dctx);
17804 return dctx;
17808 ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
17810 return ZSTD_createDCtx_internal(customMem);
17813 ZSTD_DCtx* ZSTD_createDCtx(void)
17815 DEBUGLOG(3, "ZSTD_createDCtx");
17816 return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
17819 static void ZSTD_clearDict(ZSTD_DCtx* dctx)
17821 ZSTD_freeDDict(dctx->ddictLocal);
17822 dctx->ddictLocal = NULL;
17823 dctx->ddict = NULL;
17824 dctx->dictUses = ZSTD_dont_use;
17827 size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
17829 if (dctx==NULL) return 0; /* support free on NULL */
17830 RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
17832 ZSTD_clearDict(dctx);
17833 VG_(free)(dctx->inBuff);
17834 dctx->inBuff = NULL;
17835 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
17836 if (dctx->legacyContext)
17837 ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
17838 #endif
17839 if (dctx->ddictSet) {
17840 ZSTD_freeDDictHashSet(dctx->ddictSet);
17841 dctx->ddictSet = NULL;
17843 VG_(free)(dctx);
17844 return 0;
17848 /* no longer useful */
17849 void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
17851 size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
17852 ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */
17855 /* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on
17856 * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then
17857 * accordingly sets the ddict to be used to decompress the frame.
17859 * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is.
17861 * ZSTD_d_refMultipleDDicts must be enabled for this function to be called.
17862 */
17863 static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
17864 assert(dctx->refMultipleDDicts && dctx->ddictSet);
17865 DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
17866 if (dctx->ddict) {
17867 const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
17868 if (frameDDict) {
17869 DEBUGLOG(4, "DDict found!");
17870 ZSTD_clearDict(dctx);
17871 dctx->dictID = dctx->fParams.dictID;
17872 dctx->ddict = frameDDict;
17873 dctx->dictUses = ZSTD_use_indefinitely;
17879 /*-*************************************************************
17880 * Frame header decoding
17881 ***************************************************************/
17883 /*! ZSTD_isFrame() :
17884 * Tells if the content of `buffer` starts with a valid Frame Identifier.
17885 * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
17886 * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
17887 * Note 3 : Skippable Frame Identifiers are considered valid. */
17888 unsigned ZSTD_isFrame(const void* buffer, size_t size)
17890 if (size < ZSTD_FRAMEIDSIZE) return 0;
17891 { U32 const magic = MEM_readLE32(buffer);
17892 if (magic == ZSTD_MAGICNUMBER) return 1;
17893 if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
17895 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
17896 if (ZSTD_isLegacy(buffer, size)) return 1;
17897 #endif
17898 return 0;
17901 /*! ZSTD_isSkippableFrame() :
17902 * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
17903 * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
17905 unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size)
17907 if (size < ZSTD_FRAMEIDSIZE) return 0;
17908 { U32 const magic = MEM_readLE32(buffer);
17909 if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
17911 return 0;
17914 /** ZSTD_frameHeaderSize_internal() :
17915 * srcSize must be large enough to reach header size fields.
17916 * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
17917 * @return : size of the Frame Header
17918 * or an error code, which can be tested with ZSTD_isError() */
17919 static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
17921 size_t const minInputSize = ZSTD_startingInputLength(format);
17922 RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, "");
17924 { BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
17925 U32 const dictID= fhd & 3;
17926 U32 const singleSegment = (fhd >> 5) & 1;
17927 U32 const fcsId = fhd >> 6;
17928 return minInputSize + !singleSegment
17929 + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
17930 + (singleSegment && !fcsId);
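/* The byte inspected above is the Frame_Header_Descriptor: bits 0-1 select the
 * Dictionary_ID field width (via ZSTD_did_fieldSize), bit 2 is the
 * Content_Checksum flag, bit 5 the Single_Segment flag, and bits 6-7 select the
 * Frame_Content_Size field width (via ZSTD_fcs_fieldSize). A single-segment
 * frame omits the Window_Descriptor byte but always carries at least one byte
 * of content size, hence the final "+ (singleSegment && !fcsId)" term. */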
17934 /** ZSTD_frameHeaderSize() :
17935 * srcSize must be >= ZSTD_frameHeaderSize_prefix.
17936 * @return : size of the Frame Header,
17937 * or an error code (if srcSize is too small) */
17938 size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
17940 return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
17944 /** ZSTD_getFrameHeader_advanced() :
17945 * decode Frame Header, or require larger `srcSize`.
17946 * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
17947 * @return : 0, `zfhPtr` is correctly filled,
17948 * >0, `srcSize` is too small, value is wanted `srcSize` amount,
17949 ** or an error code, which can be tested using ZSTD_isError() */
17950 size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
17952 const BYTE* ip = (const BYTE*)src;
17953 size_t const minInputSize = ZSTD_startingInputLength(format);
17955 DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize);
17957 if (srcSize > 0) {
17958 /* note : technically could be considered an assert(), since it's an invalid entry */
17959 RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0");
17961 if (srcSize < minInputSize) {
17962 if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) {
17963 /* when receiving less than @minInputSize bytes,
17964 * check that these bytes at least correspond to a supported magic number
17965 * in order to error out early if they don't.
17966 */
17967 size_t const toCopy = MIN(4, srcSize);
17968 unsigned char hbuf[4]; MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER);
17969 assert(src != NULL);
17970 ZSTD_memcpy(hbuf, src, toCopy);
17971 if ( MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER ) {
17972 /* not a zstd frame : let's check if it's a skippable frame */
17973 MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START);
17974 ZSTD_memcpy(hbuf, src, toCopy);
17975 if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) {
17976 RETURN_ERROR(prefix_unknown,
17977 "first bytes don't correspond to any supported magic number");
17978 } } }
17979 return minInputSize;
17982 ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */
17983 if ( (format != ZSTD_f_zstd1_magicless)
17984 && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
17985 if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
17986 /* skippable frame */
17987 if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
17988 return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
17989 ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));
17990 zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
17991 zfhPtr->frameType = ZSTD_skippableFrame;
17992 return 0;
17994 RETURN_ERROR(prefix_unknown, "");
17997 /* ensure there is enough `srcSize` to fully read/decode frame header */
17998 { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format);
17999 if (srcSize < fhsize) return fhsize;
18000 zfhPtr->headerSize = (U32)fhsize;
18003 { BYTE const fhdByte = ip[minInputSize-1];
18004 size_t pos = minInputSize;
18005 U32 const dictIDSizeCode = fhdByte&3;
18006 U32 const checksumFlag = (fhdByte>>2)&1;
18007 U32 const singleSegment = (fhdByte>>5)&1;
18008 U32 const fcsID = fhdByte>>6;
18009 U64 windowSize = 0;
18010 U32 dictID = 0;
18011 U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
18012 RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
18013 "reserved bits, must be zero");
18015 if (!singleSegment) {
18016 BYTE const wlByte = ip[pos++];
18017 U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
18018 RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, "");
18019 windowSize = (1ULL << windowLog);
18020 windowSize += (windowSize >> 3) * (wlByte&7);
18022 switch(dictIDSizeCode)
18024 default:
18025 assert(0); /* impossible */
18026 ZSTD_FALLTHROUGH;
18027 case 0 : break;
18028 case 1 : dictID = ip[pos]; pos++; break;
18029 case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
18030 case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
18032 switch(fcsID)
18034 default:
18035 assert(0); /* impossible */
18036 ZSTD_FALLTHROUGH;
18037 case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
18038 case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
18039 case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
18040 case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
18042 if (singleSegment) windowSize = frameContentSize;
18044 zfhPtr->frameType = ZSTD_frame;
18045 zfhPtr->frameContentSize = frameContentSize;
18046 zfhPtr->windowSize = windowSize;
18047 zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
18048 zfhPtr->dictID = dictID;
18049 zfhPtr->checksumFlag = checksumFlag;
18051 return 0;
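/* Window size decoding above follows the format spec: the Window_Descriptor
 * byte splits into a 5-bit exponent and a 3-bit mantissa, giving
 *   windowSize = (1ULL << (exponent + ZSTD_WINDOWLOG_ABSOLUTEMIN)) * (8 + mantissa) / 8
 * Single-segment frames carry no Window_Descriptor; their window is simply the
 * declared frame content size. */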
18054 /** ZSTD_getFrameHeader() :
18055 * decode Frame Header, or require larger `srcSize`.
18056 * note : this function does not consume input, it only reads it.
18057 * @return : 0, `zfhPtr` is correctly filled,
18058 * >0, `srcSize` is too small, value is wanted `srcSize` amount,
18059 * or an error code, which can be tested using ZSTD_isError() */
18060 size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
18062 return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
18065 /** ZSTD_getFrameContentSize() :
18066 * compatible with legacy mode
18067 * @return : decompressed size of the single frame pointed to be `src` if known, otherwise
18068 * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
18069 * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
18070 unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
18072 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
18073 if (ZSTD_isLegacy(src, srcSize)) {
18074 unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize);
18075 return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
18077 #endif
18078 { ZSTD_frameHeader zfh;
18079 if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
18080 return ZSTD_CONTENTSIZE_ERROR;
18081 if (zfh.frameType == ZSTD_skippableFrame) {
18082 return 0;
18083 } else {
18084 return zfh.frameContentSize;
18088 static size_t readSkippableFrameSize(void const* src, size_t srcSize)
18090 size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
18091 U32 sizeU32;
18093 RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
18095 sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
18096 RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
18097 frameParameter_unsupported, "");
18098 { size_t const skippableSize = skippableHeaderSize + sizeU32;
18099 RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
18100 return skippableSize;
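/* Skippable frames are an 8-byte header (a magic number in the
 * ZSTD_MAGIC_SKIPPABLE_START..+15 family followed by a 32-bit little-endian
 * content size) plus opaque payload; the helper above also guards against
 * 32-bit overflow when adding the header size to the declared content size. */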
18104 /*! ZSTD_readSkippableFrame() :
18105 * Retrieves content of a skippable frame, and writes it to dst buffer.
18107 * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
18108 * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
18109 * in the magicVariant.
18111 * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
18113 * @return : number of bytes written or a ZSTD error.
18114 */
18115 size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
18116 unsigned* magicVariant, /* optional, can be NULL */
18117 const void* src, size_t srcSize)
18119 RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
18121 { U32 const magicNumber = MEM_readLE32(src);
18122 size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
18123 size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
18125 /* check input validity */
18126 RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
18127 RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
18128 RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
18130 /* deliver payload */
18131 if (skippableContentSize > 0 && dst != NULL)
18132 ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
18133 if (magicVariant != NULL)
18134 *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
18135 return skippableContentSize;
18139 /** ZSTD_findDecompressedSize() :
18140 * `srcSize` must be the exact length of some number of ZSTD compressed and/or
18141 * skippable frames
18142 * note: compatible with legacy mode
18143 * @return : decompressed size of the frames contained */
18144 unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
18146 unsigned long long totalDstSize = 0;
18148 while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
18149 U32 const magicNumber = MEM_readLE32(src);
18151 if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
18152 size_t const skippableSize = readSkippableFrameSize(src, srcSize);
18153 if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
18154 assert(skippableSize <= srcSize);
18156 src = (const BYTE *)src + skippableSize;
18157 srcSize -= skippableSize;
18158 continue;
18161 { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
18162 if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;
18164 if (totalDstSize + fcs < totalDstSize)
18165 return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
18166 totalDstSize += fcs;
18168 /* skip to next frame */
18169 { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
18170 if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
18171 assert(frameSrcSize <= srcSize);
18173 src = (const BYTE *)src + frameSrcSize;
18174 srcSize -= frameSrcSize;
18176 } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
18178 if (srcSize) return ZSTD_CONTENTSIZE_ERROR;
18180 return totalDstSize;
18183 /** ZSTD_getDecompressedSize() :
18184 * compatible with legacy mode
18185 * @return : decompressed size if known, 0 otherwise
18186 note : 0 can mean any of the following :
18187 - frame content is empty
18188 - decompressed size field is not present in frame header
18189 - frame header unknown / not supported
18190 - frame header not complete (`srcSize` too small) */
18191 unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
18193 unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
18194 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN);
18195 return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret;
18199 /** ZSTD_decodeFrameHeader() :
18200 * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
18201 * If multiple DDict references are enabled, also will choose the correct DDict to use.
18202 * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
18203 static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
18205 size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
18206 if (ZSTD_isError(result)) return result; /* invalid header */
18207 RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
18209 /* Reference DDict requested by frame if dctx references multiple ddicts */
18210 if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
18211 ZSTD_DCtx_selectFrameDDict(dctx);
18214 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
18215 /* Skip the dictID check in fuzzing mode, because it makes the search
18216 * harder.
18217 */
18218 RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
18219 dictionary_wrong, "");
18220 #endif
18221 dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0;
18222 if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0);
18223 dctx->processedCSize += headerSize;
18224 return 0;
18227 static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
18229 ZSTD_frameSizeInfo frameSizeInfo;
18230 frameSizeInfo.compressedSize = ret;
18231 frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
18232 return frameSizeInfo;
18235 static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format)
18237 ZSTD_frameSizeInfo frameSizeInfo;
18238 ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
18240 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
18241 if (format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize))
18242 return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
18243 #endif
18245 if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
18246 && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
18247 frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
18248 assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
18249 frameSizeInfo.compressedSize <= srcSize);
18250 return frameSizeInfo;
18251 } else {
18252 const BYTE* ip = (const BYTE*)src;
18253 const BYTE* const ipstart = ip;
18254 size_t remainingSize = srcSize;
18255 size_t nbBlocks = 0;
18256 ZSTD_frameHeader zfh;
18258 /* Extract Frame Header */
18259 { size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format);
18260 if (ZSTD_isError(ret))
18261 return ZSTD_errorFrameSizeInfo(ret);
18262 if (ret > 0)
18263 return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
18266 ip += zfh.headerSize;
18267 remainingSize -= zfh.headerSize;
18269 /* Iterate over each block */
18270 while (1) {
18271 blockProperties_t blockProperties;
18272 size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
18273 if (ZSTD_isError(cBlockSize))
18274 return ZSTD_errorFrameSizeInfo(cBlockSize);
18276 if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
18277 return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
18279 ip += ZSTD_blockHeaderSize + cBlockSize;
18280 remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
18281 nbBlocks++;
18283 if (blockProperties.lastBlock) break;
18286 /* Final frame content checksum */
18287 if (zfh.checksumFlag) {
18288 if (remainingSize < 4)
18289 return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
18290 ip += 4;
18293 frameSizeInfo.nbBlocks = nbBlocks;
18294 frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
18295 frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
18296 ? zfh.frameContentSize
18297 : (unsigned long long)nbBlocks * zfh.blockSizeMax;
18298 return frameSizeInfo;
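/* ZSTD_findFrameSizeInfo() measures a frame without decompressing it: it walks
 * the block headers with ZSTD_getcBlockSize(), summing ZSTD_blockHeaderSize +
 * cBlockSize per block, adds 4 bytes when a content checksum is present, and
 * bounds the decompressed size by the declared frame content size, or by
 * nbBlocks * blockSizeMax when that size is unknown. */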
18302 static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) {
18303 ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format);
18304 return frameSizeInfo.compressedSize;
18307 /** ZSTD_findFrameCompressedSize() :
18308 * See docs in zstd.h
18309 * Note: compatible with legacy mode */
18310 size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
18312 return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1);
18315 /** ZSTD_decompressBound() :
18316 * compatible with legacy mode
18317 * `src` must point to the start of a ZSTD frame or a skippable frame
18318 * `srcSize` must be at least as large as the frame contained
18319 * @return : the maximum decompressed size of the compressed source
18320 */
18321 unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
18323 unsigned long long bound = 0;
18324 /* Iterate over each frame */
18325 while (srcSize > 0) {
18326 ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
18327 size_t const compressedSize = frameSizeInfo.compressedSize;
18328 unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
18329 if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
18330 return ZSTD_CONTENTSIZE_ERROR;
18331 assert(srcSize >= compressedSize);
18332 src = (const BYTE*)src + compressedSize;
18333 srcSize -= compressedSize;
18334 bound += decompressedBound;
18336 return bound;
18339 size_t ZSTD_decompressionMargin(void const* src, size_t srcSize)
18341 size_t margin = 0;
18342 unsigned maxBlockSize = 0;
18344 /* Iterate over each frame */
18345 while (srcSize > 0) {
18346 ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
18347 size_t const compressedSize = frameSizeInfo.compressedSize;
18348 unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
18349 ZSTD_frameHeader zfh;
18351 FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), "");
18352 if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
18353 return ERROR(corruption_detected);
18355 if (zfh.frameType == ZSTD_frame) {
18356 /* Add the frame header to our margin */
18357 margin += zfh.headerSize;
18358 /* Add the checksum to our margin */
18359 margin += zfh.checksumFlag ? 4 : 0;
18360 /* Add 3 bytes per block */
18361 margin += 3 * frameSizeInfo.nbBlocks;
18363 /* Compute the max block size */
18364 maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax);
18365 } else {
18366 assert(zfh.frameType == ZSTD_skippableFrame);
18367 /* Add the entire skippable frame size to our margin. */
18368 margin += compressedSize;
18371 assert(srcSize >= compressedSize);
18372 src = (const BYTE*)src + compressedSize;
18373 srcSize -= compressedSize;
18376 /* Add the max block size back to the margin. */
18377 margin += maxBlockSize;
18379 return margin;
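/* The margin computed above is the extra headroom needed for in-place
 * decompression: the frame header(s), 4 bytes per checksum, 3 bytes of block
 * header per block, plus one maximum-size block. ZSTD_decompressFrame() below
 * supports such overlapping buffers by clamping its output pointer so a block
 * never overwrites compressed input it has not yet consumed. */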
18382 /*-*************************************************************
18383 * Frame decoding
18384 ***************************************************************/
18386 /** ZSTD_insertBlock() :
18387 * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
18388 size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
18390 DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
18391 ZSTD_checkContinuity(dctx, blockStart, blockSize);
18392 dctx->previousDstEnd = (const char*)blockStart + blockSize;
18393 return blockSize;
18397 static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
18398 const void* src, size_t srcSize)
18400 DEBUGLOG(5, "ZSTD_copyRawBlock");
18401 RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
18402 if (dst == NULL) {
18403 if (srcSize == 0) return 0;
18404 RETURN_ERROR(dstBuffer_null, "");
18406 ZSTD_memmove(dst, src, srcSize);
18407 return srcSize;
18410 static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
18411 BYTE b,
18412 size_t regenSize)
18414 RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
18415 if (dst == NULL) {
18416 if (regenSize == 0) return 0;
18417 RETURN_ERROR(dstBuffer_null, "");
18419 ZSTD_memset(dst, b, regenSize);
18420 return regenSize;
18423 static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
18425 #if ZSTD_TRACE
18426 if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) {
18427 ZSTD_Trace trace;
18428 ZSTD_memset(&trace, 0, sizeof(trace));
18429 trace.version = ZSTD_VERSION_NUMBER;
18430 trace.streaming = streaming;
18431 if (dctx->ddict) {
18432 trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict);
18433 trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict);
18434 trace.dictionaryIsCold = dctx->ddictIsCold;
18436 trace.uncompressedSize = (size_t)uncompressedSize;
18437 trace.compressedSize = (size_t)compressedSize;
18438 trace.dctx = dctx;
18439 ZSTD_trace_decompress_end(dctx->traceCtx, &trace);
18441 #else
18442 (void)dctx;
18443 (void)uncompressedSize;
18444 (void)compressedSize;
18445 (void)streaming;
18446 #endif
18450 /*! ZSTD_decompressFrame() :
18451 * @dctx must be properly initialized
18452 * will update *srcPtr and *srcSizePtr,
18453 * to make *srcPtr progress by one frame. */
18454 static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
18455 void* dst, size_t dstCapacity,
18456 const void** srcPtr, size_t *srcSizePtr)
18458 const BYTE* const istart = (const BYTE*)(*srcPtr);
18459 const BYTE* ip = istart;
18460 BYTE* const ostart = (BYTE*)dst;
18461 BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
18462 BYTE* op = ostart;
18463 size_t remainingSrcSize = *srcSizePtr;
18465 DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
18467 /* check */
18468 RETURN_ERROR_IF(
18469 remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
18470 srcSize_wrong, "");
18472 /* Frame Header */
18473 { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
18474 ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
18475 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
18476 RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
18477 srcSize_wrong, "");
18478 FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , "");
18479 ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
18482 /* Shrink the blockSizeMax if enabled */
18483 if (dctx->maxBlockSizeParam != 0)
18484 dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam);
18486 /* Loop on each block */
18487 while (1) {
18488 BYTE* oBlockEnd = oend;
18489 size_t decodedSize;
18490 blockProperties_t blockProperties;
18491 size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
18492 if (ZSTD_isError(cBlockSize)) return cBlockSize;
18494 ip += ZSTD_blockHeaderSize;
18495 remainingSrcSize -= ZSTD_blockHeaderSize;
18496 RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
18498 if (ip >= op && ip < oBlockEnd) {
18499 /* We are decompressing in-place. Limit the output pointer so that we
18500 * don't overwrite the block that we are currently reading. This will
18501 * fail decompression if the input & output pointers aren't spaced
18502 * far enough apart.
18504 * This is important to set, even when the pointers are far enough
18505 * apart, because ZSTD_decompressBlock_internal() can decide to store
18506 * literals in the output buffer, after the block it is decompressing.
18507 * Since we don't want anything to overwrite our input, we have to tell
18508 * ZSTD_decompressBlock_internal to never write past ip.
18510 * See ZSTD_allocateLiteralsBuffer() for reference.
18512 oBlockEnd = op + (ip - op);
18515 switch(blockProperties.blockType)
18517 case bt_compressed:
18518 assert(dctx->isFrameDecompression == 1);
18519 decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming);
18520 break;
18521 case bt_raw :
18522 /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
18523 decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
18524 break;
18525 case bt_rle :
18526 decodedSize = ZSTD_setRleBlock(op, (size_t)(oBlockEnd-op), *ip, blockProperties.origSize);
18527 break;
18528 case bt_reserved :
18529 default:
18530 RETURN_ERROR(corruption_detected, "invalid block type");
18532 FORWARD_IF_ERROR(decodedSize, "Block decompression failure");
18533 DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize);
18534 if (dctx->validateChecksum) {
18535 XXH64_update(&dctx->xxhState, op, decodedSize);
18537 if (decodedSize) /* support dst = NULL,0 */ {
18538 op += decodedSize;
18540 assert(ip != NULL);
18541 ip += cBlockSize;
18542 remainingSrcSize -= cBlockSize;
18543 if (blockProperties.lastBlock) break;
18546 if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
18547 RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
18548 corruption_detected, "");
18550 if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
18551 RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
18552 if (!dctx->forceIgnoreChecksum) {
18553 U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
18554 U32 checkRead;
18555 checkRead = MEM_readLE32(ip);
18556 RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
18558 ip += 4;
18559 remainingSrcSize -= 4;
18561 ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
18562 /* Allow caller to get size read */
18563 DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", op-ostart, ip - (const BYTE*)*srcPtr);
18564 *srcPtr = ip;
18565 *srcSizePtr = remainingSrcSize;
18566 return (size_t)(op-ostart);
18569 static
18570 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
18571 size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
18572 void* dst, size_t dstCapacity,
18573 const void* src, size_t srcSize,
18574 const void* dict, size_t dictSize,
18575 const ZSTD_DDict* ddict)
18577 void* const dststart = dst;
18578 int moreThan1Frame = 0;
18580 DEBUGLOG(5, "ZSTD_decompressMultiFrame");
18581 assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */
18583 if (ddict) {
18584 dict = ZSTD_DDict_dictContent(ddict);
18585 dictSize = ZSTD_DDict_dictSize(ddict);
18588 while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
18590 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
18591 if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) {
18592 size_t decodedSize;
18593 size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
18594 if (ZSTD_isError(frameSize)) return frameSize;
18595 RETURN_ERROR_IF(dctx->staticSize, memory_allocation,
18596 "legacy support is not compatible with static dctx");
18598 decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
18599 if (ZSTD_isError(decodedSize)) return decodedSize;
18602 unsigned long long const expectedSize = ZSTD_getFrameContentSize(src, srcSize);
18603 RETURN_ERROR_IF(expectedSize == ZSTD_CONTENTSIZE_ERROR, corruption_detected, "Corrupted frame header!");
18604 if (expectedSize != ZSTD_CONTENTSIZE_UNKNOWN) {
18605 RETURN_ERROR_IF(expectedSize != decodedSize, corruption_detected,
18606 "Frame header size does not match decoded size!");
18610 assert(decodedSize <= dstCapacity);
18611 dst = (BYTE*)dst + decodedSize;
18612 dstCapacity -= decodedSize;
18614 src = (const BYTE*)src + frameSize;
18615 srcSize -= frameSize;
18617 continue;
18619 #endif
18621 if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
18622 U32 const magicNumber = MEM_readLE32(src);
18623 DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
18624 if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
18625 /* skippable frame detected : skip it */
18626 size_t const skippableSize = readSkippableFrameSize(src, srcSize);
18627 FORWARD_IF_ERROR(skippableSize, "invalid skippable frame");
18628 assert(skippableSize <= srcSize);
18630 src = (const BYTE *)src + skippableSize;
18631 srcSize -= skippableSize;
18632 continue; /* check next frame */
18635 if (ddict) {
18636 /* we were called from ZSTD_decompress_usingDDict */
18637 FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), "");
18638 } else {
18639 /* this will initialize correctly with no dict if dict == NULL, so
18640 * use this in all cases but ddict */
18641 FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
18643 ZSTD_checkContinuity(dctx, dst, dstCapacity);
18645 { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
18646 &src, &srcSize);
18647 RETURN_ERROR_IF(
18648 (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
18649 && (moreThan1Frame==1),
18650 srcSize_wrong,
18651 "At least one frame successfully completed, "
18652 "but following bytes are garbage: "
18653 "it's more likely to be a srcSize error, "
18654 "specifying more input bytes than size of frame(s). "
18655 "Note: one could be unlucky, it might be a corruption error instead, "
18656 "happening right at the place where we expect zstd magic bytes. "
18657 "But this is _much_ less likely than a srcSize field error.");
18658 if (ZSTD_isError(res)) return res;
18659 assert(res <= dstCapacity);
18660 if (res != 0)
18661 dst = (BYTE*)dst + res;
18662 dstCapacity -= res;
18664 moreThan1Frame = 1;
18665 } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
18667 RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
18669 return (size_t)((BYTE*)dst - (BYTE*)dststart);
18672 size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
18673 void* dst, size_t dstCapacity,
18674 const void* src, size_t srcSize,
18675 const void* dict, size_t dictSize)
18677 return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
18681 static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
18683 switch (dctx->dictUses) {
18684 default:
18685 assert(0 /* Impossible */);
18686 ZSTD_FALLTHROUGH;
18687 case ZSTD_dont_use:
18688 ZSTD_clearDict(dctx);
18689 return NULL;
18690 case ZSTD_use_indefinitely:
18691 return dctx->ddict;
18692 case ZSTD_use_once:
18693 dctx->dictUses = ZSTD_dont_use;
18694 return dctx->ddict;
18698 size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
18700 return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));
18704 size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
18706 #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
18707 size_t regenSize;
18708 ZSTD_DCtx* const dctx = ZSTD_createDCtx_internal(ZSTD_defaultCMem);
18709 RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
18710 regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
18711 ZSTD_freeDCtx(dctx);
18712 return regenSize;
18713 #else /* stack mode */
18714 ZSTD_DCtx dctx;
18715 ZSTD_initDCtx_internal(&dctx);
18716 return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
18717 #endif
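/* Illustrative sketch (not part of the amalgamated source) : the simplest
 * one-shot use of ZSTD_decompress(), assuming the public zstd.h API and a
 * single frame whose content size was written into the frame header.
 *
 *   unsigned long long const cSize = ZSTD_getFrameContentSize(src, srcSize);
 *   if (cSize == ZSTD_CONTENTSIZE_ERROR || cSize == ZSTD_CONTENTSIZE_UNKNOWN)
 *       return -1;                                  // not decodable this way
 *   {   size_t const dSize = ZSTD_decompress(dst, (size_t)cSize, src, srcSize);
 *       if (ZSTD_isError(dSize)) return -1;
 *       assert(dSize == cSize);                     // exact size known up front
 *   }
 */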
18721 /*-**************************************
18722 * Advanced Streaming Decompression API
18723 * Bufferless and synchronous
18724 ****************************************/
18725 size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
18728 * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we
18729 * allow taking a partial block as the input. Currently only raw uncompressed blocks can
18730 * be streamed.
18732 * For blocks that can be streamed, this allows us to reduce the latency until we produce
18733 * output, and avoid copying the input.
18735 * @param inputSize - The total amount of input that the caller currently has.
18737 static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) {
18738 if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock))
18739 return dctx->expected;
18740 if (dctx->bType != bt_raw)
18741 return dctx->expected;
18742 return BOUNDED(1, inputSize, dctx->expected);
18745 ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
18746 switch(dctx->stage)
18748 default: /* should not happen */
18749 assert(0);
18750 ZSTD_FALLTHROUGH;
18751 case ZSTDds_getFrameHeaderSize:
18752 ZSTD_FALLTHROUGH;
18753 case ZSTDds_decodeFrameHeader:
18754 return ZSTDnit_frameHeader;
18755 case ZSTDds_decodeBlockHeader:
18756 return ZSTDnit_blockHeader;
18757 case ZSTDds_decompressBlock:
18758 return ZSTDnit_block;
18759 case ZSTDds_decompressLastBlock:
18760 return ZSTDnit_lastBlock;
18761 case ZSTDds_checkChecksum:
18762 return ZSTDnit_checksum;
18763 case ZSTDds_decodeSkippableHeader:
18764 ZSTD_FALLTHROUGH;
18765 case ZSTDds_skipFrame:
18766 return ZSTDnit_skippableFrame;
18770 static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }
18772 /** ZSTD_decompressContinue() :
18773 * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
18774 * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`)
18775 * or an error code, which can be tested using ZSTD_isError() */
18776 size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
18778 DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
18779 /* Sanity check */
18780 RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
18781 ZSTD_checkContinuity(dctx, dst, dstCapacity);
18783 dctx->processedCSize += srcSize;
18785 switch (dctx->stage)
18787 case ZSTDds_getFrameHeaderSize :
18788 assert(src != NULL);
18789 if (dctx->format == ZSTD_f_zstd1) { /* allows header */
18790 assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */
18791 if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
18792 ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
18793 dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */
18794 dctx->stage = ZSTDds_decodeSkippableHeader;
18795 return 0;
18797 dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
18798 if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
18799 ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
18800 dctx->expected = dctx->headerSize - srcSize;
18801 dctx->stage = ZSTDds_decodeFrameHeader;
18802 return 0;
18804 case ZSTDds_decodeFrameHeader:
18805 assert(src != NULL);
18806 ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
18807 FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
18808 dctx->expected = ZSTD_blockHeaderSize;
18809 dctx->stage = ZSTDds_decodeBlockHeader;
18810 return 0;
18812 case ZSTDds_decodeBlockHeader:
18813 { blockProperties_t bp;
18814 size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
18815 if (ZSTD_isError(cBlockSize)) return cBlockSize;
18816 RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
18817 dctx->expected = cBlockSize;
18818 dctx->bType = bp.blockType;
18819 dctx->rleSize = bp.origSize;
18820 if (cBlockSize) {
18821 dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
18822 return 0;
18824 /* empty block */
18825 if (bp.lastBlock) {
18826 if (dctx->fParams.checksumFlag) {
18827 dctx->expected = 4;
18828 dctx->stage = ZSTDds_checkChecksum;
18829 } else {
18830 dctx->expected = 0; /* end of frame */
18831 dctx->stage = ZSTDds_getFrameHeaderSize;
18833 } else {
18834 dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */
18835 dctx->stage = ZSTDds_decodeBlockHeader;
18837 return 0;
18840 case ZSTDds_decompressLastBlock:
18841 case ZSTDds_decompressBlock:
18842 DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
18843 { size_t rSize;
18844 switch(dctx->bType)
18846 case bt_compressed:
18847 DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
18848 assert(dctx->isFrameDecompression == 1);
18849 rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming);
18850 dctx->expected = 0; /* Streaming not supported */
18851 break;
18852 case bt_raw :
18853 assert(srcSize <= dctx->expected);
18854 rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
18855 FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed");
18856 assert(rSize == srcSize);
18857 dctx->expected -= rSize;
18858 break;
18859 case bt_rle :
18860 rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
18861 dctx->expected = 0; /* Streaming not supported */
18862 break;
18863 case bt_reserved : /* should never happen */
18864 default:
18865 RETURN_ERROR(corruption_detected, "invalid block type");
18867 FORWARD_IF_ERROR(rSize, "");
18868 RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
18869 DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
18870 dctx->decodedSize += rSize;
18871 if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize);
18872 dctx->previousDstEnd = (char*)dst + rSize;
18874 /* Stay on the same stage until we are finished streaming the block. */
18875 if (dctx->expected > 0) {
18876 return rSize;
18879 if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
18880 DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
18881 RETURN_ERROR_IF(
18882 dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
18883 && dctx->decodedSize != dctx->fParams.frameContentSize,
18884 corruption_detected, "");
18885 if (dctx->fParams.checksumFlag) { /* another round for frame checksum */
18886 dctx->expected = 4;
18887 dctx->stage = ZSTDds_checkChecksum;
18888 } else {
18889 ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
18890 dctx->expected = 0; /* ends here */
18891 dctx->stage = ZSTDds_getFrameHeaderSize;
18893 } else {
18894 dctx->stage = ZSTDds_decodeBlockHeader;
18895 dctx->expected = ZSTD_blockHeaderSize;
18897 return rSize;
18900 case ZSTDds_checkChecksum:
18901 assert(srcSize == 4); /* guaranteed by dctx->expected */
18903 if (dctx->validateChecksum) {
18904 U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
18905 U32 const check32 = MEM_readLE32(src);
18906 DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
18907 RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
18909 ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
18910 dctx->expected = 0;
18911 dctx->stage = ZSTDds_getFrameHeaderSize;
18912 return 0;
18915 case ZSTDds_decodeSkippableHeader:
18916 assert(src != NULL);
18917 assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
18918 assert(dctx->format != ZSTD_f_zstd1_magicless);
18919 ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
18920 dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
18921 dctx->stage = ZSTDds_skipFrame;
18922 return 0;
18924 case ZSTDds_skipFrame:
18925 dctx->expected = 0;
18926 dctx->stage = ZSTDds_getFrameHeaderSize;
18927 return 0;
18929 default:
18930 assert(0); /* impossible */
18931 RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */
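/* Illustrative sketch (an assumption, using only the public bufferless API
 * declared in zstd.h) : the canonical driver loop for ZSTD_decompressContinue().
 * The contract is that every call receives exactly
 * ZSTD_nextSrcSizeToDecompress() bytes; `ip`/`iend` and `op`/`oend` are
 * hypothetical caller-maintained input and output cursors.
 *
 *   if (ZSTD_isError(ZSTD_decompressBegin(dctx))) return -1;
 *   while (1) {
 *       size_t const toRead = ZSTD_nextSrcSizeToDecompress(dctx);
 *       if (toRead == 0) break;                         // frame fully decoded
 *       if ((size_t)(iend - ip) < toRead) return -1;    // truncated input
 *       {   size_t const produced = ZSTD_decompressContinue(dctx,
 *                                       op, (size_t)(oend - op), ip, toRead);
 *           if (ZSTD_isError(produced)) return -1;
 *           ip += toRead;
 *           op += produced;
 *       }
 *   }
 */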
18936 static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
18938 dctx->dictEnd = dctx->previousDstEnd;
18939 dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
18940 dctx->prefixStart = dict;
18941 dctx->previousDstEnd = (const char*)dict + dictSize;
18942 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
18943 dctx->dictContentBeginForFuzzing = dctx->prefixStart;
18944 dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
18945 #endif
18946 return 0;
18949 /*! ZSTD_loadDEntropy() :
18950 * dict : must point at beginning of a valid zstd dictionary.
18951 * @return : size of entropy tables read */
18952 size_t
18953 ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
18954 const void* const dict, size_t const dictSize)
18956 const BYTE* dictPtr = (const BYTE*)dict;
18957 const BYTE* const dictEnd = dictPtr + dictSize;
18959 RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small");
18960 assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */
18961 dictPtr += 8; /* skip header = magic + dictID */
18963 ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
18964 ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
18965 ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
18966 { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */
18967 size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
18968 #ifdef HUF_FORCE_DECOMPRESS_X1
18969 /* in minimal huffman, we always use X1 variants */
18970 size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
18971 dictPtr, dictEnd - dictPtr,
18972 workspace, workspaceSize, /* flags */ 0);
18973 #else
18974 size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
18975 dictPtr, (size_t)(dictEnd - dictPtr),
18976 workspace, workspaceSize, /* flags */ 0);
18977 #endif
18978 RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
18979 dictPtr += hSize;
18982 { short offcodeNCount[MaxOff+1];
18983 unsigned offcodeMaxValue = MaxOff, offcodeLog;
18984 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
18985 RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
18986 RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
18987 RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
18988 ZSTD_buildFSETable( entropy->OFTable,
18989 offcodeNCount, offcodeMaxValue,
18990 OF_base, OF_bits,
18991 offcodeLog,
18992 entropy->workspace, sizeof(entropy->workspace),
18993 /* bmi2 */0);
18994 dictPtr += offcodeHeaderSize;
18997 { short matchlengthNCount[MaxML+1];
18998 unsigned matchlengthMaxValue = MaxML, matchlengthLog;
18999 size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
19000 RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
19001 RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
19002 RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
19003 ZSTD_buildFSETable( entropy->MLTable,
19004 matchlengthNCount, matchlengthMaxValue,
19005 ML_base, ML_bits,
19006 matchlengthLog,
19007 entropy->workspace, sizeof(entropy->workspace),
19008 /* bmi2 */ 0);
19009 dictPtr += matchlengthHeaderSize;
19012 { short litlengthNCount[MaxLL+1];
19013 unsigned litlengthMaxValue = MaxLL, litlengthLog;
19014 size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
19015 RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
19016 RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
19017 RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
19018 ZSTD_buildFSETable( entropy->LLTable,
19019 litlengthNCount, litlengthMaxValue,
19020 LL_base, LL_bits,
19021 litlengthLog,
19022 entropy->workspace, sizeof(entropy->workspace),
19023 /* bmi2 */ 0);
19024 dictPtr += litlengthHeaderSize;
19027 RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
19028 { int i;
19029 size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
19030 for (i=0; i<3; i++) {
19031 U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
19032 RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
19033 dictionary_corrupted, "");
19034 entropy->rep[i] = rep;
19037 return (size_t)(dictPtr - (const BYTE*)dict);
19040 static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
19042 if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
19043 { U32 const magic = MEM_readLE32(dict);
19044 if (magic != ZSTD_MAGIC_DICTIONARY) {
19045 return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
19047 dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
19049 /* load entropy tables */
19050 { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
19051 RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, "");
19052 dict = (const char*)dict + eSize;
19053 dictSize -= eSize;
19055 dctx->litEntropy = dctx->fseEntropy = 1;
19057 /* reference dictionary content */
19058 return ZSTD_refDictContent(dctx, dict, dictSize);
19061 size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
19063 assert(dctx != NULL);
19064 #if ZSTD_TRACE
19065 dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0;
19066 #endif
19067 dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */
19068 dctx->stage = ZSTDds_getFrameHeaderSize;
19069 dctx->processedCSize = 0;
19070 dctx->decodedSize = 0;
19071 dctx->previousDstEnd = NULL;
19072 dctx->prefixStart = NULL;
19073 dctx->virtualStart = NULL;
19074 dctx->dictEnd = NULL;
19075 dctx->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
19076 dctx->litEntropy = dctx->fseEntropy = 0;
19077 dctx->dictID = 0;
19078 dctx->bType = bt_reserved;
19079 dctx->isFrameDecompression = 1;
19080 ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
19081 ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
19082 dctx->LLTptr = dctx->entropy.LLTable;
19083 dctx->MLTptr = dctx->entropy.MLTable;
19084 dctx->OFTptr = dctx->entropy.OFTable;
19085 dctx->HUFptr = dctx->entropy.hufTable;
19086 return 0;
19089 size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
19091 FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
19092 if (dict && dictSize)
19093 RETURN_ERROR_IF(
19094 ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
19095 dictionary_corrupted, "");
19096 return 0;
19100 /* ====== ZSTD_DDict ====== */
19102 size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
19104 DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
19105 assert(dctx != NULL);
19106 if (ddict) {
19107 const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict);
19108 size_t const dictSize = ZSTD_DDict_dictSize(ddict);
19109 const void* const dictEnd = dictStart + dictSize;
19110 dctx->ddictIsCold = (dctx->dictEnd != dictEnd);
19111 DEBUGLOG(4, "DDict is %s",
19112 dctx->ddictIsCold ? "~cold~" : "hot!");
19114 FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
19115 if (ddict) { /* NULL ddict is equivalent to no dictionary */
19116 ZSTD_copyDDictParameters(dctx, ddict);
19118 return 0;
19121 /*! ZSTD_getDictID_fromDict() :
19122 * Provides the dictID stored within dictionary.
19123 * if @return == 0, the dictionary is not conformant with Zstandard specification.
19124 * It can still be loaded, but as a content-only dictionary. */
19125 unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
19127 if (dictSize < 8) return 0;
19128 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
19129 return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
19132 /*! ZSTD_getDictID_fromFrame() :
19133 * Provides the dictID required to decompress frame stored within `src`.
19134 * If @return == 0, the dictID could not be decoded.
19135 * This could be for one of the following reasons :
19136 * - The frame does not require a dictionary (most common case).
19137 * - The frame was built with dictID intentionally removed.
19138 * Needed dictionary is a hidden piece of information.
19139 * Note : this use case also happens when using a non-conformant dictionary.
19140 * - `srcSize` is too small, and as a result, frame header could not be decoded.
19141 * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
19142 * - This is not a Zstandard frame.
19143 * When identifying the exact failure cause, it's possible to use
19144 * ZSTD_getFrameHeader(), which will provide a more precise error code. */
19145 unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
19147 ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 };
19148 size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
19149 if (ZSTD_isError(hError)) return 0;
19150 return zfp.dictID;
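/* Illustrative sketch (assumption) : using the frame's dictID to pick the
 * right pre-digested dictionary before decompressing. lookup_ddict_by_id()
 * is a hypothetical application-side table lookup, not a zstd API.
 *
 *   unsigned const dictID = ZSTD_getDictID_fromFrame(src, srcSize);
 *   const ZSTD_DDict* const ddict = dictID ? lookup_ddict_by_id(dictID) : NULL;
 *   {   size_t const dSize = ZSTD_decompress_usingDDict(dctx, dst, dstCapacity,
 *                                                       src, srcSize, ddict);
 *       if (ZSTD_isError(dSize)) return -1;
 *   }
 */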
19154 /*! ZSTD_decompress_usingDDict() :
19155 * Decompression using a pre-digested Dictionary
19156 * Use dictionary without significant overhead. */
19157 size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
19158 void* dst, size_t dstCapacity,
19159 const void* src, size_t srcSize,
19160 const ZSTD_DDict* ddict)
19162 /* pass content and size in case legacy frames are encountered */
19163 return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
19164 NULL, 0,
19165 ddict);
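/* Illustrative sketch (not part of the amalgamated source) : digesting a
 * dictionary once with ZSTD_createDDict() and reusing it across many frames,
 * which avoids re-loading entropy tables on every call. Assumes the public
 * zstd.h API; buffers and counts are hypothetical caller state.
 *
 *   ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize);
 *   ZSTD_DCtx*  const dctx  = ZSTD_createDCtx();
 *   size_t i;
 *   for (i = 0; i < nbFrames; i++) {
 *       size_t const r = ZSTD_decompress_usingDDict(dctx, dst[i], dstCap[i],
 *                                                   frame[i], frameSize[i], ddict);
 *       if (ZSTD_isError(r)) break;                 // handle error
 *   }
 *   ZSTD_freeDCtx(dctx);
 *   ZSTD_freeDDict(ddict);
 */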
19169 /*=====================================
19170 * Streaming decompression
19171 *====================================*/
19173 ZSTD_DStream* ZSTD_createDStream(void)
19175 DEBUGLOG(3, "ZSTD_createDStream");
19176 return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
19179 ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
19181 return ZSTD_initStaticDCtx(workspace, workspaceSize);
19184 ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
19186 return ZSTD_createDCtx_internal(customMem);
19189 size_t ZSTD_freeDStream(ZSTD_DStream* zds)
19191 return ZSTD_freeDCtx(zds);
19195 /* *** Initialization *** */
19197 size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
19198 size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
19200 size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
19201 const void* dict, size_t dictSize,
19202 ZSTD_dictLoadMethod_e dictLoadMethod,
19203 ZSTD_dictContentType_e dictContentType)
19205 RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
19206 ZSTD_clearDict(dctx);
19207 if (dict && dictSize != 0) {
19208 dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
19209 RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!");
19210 dctx->ddict = dctx->ddictLocal;
19211 dctx->dictUses = ZSTD_use_indefinitely;
19213 return 0;
19216 size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
19218 return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
19221 size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
19223 return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
19226 size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
19228 FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), "");
19229 dctx->dictUses = ZSTD_use_once;
19230 return 0;
19233 size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
19235 return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
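/* Illustrative sketch (assumption) : the parameter-style dictionary API.
 * ZSTD_DCtx_loadDictionary() keeps the dictionary for the whole session,
 * whereas ZSTD_DCtx_refPrefix() references a raw prefix for the next frame
 * only. Buffers are hypothetical caller state.
 *
 *   ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *   size_t r = ZSTD_DCtx_loadDictionary(dctx, dictBuf, dictSize);
 *   if (!ZSTD_isError(r))
 *       r = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
 *   ZSTD_freeDCtx(dctx);
 *   return ZSTD_isError(r) ? -1 : 0;
 */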
19239 /* ZSTD_initDStream_usingDict() :
19240 * return : expected size, aka ZSTD_startingInputLength().
19241 * this function cannot fail */
19242 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
19244 DEBUGLOG(4, "ZSTD_initDStream_usingDict");
19245 FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , "");
19246 FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , "");
19247 return ZSTD_startingInputLength(zds->format);
19250 /* note : this variant can't fail */
19251 size_t ZSTD_initDStream(ZSTD_DStream* zds)
19253 DEBUGLOG(4, "ZSTD_initDStream");
19254 FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), "");
19255 FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), "");
19256 return ZSTD_startingInputLength(zds->format);
19259 /* ZSTD_initDStream_usingDDict() :
19260 * ddict will just be referenced, and must outlive decompression session
19261 * this function cannot fail */
19262 size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
19264 DEBUGLOG(4, "ZSTD_initDStream_usingDDict");
19265 FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
19266 FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
19267 return ZSTD_startingInputLength(dctx->format);
19270 /* ZSTD_resetDStream() :
19271 * return : expected size, aka ZSTD_startingInputLength().
19272 * this function cannot fail */
19273 size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
19275 DEBUGLOG(4, "ZSTD_resetDStream");
19276 FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
19277 return ZSTD_startingInputLength(dctx->format);
19281 size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
19283 RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
19284 ZSTD_clearDict(dctx);
19285 if (ddict) {
19286 dctx->ddict = ddict;
19287 dctx->dictUses = ZSTD_use_indefinitely;
19288 if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
19289 if (dctx->ddictSet == NULL) {
19290 dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
19291 if (!dctx->ddictSet) {
19292 RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
19295 assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */
19296 FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), "");
19299 return 0;
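/* Illustrative sketch (assumption) : referencing a pre-digested DDict for a
 * streaming session via the advanced API. The reference persists until the
 * parameters are reset, and the DDict must outlive the session.
 *
 *   if (ZSTD_isError(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only))) return -1;
 *   if (ZSTD_isError(ZSTD_DCtx_refDDict(dctx, ddict))) return -1;
 *   // ... then drive ZSTD_decompressStream(dctx, &output, &input) as usual ...
 */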
19302 /* ZSTD_DCtx_setMaxWindowSize() :
19303 * note : no direct equivalence in ZSTD_DCtx_setParameter,
19304 * since this version sets windowSize, and the other sets windowLog */
19305 size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
19307 ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
19308 size_t const min = (size_t)1 << bounds.lowerBound;
19309 size_t const max = (size_t)1 << bounds.upperBound;
19310 RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
19311 RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, "");
19312 RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, "");
19313 dctx->maxWindowSize = maxWindowSize;
19314 return 0;
19317 size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
19319 return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format);
19322 ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
19324 ZSTD_bounds bounds = { 0, 0, 0 };
19325 switch(dParam) {
19326 case ZSTD_d_windowLogMax:
19327 bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN;
19328 bounds.upperBound = ZSTD_WINDOWLOG_MAX;
19329 return bounds;
19330 case ZSTD_d_format:
19331 bounds.lowerBound = (int)ZSTD_f_zstd1;
19332 bounds.upperBound = (int)ZSTD_f_zstd1_magicless;
19333 ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
19334 return bounds;
19335 case ZSTD_d_stableOutBuffer:
19336 bounds.lowerBound = (int)ZSTD_bm_buffered;
19337 bounds.upperBound = (int)ZSTD_bm_stable;
19338 return bounds;
19339 case ZSTD_d_forceIgnoreChecksum:
19340 bounds.lowerBound = (int)ZSTD_d_validateChecksum;
19341 bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
19342 return bounds;
19343 case ZSTD_d_refMultipleDDicts:
19344 bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
19345 bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
19346 return bounds;
19347 case ZSTD_d_disableHuffmanAssembly:
19348 bounds.lowerBound = 0;
19349 bounds.upperBound = 1;
19350 return bounds;
19351 case ZSTD_d_maxBlockSize:
19352 bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
19353 bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
19354 return bounds;
19356 default:;
19358 bounds.error = ERROR(parameter_unsupported);
19359 return bounds;
19362 /* ZSTD_dParam_withinBounds:
19363 * @return 1 if value is within dParam bounds,
19364 * 0 otherwise */
19365 static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
19367 ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam);
19368 if (ZSTD_isError(bounds.error)) return 0;
19369 if (value < bounds.lowerBound) return 0;
19370 if (value > bounds.upperBound) return 0;
19371 return 1;
19374 #define CHECK_DBOUNDS(p,v) { \
19375 RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
19378 size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value)
19380 switch (param) {
19381 case ZSTD_d_windowLogMax:
19382 *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize);
19383 return 0;
19384 case ZSTD_d_format:
19385 *value = (int)dctx->format;
19386 return 0;
19387 case ZSTD_d_stableOutBuffer:
19388 *value = (int)dctx->outBufferMode;
19389 return 0;
19390 case ZSTD_d_forceIgnoreChecksum:
19391 *value = (int)dctx->forceIgnoreChecksum;
19392 return 0;
19393 case ZSTD_d_refMultipleDDicts:
19394 *value = (int)dctx->refMultipleDDicts;
19395 return 0;
19396 case ZSTD_d_disableHuffmanAssembly:
19397 *value = (int)dctx->disableHufAsm;
19398 return 0;
19399 case ZSTD_d_maxBlockSize:
19400 *value = dctx->maxBlockSizeParam;
19401 return 0;
19402 default:;
19404 RETURN_ERROR(parameter_unsupported, "");
19407 size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
19409 RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
19410 switch(dParam) {
19411 case ZSTD_d_windowLogMax:
19412 if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
19413 CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
19414 dctx->maxWindowSize = ((size_t)1) << value;
19415 return 0;
19416 case ZSTD_d_format:
19417 CHECK_DBOUNDS(ZSTD_d_format, value);
19418 dctx->format = (ZSTD_format_e)value;
19419 return 0;
19420 case ZSTD_d_stableOutBuffer:
19421 CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
19422 dctx->outBufferMode = (ZSTD_bufferMode_e)value;
19423 return 0;
19424 case ZSTD_d_forceIgnoreChecksum:
19425 CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
19426 dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value;
19427 return 0;
19428 case ZSTD_d_refMultipleDDicts:
19429 CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
19430 if (dctx->staticSize != 0) {
19431 RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
19433 dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
19434 return 0;
19435 case ZSTD_d_disableHuffmanAssembly:
19436 CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
19437 dctx->disableHufAsm = value != 0;
19438 return 0;
19439 case ZSTD_d_maxBlockSize:
19440 if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value);
19441 dctx->maxBlockSizeParam = value;
19442 return 0;
19443 default:;
19445 RETURN_ERROR(parameter_unsupported, "");
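/* Illustrative sketch (assumption) : bounding decoder memory before streaming
 * by capping the accepted window. Frames that declare a larger window are then
 * rejected with frameParameter_windowTooLarge instead of triggering a large
 * allocation.
 *
 *   ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *   if (ZSTD_isError(ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 27))) return -1;
 *   // ... ZSTD_decompressStream(dctx, &output, &input) as usual ...
 */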
19448 size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
19450 if ( (reset == ZSTD_reset_session_only)
19451 || (reset == ZSTD_reset_session_and_parameters) ) {
19452 dctx->streamStage = zdss_init;
19453 dctx->noForwardProgress = 0;
19454 dctx->isFrameDecompression = 1;
19456 if ( (reset == ZSTD_reset_parameters)
19457 || (reset == ZSTD_reset_session_and_parameters) ) {
19458 RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
19459 ZSTD_clearDict(dctx);
19460 ZSTD_DCtx_resetParameters(dctx);
19462 return 0;
19466 size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
19468 return ZSTD_sizeof_DCtx(dctx);
19471 static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax)
19473 size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax);
19474 /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block
19475 * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing
19476 * the block at the beginning of the output buffer, and maintain a full window.
19478 * We need another blockSize worth of buffer so that we can store split
19479 * literals at the end of the block without overwriting the extDict window.
19481 unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2);
19482 unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
19483 size_t const minRBSize = (size_t) neededSize;
19484 RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
19485 frameParameter_windowTooLarge, "");
19486 return minRBSize;
19489 size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
19491 return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX);
19494 size_t ZSTD_estimateDStreamSize(size_t windowSize)
19496 size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
19497 size_t const inBuffSize = blockSize; /* no block can be larger */
19498 size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN);
19499 return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
19502 size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
19504 U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
19505 ZSTD_frameHeader zfh;
19506 size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
19507 if (ZSTD_isError(err)) return err;
19508 RETURN_ERROR_IF(err>0, srcSize_wrong, "");
19509 RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
19510 frameParameter_windowTooLarge, "");
19511 return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
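/* Illustrative sketch (assumption) : using the per-frame estimate to carve a
 * decompression stream out of a caller-provided workspace, for allocation-free
 * operation. `workspace` must be 8-byte aligned; names are hypothetical.
 *
 *   size_t const need = ZSTD_estimateDStreamSize_fromFrame(src, srcSize);
 *   if (ZSTD_isError(need) || need > workspaceSize) return -1;
 *   {   ZSTD_DStream* const zds = ZSTD_initStaticDStream(workspace, workspaceSize);
 *       if (zds == NULL) return -1;        // workspace too small or misaligned
 *   }
 */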
19515 /* ***** Decompression ***** */
19517 static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
19519 return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR;
19522 static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
19524 if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
19525 zds->oversizedDuration++;
19526 else
19527 zds->oversizedDuration = 0;
19530 static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds)
19532 return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION;
19535 /* Checks that the output buffer hasn't changed if ZSTD_bm_stable is used. */
19536 static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output)
19538 ZSTD_outBuffer const expect = zds->expectedOutBuffer;
19539 /* No requirement when ZSTD_obm_stable is not enabled. */
19540 if (zds->outBufferMode != ZSTD_bm_stable)
19541 return 0;
19542 /* Any buffer is allowed in zdss_init, this must be the same for every other call until
19543 * the context is reset.
19545 if (zds->streamStage == zdss_init)
19546 return 0;
19547 /* The buffer must match our expectation exactly. */
19548 if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
19549 return 0;
19550 RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!");
19553 /* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
19554 * and updates the stage and the output buffer state. This call is extracted so it can be
19555 * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode.
19556 * NOTE: You must break after calling this function since the streamStage is modified.
19558 static size_t ZSTD_decompressContinueStream(
19559 ZSTD_DStream* zds, char** op, char* oend,
19560 void const* src, size_t srcSize) {
19561 int const isSkipFrame = ZSTD_isSkipFrame(zds);
19562 if (zds->outBufferMode == ZSTD_bm_buffered) {
19563 size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
19564 size_t const decodedSize = ZSTD_decompressContinue(zds,
19565 zds->outBuff + zds->outStart, dstSize, src, srcSize);
19566 FORWARD_IF_ERROR(decodedSize, "");
19567 if (!decodedSize && !isSkipFrame) {
19568 zds->streamStage = zdss_read;
19569 } else {
19570 zds->outEnd = zds->outStart + decodedSize;
19571 zds->streamStage = zdss_flush;
19573 } else {
19574 /* Write directly into the output buffer */
19575 size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
19576 size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
19577 FORWARD_IF_ERROR(decodedSize, "");
19578 *op += decodedSize;
19579 /* Flushing is not needed. */
19580 zds->streamStage = zdss_read;
19581 assert(*op <= oend);
19582 assert(zds->outBufferMode == ZSTD_bm_stable);
19584 return 0;
19587 size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
19589 const char* const src = (const char*)input->src;
19590 const char* const istart = input->pos != 0 ? src + input->pos : src;
19591 const char* const iend = input->size != 0 ? src + input->size : src;
19592 const char* ip = istart;
19593 char* const dst = (char*)output->dst;
19594 char* const ostart = output->pos != 0 ? dst + output->pos : dst;
19595 char* const oend = output->size != 0 ? dst + output->size : dst;
19596 char* op = ostart;
19597 U32 someMoreWork = 1;
19599 DEBUGLOG(5, "ZSTD_decompressStream");
19600 RETURN_ERROR_IF(
19601 input->pos > input->size,
19602 srcSize_wrong,
19603 "forbidden. in: pos: %u vs size: %u",
19604 (U32)input->pos, (U32)input->size);
19605 RETURN_ERROR_IF(
19606 output->pos > output->size,
19607 dstSize_tooSmall,
19608 "forbidden. out: pos: %u vs size: %u",
19609 (U32)output->pos, (U32)output->size);
19610 DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
19611 FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), "");
19613 while (someMoreWork) {
19614 switch(zds->streamStage)
19616 case zdss_init :
19617 DEBUGLOG(5, "stage zdss_init => transparent reset ");
19618 zds->streamStage = zdss_loadHeader;
19619 zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
19620 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
19621 zds->legacyVersion = 0;
19622 #endif
19623 zds->hostageByte = 0;
19624 zds->expectedOutBuffer = *output;
19625 ZSTD_FALLTHROUGH;
19627 case zdss_loadHeader :
19628 DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
19629 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
19630 if (zds->legacyVersion) {
19631 RETURN_ERROR_IF(zds->staticSize, memory_allocation,
19632 "legacy support is incompatible with static dctx");
19633 { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
19634 if (hint==0) zds->streamStage = zdss_init;
19635 return hint;
19637 #endif
19638 { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
19639 if (zds->refMultipleDDicts && zds->ddictSet) {
19640 ZSTD_DCtx_selectFrameDDict(zds);
19642 if (ZSTD_isError(hSize)) {
19643 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
19644 U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
19645 if (legacyVersion) {
19646 ZSTD_DDict const* const ddict = ZSTD_getDDict(zds);
19647 const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL;
19648 size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0;
19649 DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
19650 RETURN_ERROR_IF(zds->staticSize, memory_allocation,
19651 "legacy support is incompatible with static dctx");
19652 FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext,
19653 zds->previousLegacyVersion, legacyVersion,
19654 dict, dictSize), "");
19655 zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
19656 { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
19657 if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */
19658 return hint;
19660 #endif
19661 return hSize; /* error */
19663 if (hSize != 0) { /* need more input */
19664 size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */
19665 size_t const remainingInput = (size_t)(iend-ip);
19666 assert(iend >= ip);
19667 if (toLoad > remainingInput) { /* not enough input to load full header */
19668 if (remainingInput > 0) {
19669 ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
19670 zds->lhSize += remainingInput;
19672 input->pos = input->size;
19673 /* check first few bytes */
19674 FORWARD_IF_ERROR(
19675 ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format),
19676 "First few bytes detected incorrect" );
19677 /* return hint input size */
19678 return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
19680 assert(ip != NULL);
19681 ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
19682 break;
19685 /* check for single-pass mode opportunity */
19686 if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
19687 && zds->fParams.frameType != ZSTD_skippableFrame
19688 && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
19689 size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format);
19690 if (cSize <= (size_t)(iend-istart)) {
19691 /* shortcut : using single-pass mode */
19692 size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
19693 if (ZSTD_isError(decompressedSize)) return decompressedSize;
19694 DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()");
19695 assert(istart != NULL);
19696 ip = istart + cSize;
19697 op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */
19698 zds->expected = 0;
19699 zds->streamStage = zdss_init;
19700 someMoreWork = 0;
19701 break;
19704 /* Check that the output buffer is large enough for ZSTD_bm_stable. */
19705 if (zds->outBufferMode == ZSTD_bm_stable
19706 && zds->fParams.frameType != ZSTD_skippableFrame
19707 && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
19708 && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
19709 RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small");
19712 /* Consume header (see ZSTDds_decodeFrameHeader) */
19713 DEBUGLOG(4, "Consume header");
19714 FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
19716 if (zds->format == ZSTD_f_zstd1
19717 && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
19718 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
19719 zds->stage = ZSTDds_skipFrame;
19720 } else {
19721 FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), "");
19722 zds->expected = ZSTD_blockHeaderSize;
19723 zds->stage = ZSTDds_decodeBlockHeader;
19726 /* control buffer memory usage */
19727 DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)",
19728 (U32)(zds->fParams.windowSize >>10),
19729 (U32)(zds->maxWindowSize >> 10) );
19730 zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
19731 RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
19732 frameParameter_windowTooLarge, "");
19733 if (zds->maxBlockSizeParam != 0)
19734 zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam);
19736 /* Adapt buffer sizes to frame header instructions */
19737 { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
19738 size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
19739 ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax)
19740 : 0;
19742 ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
19744 { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
19745 int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
19747 if (tooSmall || tooLarge) {
19748 size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
19749 DEBUGLOG(4, "inBuff : from %u to %u",
19750 (U32)zds->inBuffSize, (U32)neededInBuffSize);
19751 DEBUGLOG(4, "outBuff : from %u to %u",
19752 (U32)zds->outBuffSize, (U32)neededOutBuffSize);
19753 if (zds->staticSize) { /* static DCtx */
19754 DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
19755 assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */
19756 RETURN_ERROR_IF(
19757 bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
19758 memory_allocation, "");
19759 } else {
19760 VG_(free)(zds->inBuff);
19761 zds->inBuffSize = 0;
19762 zds->outBuffSize = 0;
19763 zds->inBuff = (char*)VG_(malloc)("zstddeclib.ZSTD_decompressStream.1", bufferSize);
19764 RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, "");
19766 zds->inBuffSize = neededInBuffSize;
19767 zds->outBuff = zds->inBuff + zds->inBuffSize;
19768 zds->outBuffSize = neededOutBuffSize;
19769 } } }
19770 zds->streamStage = zdss_read;
19771 ZSTD_FALLTHROUGH;
19773 case zdss_read:
19774 DEBUGLOG(5, "stage zdss_read");
19775 { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip));
19776 DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
19777 if (neededInSize==0) { /* end of frame */
19778 zds->streamStage = zdss_init;
19779 someMoreWork = 0;
19780 break;
19782 if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */
19783 FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
19784 assert(ip != NULL);
19785 ip += neededInSize;
19786 /* Function modifies the stage so we must break */
19787 break;
19789 if (ip==iend) { someMoreWork = 0; break; } /* no more input */
19790 zds->streamStage = zdss_load;
19791 ZSTD_FALLTHROUGH;
19793 case zdss_load:
19794 { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
19795 size_t const toLoad = neededInSize - zds->inPos;
19796 int const isSkipFrame = ZSTD_isSkipFrame(zds);
19797 size_t loadedSize;
19798 /* At this point we shouldn't be decompressing a block that we can stream. */
19799 assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)));
19800 if (isSkipFrame) {
19801 loadedSize = MIN(toLoad, (size_t)(iend-ip));
19802 } else {
19803 RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
19804 corruption_detected,
19805 "should never happen");
19806 loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
19808 if (loadedSize != 0) {
19809 /* ip may be NULL */
19810 ip += loadedSize;
19811 zds->inPos += loadedSize;
19813 if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */
19815 /* decode loaded input */
19816 zds->inPos = 0; /* input is consumed */
19817 FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), "");
19818 /* Function modifies the stage so we must break */
19819 break;
19821 case zdss_flush:
19823 size_t const toFlushSize = zds->outEnd - zds->outStart;
19824 size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
19826 op = op ? op + flushedSize : op;
19828 zds->outStart += flushedSize;
19829 if (flushedSize == toFlushSize) { /* flush completed */
19830 zds->streamStage = zdss_read;
19831 if ( (zds->outBuffSize < zds->fParams.frameContentSize)
19832 && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
19833 DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
19834 (int)(zds->outBuffSize - zds->outStart),
19835 (U32)zds->fParams.blockSizeMax);
19836 zds->outStart = zds->outEnd = 0;
19838 break;
19840 /* cannot complete flush */
19841 someMoreWork = 0;
19842 break;
19844 default:
19845 assert(0); /* impossible */
19846 RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */
19849 /* result */
19850 input->pos = (size_t)(ip - (const char*)(input->src));
19851 output->pos = (size_t)(op - (char*)(output->dst));
19853 /* Update the expected output buffer for ZSTD_obm_stable. */
19854 zds->expectedOutBuffer = *output;
19856 if ((ip==istart) && (op==ostart)) { /* no forward progress */
19857 zds->noForwardProgress ++;
19858 if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
19859 RETURN_ERROR_IF(op==oend, noForwardProgress_destFull, "");
19860 RETURN_ERROR_IF(ip==iend, noForwardProgress_inputEmpty, "");
19861 assert(0);
19863 } else {
19864 zds->noForwardProgress = 0;
19866 { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
19867 if (!nextSrcSizeHint) { /* frame fully decoded */
19868 if (zds->outEnd == zds->outStart) { /* output fully flushed */
19869 if (zds->hostageByte) {
19870 if (input->pos >= input->size) {
19871 /* can't release hostage (not present) */
19872 zds->streamStage = zdss_read;
19873 return 1;
19875 input->pos++; /* release hostage */
19876 } /* zds->hostageByte */
19877 return 0;
19878 } /* zds->outEnd == zds->outStart */
19879 if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
19880 input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */
19881 zds->hostageByte=1;
19883 return 1;
19884 } /* nextSrcSizeHint==0 */
19885 nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */
19886 assert(zds->inPos <= nextSrcSizeHint);
19887 nextSrcSizeHint -= zds->inPos; /* part already loaded*/
19888 return nextSrcSizeHint;
19892 size_t ZSTD_decompressStream_simpleArgs (
19893 ZSTD_DCtx* dctx,
19894 void* dst, size_t dstCapacity, size_t* dstPos,
19895 const void* src, size_t srcSize, size_t* srcPos)
19897 ZSTD_outBuffer output;
19898 ZSTD_inBuffer input;
19899 output.dst = dst;
19900 output.size = dstCapacity;
19901 output.pos = *dstPos;
19902 input.src = src;
19903 input.size = srcSize;
19904 input.pos = *srcPos;
19905 { size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
19906 *dstPos = output.pos;
19907 *srcPos = input.pos;
19908 return cErr;
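/* Illustration (not part of the decompressor itself) : a minimal sketch of how a
 * caller drives ZSTD_decompressStream() using its return value -- 0 means the
 * frame is fully decoded and flushed, a non-zero non-error value is a hint of
 * how many more input bytes are expected. The helper name and the refill
 * strategy are hypothetical; the snippet is kept under #if 0 so it never
 * affects this amalgamated build. */
#if 0
static size_t example_streamingDecode(ZSTD_DCtx* dctx,
                                      void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize)
{
    ZSTD_inBuffer  in  = { src, srcSize, 0 };
    ZSTD_outBuffer out = { dst, dstCapacity, 0 };
    while (in.pos < in.size) {
        size_t const hint = ZSTD_decompressStream(dctx, &out, &in);
        if (ZSTD_isError(hint)) return hint;  /* decoding error */
        if (hint == 0) break;                 /* frame done, output flushed */
        /* hint > 0 : a real caller would refill `in` and/or drain `out`
         * here before looping, otherwise no forward progress is possible. */
    }
    return out.pos;  /* bytes written into dst */
}
#endif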
19911 /**** ended inlining decompress/zstd_decompress.c ****/
19912 /**** start inlining decompress/zstd_decompress_block.c ****/
19914 * Copyright (c) Meta Platforms, Inc. and affiliates.
19915 * All rights reserved.
19917 * This source code is licensed under both the BSD-style license (found in the
19918 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
19919 * in the COPYING file in the root directory of this source tree).
19920 * You may select, at your option, one of the above-listed licenses.
19923 /* zstd_decompress_block :
19924 * this module takes care of decompressing _compressed_ block */
19926 /*-*******************************************************
19927 * Dependencies
19928 *********************************************************/
19929 /**** skipping file: ../common/zstd_deps.h ****/
19930 /**** skipping file: ../common/compiler.h ****/
19931 /**** skipping file: ../common/cpu.h ****/
19932 /**** skipping file: ../common/mem.h ****/
19933 #define FSE_STATIC_LINKING_ONLY
19934 /**** skipping file: ../common/fse.h ****/
19935 /**** skipping file: ../common/huf.h ****/
19936 /**** skipping file: ../common/zstd_internal.h ****/
19937 /**** skipping file: zstd_decompress_internal.h ****/
19938 /**** skipping file: zstd_ddict.h ****/
19939 /**** skipping file: zstd_decompress_block.h ****/
19940 /**** skipping file: ../common/bits.h ****/
19942 /*_*******************************************************
19943 * Macros
19944 **********************************************************/
19946 /* These two optional macros force the use one way or another of the two
19947 * ZSTD_decompressSequences implementations. You can't force in both directions
19948 * at the same time.
19950 #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
19951 defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
19952 #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
19953 #endif
19956 /*_*******************************************************
19957 * Memory operations
19958 **********************************************************/
19959 static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
19962 /*-*************************************************************
19963 * Block decoding
19964 ***************************************************************/
19966 static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
19968 size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
19969 assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
19970 return blockSizeMax;
19973 /*! ZSTD_getcBlockSize() :
19974 * Provides the size of compressed block from block header `src` */
19975 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
19976 blockProperties_t* bpPtr)
19978 RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
19980 { U32 const cBlockHeader = MEM_readLE24(src);
19981 U32 const cSize = cBlockHeader >> 3;
19982 bpPtr->lastBlock = cBlockHeader & 1;
19983 bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
19984 bpPtr->origSize = cSize; /* only useful for RLE */
19985 if (bpPtr->blockType == bt_rle) return 1;
19986 RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
19987 return cSize;
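/* Worked example (illustration only) of the 3-byte block header decoded above :
 * header bytes { 0x25, 0x00, 0x01 } read as a little-endian 24-bit value give
 * cBlockHeader = 0x010025, hence lastBlock = 0x010025 & 1 = 1,
 * blockType = (0x010025 >> 1) & 3 = 2 = bt_compressed, and
 * cSize = 0x010025 >> 3 = 8196 bytes of compressed payload follow.
 * The snippet is kept under #if 0 so it never affects this amalgamated build. */
#if 0
    blockProperties_t bp;
    BYTE const header[3] = { 0x25, 0x00, 0x01 };
    size_t const cSize = ZSTD_getcBlockSize(header, sizeof(header), &bp);
    /* cSize == 8196, bp.lastBlock == 1, bp.blockType == bt_compressed */
#endif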
19991 /* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
19992 static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
19993 const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
19995 size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
19996 assert(litSize <= blockSizeMax);
19997 assert(dctx->isFrameDecompression || streaming == not_streaming);
19998 assert(expectedWriteSize <= blockSizeMax);
19999 if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
20000 /* If we aren't streaming, we can just put the literals after the output
20001 * of the current block. We don't need to worry about overwriting the
20002 * extDict of our window, because it doesn't exist.
20003 * So if we have space after the end of the block, just put it there.
20005 dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
20006 dctx->litBufferEnd = dctx->litBuffer + litSize;
20007 dctx->litBufferLocation = ZSTD_in_dst;
20008 } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
20009 /* Literals fit entirely within the extra buffer, put them there to avoid
20010 * having to split the literals.
20012 dctx->litBuffer = dctx->litExtraBuffer;
20013 dctx->litBufferEnd = dctx->litBuffer + litSize;
20014 dctx->litBufferLocation = ZSTD_not_in_dst;
20015 } else {
20016 assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
20017 /* Literals must be split between the output block and the extra lit
20018 * buffer. We fill the extra lit buffer with the tail of the literals,
20019 * and put the rest of the literals at the end of the block, with
20020 * WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
20021 * This MUST not write more than our maxBlockSize beyond dst, because in
20022 * streaming mode, that could overwrite part of our extDict window.
20024 if (splitImmediately) {
20025 /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
20026 dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
20027 dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
20028 } else {
20029 /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
20030 dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
20031 dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
20033 dctx->litBufferLocation = ZSTD_split;
20034 assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
20038 /*! ZSTD_decodeLiteralsBlock() :
20039 * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
20040 * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
20041 * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
20042 * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
20044 * @return : nb of bytes read from src (< srcSize )
20045 * note : symbol not declared but exposed for fullbench */
20046 static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
20047 const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
20048 void* dst, size_t dstCapacity, const streaming_operation streaming)
20050 DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
20051 RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
20053 { const BYTE* const istart = (const BYTE*) src;
20054 symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
20055 size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
20057 switch(litEncType)
20059 case set_repeat:
20060 DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
20061 RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
20062 ZSTD_FALLTHROUGH;
20064 case set_compressed:
20065 RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
20066 { size_t lhSize, litSize, litCSize;
20067 U32 singleStream=0;
20068 U32 const lhlCode = (istart[0] >> 2) & 3;
20069 U32 const lhc = MEM_readLE32(istart);
20070 size_t hufSuccess;
20071 size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
20072 int const flags = 0
20073 | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
20074 | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
20075 switch(lhlCode)
20077 case 0: case 1: default: /* note : default is impossible, since lhlCode is in [0..3] */
20078 /* 2 - 2 - 10 - 10 */
20079 singleStream = !lhlCode;
20080 lhSize = 3;
20081 litSize = (lhc >> 4) & 0x3FF;
20082 litCSize = (lhc >> 14) & 0x3FF;
20083 break;
20084 case 2:
20085 /* 2 - 2 - 14 - 14 */
20086 lhSize = 4;
20087 litSize = (lhc >> 4) & 0x3FFF;
20088 litCSize = lhc >> 18;
20089 break;
20090 case 3:
20091 /* 2 - 2 - 18 - 18 */
20092 lhSize = 5;
20093 litSize = (lhc >> 4) & 0x3FFFF;
20094 litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
20095 break;
20097 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
20098 RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
20099 if (!singleStream)
20100 RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
20101 "Not enough literals (%zu) for the 4-streams mode (min %u)",
20102 litSize, MIN_LITERALS_FOR_4_STREAMS);
20103 RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
20104 RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
20105 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
20107 /* prefetch huffman table if cold */
20108 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
20109 PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
20112 if (litEncType==set_repeat) {
20113 if (singleStream) {
20114 hufSuccess = HUF_decompress1X_usingDTable(
20115 dctx->litBuffer, litSize, istart+lhSize, litCSize,
20116 dctx->HUFptr, flags);
20117 } else {
20118 assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
20119 hufSuccess = HUF_decompress4X_usingDTable(
20120 dctx->litBuffer, litSize, istart+lhSize, litCSize,
20121 dctx->HUFptr, flags);
20123 } else {
20124 if (singleStream) {
20125 #if defined(HUF_FORCE_DECOMPRESS_X2)
20126 hufSuccess = HUF_decompress1X_DCtx_wksp(
20127 dctx->entropy.hufTable, dctx->litBuffer, litSize,
20128 istart+lhSize, litCSize, dctx->workspace,
20129 sizeof(dctx->workspace), flags);
20130 #else
20131 hufSuccess = HUF_decompress1X1_DCtx_wksp(
20132 dctx->entropy.hufTable, dctx->litBuffer, litSize,
20133 istart+lhSize, litCSize, dctx->workspace,
20134 sizeof(dctx->workspace), flags);
20135 #endif
20136 } else {
20137 hufSuccess = HUF_decompress4X_hufOnly_wksp(
20138 dctx->entropy.hufTable, dctx->litBuffer, litSize,
20139 istart+lhSize, litCSize, dctx->workspace,
20140 sizeof(dctx->workspace), flags);
20143 if (dctx->litBufferLocation == ZSTD_split)
20145 assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
20146 ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
20147 ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
20148 dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
20149 dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
20150 assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
20153 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
20155 dctx->litPtr = dctx->litBuffer;
20156 dctx->litSize = litSize;
20157 dctx->litEntropy = 1;
20158 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
20159 return litCSize + lhSize;
20162 case set_basic:
20163 { size_t litSize, lhSize;
20164 U32 const lhlCode = ((istart[0]) >> 2) & 3;
20165 size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
20166 switch(lhlCode)
20168 case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
20169 lhSize = 1;
20170 litSize = istart[0] >> 3;
20171 break;
20172 case 1:
20173 lhSize = 2;
20174 litSize = MEM_readLE16(istart) >> 4;
20175 break;
20176 case 3:
20177 lhSize = 3;
20178 RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
20179 litSize = MEM_readLE24(istart) >> 4;
20180 break;
20183 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
20184 RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
20185 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
20186 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
20187 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
20188 RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
20189 if (dctx->litBufferLocation == ZSTD_split)
20191 ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
20192 ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
20194 else
20196 ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
20198 dctx->litPtr = dctx->litBuffer;
20199 dctx->litSize = litSize;
20200 return lhSize+litSize;
20202 /* direct reference into compressed stream */
20203 dctx->litPtr = istart+lhSize;
20204 dctx->litSize = litSize;
20205 dctx->litBufferEnd = dctx->litPtr + litSize;
20206 dctx->litBufferLocation = ZSTD_not_in_dst;
20207 return lhSize+litSize;
20210 case set_rle:
20211 { U32 const lhlCode = ((istart[0]) >> 2) & 3;
20212 size_t litSize, lhSize;
20213 size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
20214 switch(lhlCode)
20216 case 0: case 2: default: /* note : default is impossible, since lhlCode is in [0..3] */
20217 lhSize = 1;
20218 litSize = istart[0] >> 3;
20219 break;
20220 case 1:
20221 lhSize = 2;
20222 RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
20223 litSize = MEM_readLE16(istart) >> 4;
20224 break;
20225 case 3:
20226 lhSize = 3;
20227 RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
20228 litSize = MEM_readLE24(istart) >> 4;
20229 break;
20231 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
20232 RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
20233 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
20234 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
20235 if (dctx->litBufferLocation == ZSTD_split)
20237 ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
20238 ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
20240 else
20242 ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
20244 dctx->litPtr = dctx->litBuffer;
20245 dctx->litSize = litSize;
20246 return lhSize+1;
20248 default:
20249 RETURN_ERROR(corruption_detected, "impossible");
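/* Illustration of the literals section headers parsed above (no extra logic) :
 * - raw / RLE literals (set_basic, set_rle) : size_format 0 or 2 uses a 1-byte
 *   header with litSize = byte0 >> 3 (up to 31), size_format 1 uses 2 bytes
 *   with litSize = LE16 >> 4, and size_format 3 uses 3 bytes with litSize = LE24 >> 4.
 * - compressed literals (set_compressed, set_repeat) pack two sizes; e.g. the
 *   "2 - 2 - 10 - 10" layout is 2 bits block type, 2 bits size format, 10 bits
 *   regenerated size, 10 bits compressed size.
 * Worked example : a single header byte 0x58 has (0x58 & 3) == 0 == set_basic
 * and lhlCode == (0x58 >> 2) & 3 == 2, so lhSize == 1 and litSize == 0x58 >> 3
 * == 11 : eleven raw literal bytes follow the header. */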
20254 /* Hidden declaration for fullbench */
20255 size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
20256 const void* src, size_t srcSize,
20257 void* dst, size_t dstCapacity);
20258 size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
20259 const void* src, size_t srcSize,
20260 void* dst, size_t dstCapacity)
20262 dctx->isFrameDecompression = 0;
20263 return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
20266 /* Default FSE distribution tables.
20267 * These are pre-calculated FSE decoding tables using the default distributions defined in the specification :
20268 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
20269 * They were generated programmatically with the following method :
20270 * - start from the default distributions, present in /lib/common/zstd_internal.h
20271 * - generate the tables normally, using ZSTD_buildFSETable()
20272 * - print out the content of the tables
20273 * - prettify the output, reported below, and test with the fuzzer to ensure it's correct */
20275 /* Default FSE distribution table for Literal Lengths */
20276 static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
20277 { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
20278 /* nextState, nbAddBits, nbBits, baseVal */
20279 { 0, 0, 4, 0}, { 16, 0, 4, 0},
20280 { 32, 0, 5, 1}, { 0, 0, 5, 3},
20281 { 0, 0, 5, 4}, { 0, 0, 5, 6},
20282 { 0, 0, 5, 7}, { 0, 0, 5, 9},
20283 { 0, 0, 5, 10}, { 0, 0, 5, 12},
20284 { 0, 0, 6, 14}, { 0, 1, 5, 16},
20285 { 0, 1, 5, 20}, { 0, 1, 5, 22},
20286 { 0, 2, 5, 28}, { 0, 3, 5, 32},
20287 { 0, 4, 5, 48}, { 32, 6, 5, 64},
20288 { 0, 7, 5, 128}, { 0, 8, 6, 256},
20289 { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
20290 { 32, 0, 4, 0}, { 0, 0, 4, 1},
20291 { 0, 0, 5, 2}, { 32, 0, 5, 4},
20292 { 0, 0, 5, 5}, { 32, 0, 5, 7},
20293 { 0, 0, 5, 8}, { 32, 0, 5, 10},
20294 { 0, 0, 5, 11}, { 0, 0, 6, 13},
20295 { 32, 1, 5, 16}, { 0, 1, 5, 18},
20296 { 32, 1, 5, 22}, { 0, 2, 5, 24},
20297 { 32, 3, 5, 32}, { 0, 3, 5, 40},
20298 { 0, 6, 4, 64}, { 16, 6, 4, 64},
20299 { 32, 7, 5, 128}, { 0, 9, 6, 512},
20300 { 0, 11, 6, 2048}, { 48, 0, 4, 0},
20301 { 16, 0, 4, 1}, { 32, 0, 5, 2},
20302 { 32, 0, 5, 3}, { 32, 0, 5, 5},
20303 { 32, 0, 5, 6}, { 32, 0, 5, 8},
20304 { 32, 0, 5, 9}, { 32, 0, 5, 11},
20305 { 32, 0, 5, 12}, { 0, 0, 6, 15},
20306 { 32, 1, 5, 18}, { 32, 1, 5, 20},
20307 { 32, 2, 5, 24}, { 32, 2, 5, 28},
20308 { 32, 3, 5, 40}, { 32, 4, 5, 48},
20309 { 0, 16, 6,65536}, { 0, 15, 6,32768},
20310 { 0, 14, 6,16384}, { 0, 13, 6, 8192},
20311 }; /* LL_defaultDTable */
20313 /* Default FSE distribution table for Offset Codes */
20314 static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
20315 { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
20316 /* nextState, nbAddBits, nbBits, baseVal */
20317 { 0, 0, 5, 0}, { 0, 6, 4, 61},
20318 { 0, 9, 5, 509}, { 0, 15, 5,32765},
20319 { 0, 21, 5,2097149}, { 0, 3, 5, 5},
20320 { 0, 7, 4, 125}, { 0, 12, 5, 4093},
20321 { 0, 18, 5,262141}, { 0, 23, 5,8388605},
20322 { 0, 5, 5, 29}, { 0, 8, 4, 253},
20323 { 0, 14, 5,16381}, { 0, 20, 5,1048573},
20324 { 0, 2, 5, 1}, { 16, 7, 4, 125},
20325 { 0, 11, 5, 2045}, { 0, 17, 5,131069},
20326 { 0, 22, 5,4194301}, { 0, 4, 5, 13},
20327 { 16, 8, 4, 253}, { 0, 13, 5, 8189},
20328 { 0, 19, 5,524285}, { 0, 1, 5, 1},
20329 { 16, 6, 4, 61}, { 0, 10, 5, 1021},
20330 { 0, 16, 5,65533}, { 0, 28, 5,268435453},
20331 { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
20332 { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
20333 }; /* OF_defaultDTable */
20336 /* Default FSE distribution table for Match Lengths */
20337 static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
20338 { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
20339 /* nextState, nbAddBits, nbBits, baseVal */
20340 { 0, 0, 6, 3}, { 0, 0, 4, 4},
20341 { 32, 0, 5, 5}, { 0, 0, 5, 6},
20342 { 0, 0, 5, 8}, { 0, 0, 5, 9},
20343 { 0, 0, 5, 11}, { 0, 0, 6, 13},
20344 { 0, 0, 6, 16}, { 0, 0, 6, 19},
20345 { 0, 0, 6, 22}, { 0, 0, 6, 25},
20346 { 0, 0, 6, 28}, { 0, 0, 6, 31},
20347 { 0, 0, 6, 34}, { 0, 1, 6, 37},
20348 { 0, 1, 6, 41}, { 0, 2, 6, 47},
20349 { 0, 3, 6, 59}, { 0, 4, 6, 83},
20350 { 0, 7, 6, 131}, { 0, 9, 6, 515},
20351 { 16, 0, 4, 4}, { 0, 0, 4, 5},
20352 { 32, 0, 5, 6}, { 0, 0, 5, 7},
20353 { 32, 0, 5, 9}, { 0, 0, 5, 10},
20354 { 0, 0, 6, 12}, { 0, 0, 6, 15},
20355 { 0, 0, 6, 18}, { 0, 0, 6, 21},
20356 { 0, 0, 6, 24}, { 0, 0, 6, 27},
20357 { 0, 0, 6, 30}, { 0, 0, 6, 33},
20358 { 0, 1, 6, 35}, { 0, 1, 6, 39},
20359 { 0, 2, 6, 43}, { 0, 3, 6, 51},
20360 { 0, 4, 6, 67}, { 0, 5, 6, 99},
20361 { 0, 8, 6, 259}, { 32, 0, 4, 4},
20362 { 48, 0, 4, 4}, { 16, 0, 4, 5},
20363 { 32, 0, 5, 7}, { 32, 0, 5, 8},
20364 { 32, 0, 5, 10}, { 32, 0, 5, 11},
20365 { 0, 0, 6, 14}, { 0, 0, 6, 17},
20366 { 0, 0, 6, 20}, { 0, 0, 6, 23},
20367 { 0, 0, 6, 26}, { 0, 0, 6, 29},
20368 { 0, 0, 6, 32}, { 0, 16, 6,65539},
20369 { 0, 15, 6,32771}, { 0, 14, 6,16387},
20370 { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
20371 { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
20372 }; /* ML_defaultDTable */
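/* How to read the tables above (illustration) : each row after the header is
 * { nextState, nbAddBits, nbBits, baseVal }. For instance the LL_defaultDTable
 * entry { 0, 1, 5, 16 } decodes a literal length of baseVal 16 plus 1 extra
 * bit read from the bitstream (so 16 or 17), and the next FSE state is
 * nextState 0 plus the following 5 bits. The header row { 1, 1, 1, NORMLOG }
 * only stores fastMode and tableLog, so each table holds (1 << NORMLOG)
 * decoding cells plus that header. */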
20375 static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
20377 void* ptr = dt;
20378 ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
20379 ZSTD_seqSymbol* const cell = dt + 1;
20381 DTableH->tableLog = 0;
20382 DTableH->fastMode = 0;
20384 cell->nbBits = 0;
20385 cell->nextState = 0;
20386 assert(nbAddBits < 255);
20387 cell->nbAdditionalBits = nbAddBits;
20388 cell->baseValue = baseValue;
20392 /* ZSTD_buildFSETable() :
20393 * generate FSE decoding table for one symbol (ll, ml or off)
20394 * cannot fail if input is valid =>
20395 * all inputs are presumed validated at this stage */
20396 FORCE_INLINE_TEMPLATE
20397 void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
20398 const short* normalizedCounter, unsigned maxSymbolValue,
20399 const U32* baseValue, const U8* nbAdditionalBits,
20400 unsigned tableLog, void* wksp, size_t wkspSize)
20402 ZSTD_seqSymbol* const tableDecode = dt+1;
20403 U32 const maxSV1 = maxSymbolValue + 1;
20404 U32 const tableSize = 1 << tableLog;
20406 U16* symbolNext = (U16*)wksp;
20407 BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
20408 U32 highThreshold = tableSize - 1;
20411 /* Sanity Checks */
20412 assert(maxSymbolValue <= MaxSeq);
20413 assert(tableLog <= MaxFSELog);
20414 assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
20415 (void)wkspSize;
20416 /* Init, lay down lowprob symbols */
20417 { ZSTD_seqSymbol_header DTableH;
20418 DTableH.tableLog = tableLog;
20419 DTableH.fastMode = 1;
20420 { S16 const largeLimit= (S16)(1 << (tableLog-1));
20421 U32 s;
20422 for (s=0; s<maxSV1; s++) {
20423 if (normalizedCounter[s]==-1) {
20424 tableDecode[highThreshold--].baseValue = s;
20425 symbolNext[s] = 1;
20426 } else {
20427 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
20428 assert(normalizedCounter[s]>=0);
20429 symbolNext[s] = (U16)normalizedCounter[s];
20430 } } }
20431 ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
20434 /* Spread symbols */
20435 assert(tableSize <= 512);
20436 /* Specialized symbol spreading for the case when there are
20437 * no low probability (-1 count) symbols. When compressing
20438 * small blocks we avoid low probability symbols so that decoding
20439 * hits this fast path, since header decoding speed matters more.
20441 if (highThreshold == tableSize - 1) {
20442 size_t const tableMask = tableSize-1;
20443 size_t const step = FSE_TABLESTEP(tableSize);
20444 /* First lay down the symbols in order.
20445 * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
20446 * misses since small blocks generally have small table logs, so nearly
20447 * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
20448 * our buffer to handle the over-write.
20451 U64 const add = 0x0101010101010101ull;
20452 size_t pos = 0;
20453 U64 sv = 0;
20454 U32 s;
20455 for (s=0; s<maxSV1; ++s, sv += add) {
20456 int i;
20457 int const n = normalizedCounter[s];
20458 MEM_write64(spread + pos, sv);
20459 for (i = 8; i < n; i += 8) {
20460 MEM_write64(spread + pos + i, sv);
20462 assert(n>=0);
20463 pos += (size_t)n;
20466 /* Now we spread those positions across the table.
20467 * The benefit of doing it in two stages is that we avoid the
20468 * variable size inner loop, which caused lots of branch misses.
20469 * Now we can run through all the positions without any branch misses.
20470 * We unroll the loop twice, since that is what empirically worked best.
20473 size_t position = 0;
20474 size_t s;
20475 size_t const unroll = 2;
20476 assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
20477 for (s = 0; s < (size_t)tableSize; s += unroll) {
20478 size_t u;
20479 for (u = 0; u < unroll; ++u) {
20480 size_t const uPosition = (position + (u * step)) & tableMask;
20481 tableDecode[uPosition].baseValue = spread[s + u];
20483 position = (position + (unroll * step)) & tableMask;
20485 assert(position == 0);
20487 } else {
20488 U32 const tableMask = tableSize-1;
20489 U32 const step = FSE_TABLESTEP(tableSize);
20490 U32 s, position = 0;
20491 for (s=0; s<maxSV1; s++) {
20492 int i;
20493 int const n = normalizedCounter[s];
20494 for (i=0; i<n; i++) {
20495 tableDecode[position].baseValue = s;
20496 position = (position + step) & tableMask;
20497 while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
20499 assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
20502 /* Build Decoding table */
20504 U32 u;
20505 for (u=0; u<tableSize; u++) {
20506 U32 const symbol = tableDecode[u].baseValue;
20507 U32 const nextState = symbolNext[symbol]++;
20508 tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
20509 tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
20510 assert(nbAdditionalBits[symbol] < 255);
20511 tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
20512 tableDecode[u].baseValue = baseValue[symbol];
20517 /* Avoids the FORCE_INLINE of the _body() function. */
20518 static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
20519 const short* normalizedCounter, unsigned maxSymbolValue,
20520 const U32* baseValue, const U8* nbAdditionalBits,
20521 unsigned tableLog, void* wksp, size_t wkspSize)
20523 ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
20524 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
20527 #if DYNAMIC_BMI2
20528 BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
20529 const short* normalizedCounter, unsigned maxSymbolValue,
20530 const U32* baseValue, const U8* nbAdditionalBits,
20531 unsigned tableLog, void* wksp, size_t wkspSize)
20533 ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
20534 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
20536 #endif
20538 void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
20539 const short* normalizedCounter, unsigned maxSymbolValue,
20540 const U32* baseValue, const U8* nbAdditionalBits,
20541 unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
20543 #if DYNAMIC_BMI2
20544 if (bmi2) {
20545 ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
20546 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
20547 return;
20549 #endif
20550 (void)bmi2;
20551 ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
20552 baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
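/* Worked example of the state assignment in ZSTD_buildFSETable_body() above
 * (illustration only) : with tableLog == 6 (tableSize == 64), a symbol whose
 * normalized count is 3 owns three cells, and its per-cell counter nextState
 * takes the values 3, 4, 5 in table order. For the cell where nextState == 3 :
 * nbBits = 6 - ZSTD_highbit32(3) = 5 and the stored nextState field is
 * (3 << 5) - 64 = 32 ; for nextState == 4 : nbBits = 6 - 2 = 4 and the field is
 * (4 << 4) - 64 = 0. Cells reached from low counter values therefore consume
 * more bits, which is how FSE keeps the state distribution balanced. */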
20556 /*! ZSTD_buildSeqTable() :
20557 * @return : nb bytes read from src,
20558 * or an error code if it fails */
20559 static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
20560 symbolEncodingType_e type, unsigned max, U32 maxLog,
20561 const void* src, size_t srcSize,
20562 const U32* baseValue, const U8* nbAdditionalBits,
20563 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
20564 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
20565 int bmi2)
20567 switch(type)
20569 case set_rle :
20570 RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
20571 RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
20572 { U32 const symbol = *(const BYTE*)src;
20573 U32 const baseline = baseValue[symbol];
20574 U8 const nbBits = nbAdditionalBits[symbol];
20575 ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
20577 *DTablePtr = DTableSpace;
20578 return 1;
20579 case set_basic :
20580 *DTablePtr = defaultTable;
20581 return 0;
20582 case set_repeat:
20583 RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
20584 /* prefetch FSE table if used */
20585 if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
20586 const void* const pStart = *DTablePtr;
20587 size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
20588 PREFETCH_AREA(pStart, pSize);
20590 return 0;
20591 case set_compressed :
20592 { unsigned tableLog;
20593 S16 norm[MaxSeq+1];
20594 size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
20595 RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
20596 RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
20597 ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
20598 *DTablePtr = DTableSpace;
20599 return headerSize;
20601 default :
20602 assert(0);
20603 RETURN_ERROR(GENERIC, "impossible");
20607 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
20608 const void* src, size_t srcSize)
20610 const BYTE* const istart = (const BYTE*)src;
20611 const BYTE* const iend = istart + srcSize;
20612 const BYTE* ip = istart;
20613 int nbSeq;
20614 DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
20616 /* check */
20617 RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
20619 /* SeqHead */
20620 nbSeq = *ip++;
20621 if (nbSeq > 0x7F) {
20622 if (nbSeq == 0xFF) {
20623 RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
20624 nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
20625 ip+=2;
20626 } else {
20627 RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
20628 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
20631 *nbSeqPtr = nbSeq;
20633 if (nbSeq == 0) {
20634 /* No sequence : section ends immediately */
20635 RETURN_ERROR_IF(ip != iend, corruption_detected,
20636 "extraneous data present in the Sequences section");
20637 return (size_t)(ip - istart);
20640 /* FSE table descriptors */
20641 RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
20642 RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
20643 { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
20644 symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
20645 symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
20646 ip++;
20648 /* Build DTables */
20649 { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
20650 LLtype, MaxLL, LLFSELog,
20651 ip, iend-ip,
20652 LL_base, LL_bits,
20653 LL_defaultDTable, dctx->fseEntropy,
20654 dctx->ddictIsCold, nbSeq,
20655 dctx->workspace, sizeof(dctx->workspace),
20656 ZSTD_DCtx_get_bmi2(dctx));
20657 RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
20658 ip += llhSize;
20661 { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
20662 OFtype, MaxOff, OffFSELog,
20663 ip, iend-ip,
20664 OF_base, OF_bits,
20665 OF_defaultDTable, dctx->fseEntropy,
20666 dctx->ddictIsCold, nbSeq,
20667 dctx->workspace, sizeof(dctx->workspace),
20668 ZSTD_DCtx_get_bmi2(dctx));
20669 RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
20670 ip += ofhSize;
20673 { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
20674 MLtype, MaxML, MLFSELog,
20675 ip, iend-ip,
20676 ML_base, ML_bits,
20677 ML_defaultDTable, dctx->fseEntropy,
20678 dctx->ddictIsCold, nbSeq,
20679 dctx->workspace, sizeof(dctx->workspace),
20680 ZSTD_DCtx_get_bmi2(dctx));
20681 RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
20682 ip += mlhSize;
20686 return ip-istart;
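/* Illustration of the Sequences section header parsed above (no extra logic) :
 * the first byte is nbSeq ; 0 means the section ends immediately, values up to
 * 0x7F are taken literally, 0x80..0xFE means nbSeq = ((byte0 - 0x80) << 8) +
 * byte1, and 0xFF means nbSeq = LE16(byte1, byte2) + LONGNBSEQ. For example the
 * bytes 0x81 0x2C encode (1 << 8) + 44 = 300 sequences. The following byte then
 * carries the three compression modes : LLtype = byte >> 6,
 * OFtype = (byte >> 4) & 3, MLtype = (byte >> 2) & 3, with the bottom two
 * (reserved) bits required to be zero. */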
20690 typedef struct {
20691 size_t litLength;
20692 size_t matchLength;
20693 size_t offset;
20694 } seq_t;
20696 typedef struct {
20697 size_t state;
20698 const ZSTD_seqSymbol* table;
20699 } ZSTD_fseState;
20701 typedef struct {
20702 BIT_DStream_t DStream;
20703 ZSTD_fseState stateLL;
20704 ZSTD_fseState stateOffb;
20705 ZSTD_fseState stateML;
20706 size_t prevOffset[ZSTD_REP_NUM];
20707 } seqState_t;
20709 /*! ZSTD_overlapCopy8() :
20710 * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
20711 * If the offset is < 8 then the offset is spread to at least 8 bytes.
20713 * Precondition: *ip <= *op
20714 * Postcondition: *op - *ip >= 8
20716 HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
20717 assert(*ip <= *op);
20718 if (offset < 8) {
20719 /* close range match, overlap */
20720 static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
20721 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
20722 int const sub2 = dec64table[offset];
20723 (*op)[0] = (*ip)[0];
20724 (*op)[1] = (*ip)[1];
20725 (*op)[2] = (*ip)[2];
20726 (*op)[3] = (*ip)[3];
20727 *ip += dec32table[offset];
20728 ZSTD_copy4(*op+4, *ip);
20729 *ip -= sub2;
20730 } else {
20731 ZSTD_copy8(*op, *ip);
20733 *ip += 8;
20734 *op += 8;
20735 assert(*op - *ip >= 8);
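/* Worked example (illustration) for the smallest offset : with offset == 1
 * (a one-byte run), dec32table[1] == 1 and dec64table[1] == 8. The four
 * byte-by-byte copies replicate the repeated byte, *ip is advanced by 1, the
 * 4-byte copy runs, *ip is pulled back by 8, and the final unconditional
 * "*ip += 8; *op += 8;" leaves op advanced by 8 but ip by only 1. The distance
 * op - ip therefore grows from 1 to 8, so the caller can continue with plain
 * 8-byte copies without overlap concerns. */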
20738 /*! ZSTD_safecopy() :
20739 * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
20740 * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
20741 * This function is only called in the uncommon case where the sequence is near the end of the block. It
20742 * should be fast for a single long sequence, but can be slow for several short sequences.
20744 * @param ovtype controls the overlap detection
20745 * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
20746 * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
20747 * The src buffer must be before the dst buffer.
20749 static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
20750 ptrdiff_t const diff = op - ip;
20751 BYTE* const oend = op + length;
20753 assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
20754 (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
20756 if (length < 8) {
20757 /* Handle short lengths. */
20758 while (op < oend) *op++ = *ip++;
20759 return;
20761 if (ovtype == ZSTD_overlap_src_before_dst) {
20762 /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
20763 assert(length >= 8);
20764 ZSTD_overlapCopy8(&op, &ip, diff);
20765 length -= 8;
20766 assert(op - ip >= 8);
20767 assert(op <= oend);
20770 if (oend <= oend_w) {
20771 /* No risk of overwrite. */
20772 ZSTD_wildcopy(op, ip, length, ovtype);
20773 return;
20775 if (op <= oend_w) {
20776 /* Wildcopy until we get close to the end. */
20777 assert(oend > oend_w);
20778 ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
20779 ip += oend_w - op;
20780 op += oend_w - op;
20782 /* Handle the leftovers. */
20783 while (op < oend) *op++ = *ip++;
20786 /* ZSTD_safecopyDstBeforeSrc():
20787 * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
20788 * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
20789 static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
20790 ptrdiff_t const diff = op - ip;
20791 BYTE* const oend = op + length;
20793 if (length < 8 || diff > -8) {
20794 /* Handle short lengths, close overlaps, and dst not before src. */
20795 while (op < oend) *op++ = *ip++;
20796 return;
20799 if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
20800 ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
20801 ip += oend - WILDCOPY_OVERLENGTH - op;
20802 op += oend - WILDCOPY_OVERLENGTH - op;
20805 /* Handle the leftovers. */
20806 while (op < oend) *op++ = *ip++;
20809 /* ZSTD_execSequenceEnd():
20810 * This version handles cases that are near the end of the output buffer. It requires
20811 * more careful checks to make sure there is no overflow. By separating out these hard
20812 * and unlikely cases, we can speed up the common cases.
20814 * NOTE: This function needs to be fast for a single long sequence, but doesn't need
20815 * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
20817 FORCE_NOINLINE
20818 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
20819 size_t ZSTD_execSequenceEnd(BYTE* op,
20820 BYTE* const oend, seq_t sequence,
20821 const BYTE** litPtr, const BYTE* const litLimit,
20822 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
20824 BYTE* const oLitEnd = op + sequence.litLength;
20825 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
20826 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
20827 const BYTE* match = oLitEnd - sequence.offset;
20828 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
20830 /* bounds checks : careful of address space overflow in 32-bit mode */
20831 RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
20832 RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
20833 assert(op < op + sequenceLength);
20834 assert(oLitEnd < op + sequenceLength);
20836 /* copy literals */
20837 ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
20838 op = oLitEnd;
20839 *litPtr = iLitEnd;
20841 /* copy Match */
20842 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
20843 /* offset beyond prefix */
20844 RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
20845 match = dictEnd - (prefixStart - match);
20846 if (match + sequence.matchLength <= dictEnd) {
20847 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
20848 return sequenceLength;
20850 /* span extDict & currentPrefixSegment */
20851 { size_t const length1 = dictEnd - match;
20852 ZSTD_memmove(oLitEnd, match, length1);
20853 op = oLitEnd + length1;
20854 sequence.matchLength -= length1;
20855 match = prefixStart;
20858 ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
20859 return sequenceLength;
20862 /* ZSTD_execSequenceEndSplitLitBuffer():
20863 * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
20865 FORCE_NOINLINE
20866 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
20867 size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
20868 BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
20869 const BYTE** litPtr, const BYTE* const litLimit,
20870 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
20872 BYTE* const oLitEnd = op + sequence.litLength;
20873 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
20874 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
20875 const BYTE* match = oLitEnd - sequence.offset;
20878 /* bounds checks : careful of address space overflow in 32-bit mode */
20879 RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
20880 RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
20881 assert(op < op + sequenceLength);
20882 assert(oLitEnd < op + sequenceLength);
20884 /* copy literals */
20885 RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
20886 ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
20887 op = oLitEnd;
20888 *litPtr = iLitEnd;
20890 /* copy Match */
20891 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
20892 /* offset beyond prefix */
20893 RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
20894 match = dictEnd - (prefixStart - match);
20895 if (match + sequence.matchLength <= dictEnd) {
20896 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
20897 return sequenceLength;
20899 /* span extDict & currentPrefixSegment */
20900 { size_t const length1 = dictEnd - match;
20901 ZSTD_memmove(oLitEnd, match, length1);
20902 op = oLitEnd + length1;
20903 sequence.matchLength -= length1;
20904 match = prefixStart;
20907 ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
20908 return sequenceLength;
20911 HINT_INLINE
20912 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
20913 size_t ZSTD_execSequence(BYTE* op,
20914 BYTE* const oend, seq_t sequence,
20915 const BYTE** litPtr, const BYTE* const litLimit,
20916 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
20918 BYTE* const oLitEnd = op + sequence.litLength;
20919 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
20920 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
20921 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
20922 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
20923 const BYTE* match = oLitEnd - sequence.offset;
20925 assert(op != NULL /* Precondition */);
20926 assert(oend_w < oend /* No underflow */);
20928 #if defined(__aarch64__)
20929 /* prefetch sequence starting from match that will be used for copy later */
20930 PREFETCH_L1(match);
20931 #endif
20932 /* Handle edge cases in a slow path:
20933 * - Read beyond end of literals
20934 * - Match end is within WILDCOPY_OVERLENGTH of oend
20935 * - 32-bit mode and the match length overflows
20937 if (UNLIKELY(
20938 iLitEnd > litLimit ||
20939 oMatchEnd > oend_w ||
20940 (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
20941 return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
20943 /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
20944 assert(op <= oLitEnd /* No overflow */);
20945 assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
20946 assert(oMatchEnd <= oend /* No underflow */);
20947 assert(iLitEnd <= litLimit /* Literal length is in bounds */);
20948 assert(oLitEnd <= oend_w /* Can wildcopy literals */);
20949 assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
20951 /* Copy Literals:
20952 * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
20953 * We likely don't need the full 32-byte wildcopy.
20955 assert(WILDCOPY_OVERLENGTH >= 16);
20956 ZSTD_copy16(op, (*litPtr));
20957 if (UNLIKELY(sequence.litLength > 16)) {
20958 ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
20960 op = oLitEnd;
20961 *litPtr = iLitEnd; /* update for next sequence */
20963 /* Copy Match */
20964 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
20965 /* offset beyond prefix -> go into extDict */
20966 RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
20967 match = dictEnd + (match - prefixStart);
20968 if (match + sequence.matchLength <= dictEnd) {
20969 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
20970 return sequenceLength;
20972 /* span extDict & currentPrefixSegment */
20973 { size_t const length1 = dictEnd - match;
20974 ZSTD_memmove(oLitEnd, match, length1);
20975 op = oLitEnd + length1;
20976 sequence.matchLength -= length1;
20977 match = prefixStart;
20980 /* Match within prefix of 1 or more bytes */
20981 assert(op <= oMatchEnd);
20982 assert(oMatchEnd <= oend_w);
20983 assert(match >= prefixStart);
20984 assert(sequence.matchLength >= 1);
20986 /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
20987 * without overlap checking.
20989 if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
20990 /* We bet on a full wildcopy for matches, since we expect matches to be
20991 * longer than literals (in general). In silesia, ~10% of matches are longer
20992 * than 16 bytes.
20994 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
20995 return sequenceLength;
20997 assert(sequence.offset < WILDCOPY_VECLEN);
20999 /* Copy 8 bytes and spread the offset to be >= 8. */
21000 ZSTD_overlapCopy8(&op, &match, sequence.offset);
21002 /* If the match length is > 8 bytes, then continue with the wildcopy. */
21003 if (sequence.matchLength > 8) {
21004 assert(op < oMatchEnd);
21005 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
21007 return sequenceLength;
21010 HINT_INLINE
21011 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
21012 size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
21013 BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
21014 const BYTE** litPtr, const BYTE* const litLimit,
21015 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
21017 BYTE* const oLitEnd = op + sequence.litLength;
21018 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
21019 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
21020 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
21021 const BYTE* match = oLitEnd - sequence.offset;
21023 assert(op != NULL /* Precondition */);
21024 assert(oend_w < oend /* No underflow */);
21025 /* Handle edge cases in a slow path:
21026 * - Read beyond end of literals
21027 * - Match end is within WILDCOPY_OVERLENGTH of oend
21028 * - 32-bit mode and the match length overflows
21030 if (UNLIKELY(
21031 iLitEnd > litLimit ||
21032 oMatchEnd > oend_w ||
21033 (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
21034 return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
21036 /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
21037 assert(op <= oLitEnd /* No overflow */);
21038 assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
21039 assert(oMatchEnd <= oend /* No underflow */);
21040 assert(iLitEnd <= litLimit /* Literal length is in bounds */);
21041 assert(oLitEnd <= oend_w /* Can wildcopy literals */);
21042 assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
21044 /* Copy Literals:
21045 * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
21046 * We likely don't need the full 32-byte wildcopy.
21048 assert(WILDCOPY_OVERLENGTH >= 16);
21049 ZSTD_copy16(op, (*litPtr));
21050 if (UNLIKELY(sequence.litLength > 16)) {
21051 ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
21053 op = oLitEnd;
21054 *litPtr = iLitEnd; /* update for next sequence */
21056 /* Copy Match */
21057 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
21058 /* offset beyond prefix -> go into extDict */
21059 RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
21060 match = dictEnd + (match - prefixStart);
21061 if (match + sequence.matchLength <= dictEnd) {
21062 ZSTD_memmove(oLitEnd, match, sequence.matchLength);
21063 return sequenceLength;
21065 /* span extDict & currentPrefixSegment */
21066 { size_t const length1 = dictEnd - match;
21067 ZSTD_memmove(oLitEnd, match, length1);
21068 op = oLitEnd + length1;
21069 sequence.matchLength -= length1;
21070 match = prefixStart;
21072 /* Match within prefix of 1 or more bytes */
21073 assert(op <= oMatchEnd);
21074 assert(oMatchEnd <= oend_w);
21075 assert(match >= prefixStart);
21076 assert(sequence.matchLength >= 1);
21078 /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
21079 * without overlap checking.
21081 if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
21082 /* We bet on a full wildcopy for matches, since we expect matches to be
21083 * longer than literals (in general). In silesia, ~10% of matches are longer
21084 * than 16 bytes.
21086 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
21087 return sequenceLength;
21089 assert(sequence.offset < WILDCOPY_VECLEN);
21091 /* Copy 8 bytes and spread the offset to be >= 8. */
21092 ZSTD_overlapCopy8(&op, &match, sequence.offset);
21094 /* If the match length is > 8 bytes, then continue with the wildcopy. */
21095 if (sequence.matchLength > 8) {
21096 assert(op < oMatchEnd);
21097 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
21099 return sequenceLength;
21103 static void
21104 ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
21106 const void* ptr = dt;
21107 const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
21108 DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
21109 DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
21110 (U32)DStatePtr->state, DTableH->tableLog);
21111 BIT_reloadDStream(bitD);
21112 DStatePtr->table = dt + 1;
21115 FORCE_INLINE_TEMPLATE void
21116 ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
21118 size_t const lowBits = BIT_readBits(bitD, nbBits);
21119 DStatePtr->state = nextState + lowBits;
21122 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
21123 * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
21124 * bits before reloading. This value is the maximum number of bits we read
21125 * after reloading when we are decoding long offsets.
21127 #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
21128 (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
21129 ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
21130 : 0)
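/* For illustration : with the constants used elsewhere in this file
 * (ZSTD_WINDOWLOG_MAX_32 == 30, STREAM_ACCUMULATOR_MIN_32 == 25) this macro
 * evaluates to 30 - 25 = 5, which is also what the ZSTD_STATIC_ASSERT inside
 * ZSTD_decodeSequence() below checks. */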
21132 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
21135 * ZSTD_decodeSequence():
21136 * @p longOffsets : tells the decoder to reload more bits while decoding large offsets
21137 * only used in 32-bit mode
21138 * @return : Sequence (litL + matchL + offset)
21140 FORCE_INLINE_TEMPLATE seq_t
21141 ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
21143 seq_t seq;
21145 * ZSTD_seqSymbol is a 64 bits wide structure.
21146 * It can be loaded in one operation
21147 * and its fields extracted by simply shifting or bit-extracting on aarch64.
21148 * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
21149 * operations that cause performance drop. This can be avoided by using this
21150 * ZSTD_memcpy hack.
21152 #if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
21153 ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
21154 ZSTD_seqSymbol* const llDInfo = &llDInfoS;
21155 ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
21156 ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
21157 ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
21158 ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
21159 ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
21160 #else
21161 const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
21162 const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
21163 const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
21164 #endif
21165 seq.matchLength = mlDInfo->baseValue;
21166 seq.litLength = llDInfo->baseValue;
21167 { U32 const ofBase = ofDInfo->baseValue;
21168 BYTE const llBits = llDInfo->nbAdditionalBits;
21169 BYTE const mlBits = mlDInfo->nbAdditionalBits;
21170 BYTE const ofBits = ofDInfo->nbAdditionalBits;
21171 BYTE const totalBits = llBits+mlBits+ofBits;
21173 U16 const llNext = llDInfo->nextState;
21174 U16 const mlNext = mlDInfo->nextState;
21175 U16 const ofNext = ofDInfo->nextState;
21176 U32 const llnbBits = llDInfo->nbBits;
21177 U32 const mlnbBits = mlDInfo->nbBits;
21178 U32 const ofnbBits = ofDInfo->nbBits;
21180 assert(llBits <= MaxLLBits);
21181 assert(mlBits <= MaxMLBits);
21182 assert(ofBits <= MaxOff);
21184 * As gcc has better branch and block analyzers, it is sometimes only
21185 * worthwhile to mark likeliness for clang; doing so gives around 3-4%
21186 * better performance.
21189 /* sequence */
21190 { size_t offset;
21191 if (ofBits > 1) {
21192 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
21193 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
21194 ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
21195 ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
21196 if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
21197 /* Always read extra bits, this keeps the logic simple,
21198 * avoids branches, and avoids accidentally reading 0 bits.
21200 U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
21201 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
21202 BIT_reloadDStream(&seqState->DStream);
21203 offset += BIT_readBitsFast(&seqState->DStream, extraBits);
21204 } else {
21205 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
21206 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
21208 seqState->prevOffset[2] = seqState->prevOffset[1];
21209 seqState->prevOffset[1] = seqState->prevOffset[0];
21210 seqState->prevOffset[0] = offset;
21211 } else {
21212 U32 const ll0 = (llDInfo->baseValue == 0);
21213 if (LIKELY((ofBits == 0))) {
21214 offset = seqState->prevOffset[ll0];
21215 seqState->prevOffset[1] = seqState->prevOffset[!ll0];
21216 seqState->prevOffset[0] = offset;
21217 } else {
21218 offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
21219 { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
21220 temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
21221 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
21222 seqState->prevOffset[1] = seqState->prevOffset[0];
21223 seqState->prevOffset[0] = offset = temp;
21224 } } }
21225 seq.offset = offset;
21228 if (mlBits > 0)
21229 seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
21231 if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
21232 BIT_reloadDStream(&seqState->DStream);
21233 if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
21234 BIT_reloadDStream(&seqState->DStream);
21235 /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
21236 ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
21238 if (llBits > 0)
21239 seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
21241 if (MEM_32bits())
21242 BIT_reloadDStream(&seqState->DStream);
21244 DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
21245 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
21247 if (!isLastSeq) {
21248 /* don't update FSE state for last Sequence */
21249 ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
21250 ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
21251 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
21252 ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
21253 BIT_reloadDStream(&seqState->DStream);
21257 return seq;
21260 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
21261 #if DEBUGLEVEL >= 1
21262 static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
21264 size_t const windowSize = dctx->fParams.windowSize;
21265 /* No dictionary used. */
21266 if (dctx->dictContentEndForFuzzing == NULL) return 0;
21267 /* Dictionary is our prefix. */
21268 if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
21269 /* Dictionary is not our ext-dict. */
21270 if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
21271 /* Dictionary is not within our window size. */
21272 if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
21273 /* Dictionary is active. */
21274 return 1;
21276 #endif
21278 static void ZSTD_assertValidSequence(
21279 ZSTD_DCtx const* dctx,
21280 BYTE const* op, BYTE const* oend,
21281 seq_t const seq,
21282 BYTE const* prefixStart, BYTE const* virtualStart)
21284 #if DEBUGLEVEL >= 1
21285 if (dctx->isFrameDecompression) {
21286 size_t const windowSize = dctx->fParams.windowSize;
21287 size_t const sequenceSize = seq.litLength + seq.matchLength;
21288 BYTE const* const oLitEnd = op + seq.litLength;
21289 DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
21290 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
21291 assert(op <= oend);
21292 assert((size_t)(oend - op) >= sequenceSize);
21293 assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
21294 if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
21295 size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
21296 /* Offset must be within the dictionary. */
21297 assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
21298 assert(seq.offset <= windowSize + dictSize);
21299 } else {
21300 /* Offset must be within our window. */
21301 assert(seq.offset <= windowSize);
21304 #else
21305 (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
21306 #endif
21308 #endif
21310 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
21313 FORCE_INLINE_TEMPLATE size_t
21314 DONT_VECTORIZE
21315 ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
21316 void* dst, size_t maxDstSize,
21317 const void* seqStart, size_t seqSize, int nbSeq,
21318 const ZSTD_longOffset_e isLongOffset)
21320 const BYTE* ip = (const BYTE*)seqStart;
21321 const BYTE* const iend = ip + seqSize;
21322 BYTE* const ostart = (BYTE*)dst;
21323 BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
21324 BYTE* op = ostart;
21325 const BYTE* litPtr = dctx->litPtr;
21326 const BYTE* litBufferEnd = dctx->litBufferEnd;
21327 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
21328 const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
21329 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
21330 DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
21332 /* Literals are split between internal buffer & output buffer */
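/* Roughly: in the split layout the first portion of the decoded literals
 * lives in dst and the tail lives in dctx->litExtraBuffer. The first decode
 * loop below runs while litPtr stays inside the dst-resident portion; once a
 * sequence would read past dctx->litBufferEnd, the leftover literals are
 * copied down and litPtr switches over to litExtraBuffer for the remaining
 * sequences. */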
21333 if (nbSeq) {
21334 seqState_t seqState;
21335 dctx->fseEntropy = 1;
21336 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
21337 RETURN_ERROR_IF(
21338 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
21339 corruption_detected, "");
21340 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
21341 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
21342 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
21343 assert(dst != NULL);
21345 ZSTD_STATIC_ASSERT(
21346 BIT_DStream_unfinished < BIT_DStream_completed &&
21347 BIT_DStream_endOfBuffer < BIT_DStream_completed &&
21348 BIT_DStream_completed < BIT_DStream_overflow);
21350 /* decompress without overrunning litPtr begins */
21351 { seq_t sequence = {0,0,0}; /* some static analyzers believe that @sequence is not initialized (it necessarily is, since the for(;;) loop has at least one iteration) */
21352 /* Align the decompression loop to 32 + 16 bytes.
21354 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
21355 * speed swings based on the alignment of the decompression loop. This
21356 * performance swing is caused by parts of the decompression loop falling
21357 * out of the DSB. The entire decompression loop should fit in the DSB,
21358 * when it can't we get much worse performance. You can measure if you've
21359 * hit the good case or the bad case with this perf command for some
21360 * compressed file test.zst:
21362 * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
21363 * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
21365 * If you see most cycles served out of the MITE you've hit the bad case.
21366 * If you see most cycles served out of the DSB you've hit the good case.
21367 * If it is pretty even then you may be in an okay case.
21369 * This issue has been reproduced on the following CPUs:
21370 * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
21371 * Use Instruments->Counters to get DSB/MITE cycles.
21372 * I never got performance swings, but I was able to
21373 * go from the good case of mostly DSB to half of the
21374 * cycles served from MITE.
21375 * - Coffeelake: Intel i9-9900k
21376 * - Coffeelake: Intel i7-9700k
21378 * I haven't been able to reproduce the instability or DSB misses on any
21379 * of the following CPUs:
21380 * - Haswell
21381 * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
21382 * - Skylake
21384 * Alignment is done for each of the three major decompression loops:
21385 * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
21386 * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
21387 * - ZSTD_decompressSequences_body
21388 * Alignment choices are made to minimize large swings on bad cases and influence on performance
21389 * from changes external to this code, rather than to overoptimize on the current commit.
21391 * If you are seeing performance instability, this script can help test.
21392 * It tests on 4 commits in zstd where I saw performance change.
21394 * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 */
21396 #if defined(__GNUC__) && defined(__x86_64__)
21397 __asm__(".p2align 6");
21398 # if __GNUC__ >= 7
21399 /* good for gcc-7, gcc-9, and gcc-11 */
21400 __asm__("nop");
21401 __asm__(".p2align 5");
21402 __asm__("nop");
21403 __asm__(".p2align 4");
21404 # if __GNUC__ == 8 || __GNUC__ == 10
21405 /* good for gcc-8 and gcc-10 */
21406 __asm__("nop");
21407 __asm__(".p2align 3");
21408 # endif
21409 # endif
21410 #endif
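/* The .p2align/nop pairs above only pad the instruction stream so that the
 * hot decode loop starts at the chosen 64/32/16-byte boundaries; the intent,
 * per the note above, is to keep the whole loop resident in the uop cache
 * (DSB) instead of being served from the legacy decoder (MITE). */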
21412 /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
21413 for ( ; nbSeq; nbSeq--) {
21414 sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
21415 if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
21416 { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
21417 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
21418 assert(!ZSTD_isError(oneSeqSize));
21419 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
21420 #endif
21421 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
21422 return oneSeqSize;
21423 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
21424 op += oneSeqSize;
21426 DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
21428 /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
21429 if (nbSeq > 0) {
21430 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
21431 DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
21432 if (leftoverLit) {
21433 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
21434 ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
21435 sequence.litLength -= leftoverLit;
21436 op += leftoverLit;
21438 litPtr = dctx->litExtraBuffer;
21439 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
21440 dctx->litBufferLocation = ZSTD_not_in_dst;
21441 { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
21442 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
21443 assert(!ZSTD_isError(oneSeqSize));
21444 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
21445 #endif
21446 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
21447 return oneSeqSize;
21448 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
21449 op += oneSeqSize;
21451 nbSeq--;
21455 if (nbSeq > 0) {
21456 /* there is remaining lit from extra buffer */
21458 #if defined(__GNUC__) && defined(__x86_64__)
21459 __asm__(".p2align 6");
21460 __asm__("nop");
21461 # if __GNUC__ != 7
21462 /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
21463 __asm__(".p2align 4");
21464 __asm__("nop");
21465 __asm__(".p2align 3");
21466 # elif __GNUC__ >= 11
21467 __asm__(".p2align 3");
21468 # else
21469 __asm__(".p2align 5");
21470 __asm__("nop");
21471 __asm__(".p2align 3");
21472 # endif
21473 #endif
21475 for ( ; nbSeq ; nbSeq--) {
21476 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
21477 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
21478 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
21479 assert(!ZSTD_isError(oneSeqSize));
21480 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
21481 #endif
21482 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
21483 return oneSeqSize;
21484 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
21485 op += oneSeqSize;
21489 /* check if reached exact end */
21490 DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
21491 RETURN_ERROR_IF(nbSeq, corruption_detected, "");
21492 DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
21493 RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
21494 /* save reps for next block */
21495 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
21498 /* last literal segment */
21499 if (dctx->litBufferLocation == ZSTD_split) {
21500 /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
21501 size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
21502 DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
21503 RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
21504 if (op != NULL) {
21505 ZSTD_memmove(op, litPtr, lastLLSize);
21506 op += lastLLSize;
21508 litPtr = dctx->litExtraBuffer;
21509 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
21510 dctx->litBufferLocation = ZSTD_not_in_dst;
21512 /* copy last literals from internal buffer */
21513 { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
21514 DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
21515 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
21516 if (op != NULL) {
21517 ZSTD_memcpy(op, litPtr, lastLLSize);
21518 op += lastLLSize;
21521 DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
21522 return (size_t)(op - ostart);
21525 FORCE_INLINE_TEMPLATE size_t
21526 DONT_VECTORIZE
21527 ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
21528 void* dst, size_t maxDstSize,
21529 const void* seqStart, size_t seqSize, int nbSeq,
21530 const ZSTD_longOffset_e isLongOffset)
21532 const BYTE* ip = (const BYTE*)seqStart;
21533 const BYTE* const iend = ip + seqSize;
21534 BYTE* const ostart = (BYTE*)dst;
21535 BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
21536 BYTE* op = ostart;
21537 const BYTE* litPtr = dctx->litPtr;
21538 const BYTE* const litEnd = litPtr + dctx->litSize;
21539 const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
21540 const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
21541 const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
21542 DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
21544 /* Regen sequences */
21545 if (nbSeq) {
21546 seqState_t seqState;
21547 dctx->fseEntropy = 1;
21548 { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
21549 RETURN_ERROR_IF(
21550 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
21551 corruption_detected, "");
21552 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
21553 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
21554 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
21555 assert(dst != NULL);
21557 #if defined(__GNUC__) && defined(__x86_64__)
21558 __asm__(".p2align 6");
21559 __asm__("nop");
21560 # if __GNUC__ >= 7
21561 __asm__(".p2align 5");
21562 __asm__("nop");
21563 __asm__(".p2align 3");
21564 # else
21565 __asm__(".p2align 4");
21566 __asm__("nop");
21567 __asm__(".p2align 3");
21568 # endif
21569 #endif
21571 for ( ; nbSeq ; nbSeq--) {
21572 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
21573 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
21574 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
21575 assert(!ZSTD_isError(oneSeqSize));
21576 ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
21577 #endif
21578 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
21579 return oneSeqSize;
21580 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
21581 op += oneSeqSize;
21584 /* check if reached exact end */
21585 assert(nbSeq == 0);
21586 RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
21587 /* save reps for next block */
21588 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
21591 /* last literal segment */
21592 { size_t const lastLLSize = (size_t)(litEnd - litPtr);
21593 DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
21594 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
21595 if (op != NULL) {
21596 ZSTD_memcpy(op, litPtr, lastLLSize);
21597 op += lastLLSize;
21600 DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
21601 return (size_t)(op - ostart);
21604 static size_t
21605 ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
21606 void* dst, size_t maxDstSize,
21607 const void* seqStart, size_t seqSize, int nbSeq,
21608 const ZSTD_longOffset_e isLongOffset)
21610 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21613 static size_t
21614 ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
21615 void* dst, size_t maxDstSize,
21616 const void* seqStart, size_t seqSize, int nbSeq,
21617 const ZSTD_longOffset_e isLongOffset)
21619 return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21621 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
21623 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
21625 FORCE_INLINE_TEMPLATE
21627 size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
21628 const BYTE* const prefixStart, const BYTE* const dictEnd)
21630 prefetchPos += sequence.litLength;
21631 { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
21632 /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
21633 * No consequence though : memory address is only used for prefetching, not for dereferencing */
21634 const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
21635 PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
21637 return prefetchPos + sequence.matchLength;
21640 /* This decoding function employs prefetching
21641 * to reduce latency impact of cache misses.
21642 * It's generally employed when a block contains a significant portion of long-distance matches,
21643 * or when coupled with a "cold" dictionary */
21644 FORCE_INLINE_TEMPLATE size_t
21645 ZSTD_decompressSequencesLong_body(
21646 ZSTD_DCtx* dctx,
21647 void* dst, size_t maxDstSize,
21648 const void* seqStart, size_t seqSize, int nbSeq,
21649 const ZSTD_longOffset_e isLongOffset)
21651 const BYTE* ip = (const BYTE*)seqStart;
21652 const BYTE* const iend = ip + seqSize;
21653 BYTE* const ostart = (BYTE*)dst;
21654 BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
21655 BYTE* op = ostart;
21656 const BYTE* litPtr = dctx->litPtr;
21657 const BYTE* litBufferEnd = dctx->litBufferEnd;
21658 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
21659 const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
21660 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
21662 /* Regen sequences */
21663 if (nbSeq) {
21664 #define STORED_SEQS 8
21665 #define STORED_SEQS_MASK (STORED_SEQS-1)
21666 #define ADVANCED_SEQS STORED_SEQS
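/* sequences[] is used as a ring buffer of STORED_SEQS entries (indexed with
 * & STORED_SEQS_MASK, hence STORED_SEQS must stay a power of 2). Decoding
 * runs ADVANCED_SEQS sequences ahead of execution: each newly decoded
 * sequence has its match prefetched, while the sequence decoded
 * ADVANCED_SEQS iterations earlier is executed, giving the prefetch time to
 * complete. */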
21667 seq_t sequences[STORED_SEQS];
21668 int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
21669 seqState_t seqState;
21670 int seqNb;
21671 size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
21673 dctx->fseEntropy = 1;
21674 { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
21675 assert(dst != NULL);
21676 assert(iend >= ip);
21677 RETURN_ERROR_IF(
21678 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
21679 corruption_detected, "");
21680 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
21681 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
21682 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
21684 /* prepare in advance */
21685 for (seqNb=0; seqNb<seqAdvance; seqNb++) {
21686 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
21687 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
21688 sequences[seqNb] = sequence;
21691 /* decompress without stomping litBuffer */
21692 for (; seqNb < nbSeq; seqNb++) {
21693 seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
21695 if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
21696 /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
21697 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
21698 if (leftoverLit)
21700 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
21701 ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
21702 sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
21703 op += leftoverLit;
21705 litPtr = dctx->litExtraBuffer;
21706 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
21707 dctx->litBufferLocation = ZSTD_not_in_dst;
21708 { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
21709 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
21710 assert(!ZSTD_isError(oneSeqSize));
21711 ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
21712 #endif
21713 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
21715 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
21716 sequences[seqNb & STORED_SEQS_MASK] = sequence;
21717 op += oneSeqSize;
21719 else
21721 /* lit buffer is either wholly contained in the first or second split, or not split at all */
21722 size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
21723 ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
21724 ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
21725 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
21726 assert(!ZSTD_isError(oneSeqSize));
21727 ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
21728 #endif
21729 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
21731 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
21732 sequences[seqNb & STORED_SEQS_MASK] = sequence;
21733 op += oneSeqSize;
21736 RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
21738 /* finish queue */
21739 seqNb -= seqAdvance;
21740 for ( ; seqNb<nbSeq ; seqNb++) {
21741 seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
21742 if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
21743 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
21744 if (leftoverLit) {
21745 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
21746 ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
21747 sequence->litLength -= leftoverLit;
21748 op += leftoverLit;
21750 litPtr = dctx->litExtraBuffer;
21751 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
21752 dctx->litBufferLocation = ZSTD_not_in_dst;
21753 { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
21754 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
21755 assert(!ZSTD_isError(oneSeqSize));
21756 ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
21757 #endif
21758 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
21759 op += oneSeqSize;
21762 else
21764 size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
21765 ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
21766 ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
21767 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
21768 assert(!ZSTD_isError(oneSeqSize));
21769 ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
21770 #endif
21771 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
21772 op += oneSeqSize;
21776 /* save reps for next block */
21777 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
21780 /* last literal segment */
21781 if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
21782 size_t const lastLLSize = litBufferEnd - litPtr;
21783 RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
21784 if (op != NULL) {
21785 ZSTD_memmove(op, litPtr, lastLLSize);
21786 op += lastLLSize;
21788 litPtr = dctx->litExtraBuffer;
21789 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
21791 { size_t const lastLLSize = litBufferEnd - litPtr;
21792 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
21793 if (op != NULL) {
21794 ZSTD_memmove(op, litPtr, lastLLSize);
21795 op += lastLLSize;
21799 return (size_t)(op - ostart);
21802 static size_t
21803 ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
21804 void* dst, size_t maxDstSize,
21805 const void* seqStart, size_t seqSize, int nbSeq,
21806 const ZSTD_longOffset_e isLongOffset)
21808 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21810 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
21814 #if DYNAMIC_BMI2
21816 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
21817 static BMI2_TARGET_ATTRIBUTE size_t
21818 DONT_VECTORIZE
21819 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
21820 void* dst, size_t maxDstSize,
21821 const void* seqStart, size_t seqSize, int nbSeq,
21822 const ZSTD_longOffset_e isLongOffset)
21824 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21826 static BMI2_TARGET_ATTRIBUTE size_t
21827 DONT_VECTORIZE
21828 ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
21829 void* dst, size_t maxDstSize,
21830 const void* seqStart, size_t seqSize, int nbSeq,
21831 const ZSTD_longOffset_e isLongOffset)
21833 return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21835 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
21837 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
21838 static BMI2_TARGET_ATTRIBUTE size_t
21839 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
21840 void* dst, size_t maxDstSize,
21841 const void* seqStart, size_t seqSize, int nbSeq,
21842 const ZSTD_longOffset_e isLongOffset)
21844 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21846 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
21848 #endif /* DYNAMIC_BMI2 */
21850 typedef size_t (*ZSTD_decompressSequences_t)(
21851 ZSTD_DCtx* dctx,
21852 void* dst, size_t maxDstSize,
21853 const void* seqStart, size_t seqSize, int nbSeq,
21854 const ZSTD_longOffset_e isLongOffset);
21856 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
21857 static size_t
21858 ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
21859 const void* seqStart, size_t seqSize, int nbSeq,
21860 const ZSTD_longOffset_e isLongOffset)
21862 DEBUGLOG(5, "ZSTD_decompressSequences");
21863 #if DYNAMIC_BMI2
21864 if (ZSTD_DCtx_get_bmi2(dctx)) {
21865 return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21867 #endif
21868 return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21870 static size_t
21871 ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
21872 const void* seqStart, size_t seqSize, int nbSeq,
21873 const ZSTD_longOffset_e isLongOffset)
21875 DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
21876 #if DYNAMIC_BMI2
21877 if (ZSTD_DCtx_get_bmi2(dctx)) {
21878 return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21880 #endif
21881 return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21883 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
21886 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
21887 /* ZSTD_decompressSequencesLong() :
21888 * decompression function triggered when a minimum share of offsets is considered "long",
21889 * aka out of cache.
21890 * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
21891 * This function will try to mitigate main memory latency through the use of prefetching */
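/* In ZSTD_decompressBlock_internal() below, this path is taken when the
 * dictionary is cold, or when ZSTD_getOffsetInfo() reports that the share of
 * long offset codes reaches the minShare heuristic (roughly 2.7% on 64-bit
 * targets, 7.8% on 32-bit targets). */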
21892 static size_t
21893 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
21894 void* dst, size_t maxDstSize,
21895 const void* seqStart, size_t seqSize, int nbSeq,
21896 const ZSTD_longOffset_e isLongOffset)
21898 DEBUGLOG(5, "ZSTD_decompressSequencesLong");
21899 #if DYNAMIC_BMI2
21900 if (ZSTD_DCtx_get_bmi2(dctx)) {
21901 return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21903 #endif
21904 return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
21906 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
21910 /*! @returns The total size of the history referenceable by zstd, including
21911 * both the prefix and the extDict. At @p op any offset larger than this
21912 * is invalid. */
21914 static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
21916 return (size_t)(op - virtualStart);
21919 typedef struct {
21920 unsigned longOffsetShare;
21921 unsigned maxNbAdditionalBits;
21922 } ZSTD_OffsetInfo;
21924 /* ZSTD_getOffsetInfo() :
21925 * condition : offTable must be valid
21926 * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
21927 * compared to maximum possible of (1<<OffFSELog),
21928 * as well as the maximum number of additional bits required. */
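/* The nbAdditionalBits > 22 test below corresponds to offsets above roughly
 * 2^23 (8 MiB), i.e. matches that are likely to fall outside the CPU caches. */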
21930 static ZSTD_OffsetInfo
21931 ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
21933 ZSTD_OffsetInfo info = {0, 0};
21934 /* If nbSeq == 0, then the offTable is uninitialized, but we have
21935 * no sequences, so both values should be 0. */
21937 if (nbSeq != 0) {
21938 const void* ptr = offTable;
21939 U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
21940 const ZSTD_seqSymbol* table = offTable + 1;
21941 U32 const max = 1 << tableLog;
21942 U32 u;
21943 DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
21945 assert(max <= (1 << OffFSELog)); /* max not too large */
21946 for (u=0; u<max; u++) {
21947 info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
21948 if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
21951 assert(tableLog <= OffFSELog);
21952 info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
21955 return info;
21959 /*! @returns The maximum offset we can decode in one read of our bitstream, without
21960 * reloading more bits in the middle of the offset bits read. Any offsets larger
21961 * than this must use the long offset decoder. */
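/* For example, on 32-bit targets STREAM_ACCUMULATOR_MIN is 25, so
 * maxOffbase = (1 << 26) - 1 and the largest "short" offset is about 64 MiB
 * minus ZSTD_REP_NUM; anything larger must take the long-offset decoder. */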
21963 static size_t ZSTD_maxShortOffset(void)
21965 if (MEM_64bits()) {
21966 /* We can decode any offset without reloading bits.
21967 * This might change if the max window size grows. */
21969 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
21970 return (size_t)-1;
21971 } else {
21972 /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
21973 * This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
21974 * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset. */
21976 size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
21977 size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
21978 assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
21979 return maxOffset;
21983 size_t
21984 ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
21985 void* dst, size_t dstCapacity,
21986 const void* src, size_t srcSize, const streaming_operation streaming)
21987 { /* blockType == blockCompressed */
21988 const BYTE* ip = (const BYTE*)src;
21989 DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
21991 /* Note : the wording of the specification
21992 * allows a compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
21993 * This generally does not happen, as it makes little sense,
21994 * since an uncompressed block would feature the same size and have no decompression cost.
21995 * Also, note that decoders from reference libzstd before v1.5.4
21996 * would treat this edge case as an error.
21997 * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
21998 * for broader compatibility with the deployed ecosystem of zstd decoders */
21999 RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
22001 /* Decode literals section */
22002 { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
22003 DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
22004 if (ZSTD_isError(litCSize)) return litCSize;
22005 ip += litCSize;
22006 srcSize -= litCSize;
22009 /* Build Decoding Tables */
22011 /* Compute the maximum block size, which must also work when !frame and fParams are unset.
22012 * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t. */
22014 size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
22015 size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
22016 /* isLongOffset must be true if there are long offsets.
22017 * Offsets are long if they are larger than ZSTD_maxShortOffset().
22018 * We don't expect that to be the case in 64-bit mode.
22020 * We check here to see if our history is large enough to allow long offsets.
22021 * If it isn't, then we can't possibly have (valid) long offsets. If the offset
22022 * is invalid, then it is okay to read it incorrectly.
22024 * If isLongOffset is true, then we will later check our decoding table to see
22025 * if it is even possible to generate long offsets. */
22027 ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
22028 /* These macros control at build-time which decompressor implementation
22029 * we use. If neither is defined, we do some inspection and dispatch at
22030 * runtime. */
22032 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
22033 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
22034 int usePrefetchDecoder = dctx->ddictIsCold;
22035 #else
22036 /* Set to 1 to avoid computing offset info if we don't need to.
22037 * Otherwise this value is ignored. */
22039 int usePrefetchDecoder = 1;
22040 #endif
22041 int nbSeq;
22042 size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
22043 if (ZSTD_isError(seqHSize)) return seqHSize;
22044 ip += seqHSize;
22045 srcSize -= seqHSize;
22047 RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
22048 RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
22049 "invalid dst");
22051 /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
22052 * compute information about the share of long offsets, and the maximum nbAdditionalBits.
22053 * NOTE: could probably use a larger nbSeq limit */
22055 if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
22056 ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
22057 if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
22058 /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
22059 * enough, then we know it is impossible to have too long an offset in this block, so we can
22060 * use the regular offset decoder. */
22062 isLongOffset = ZSTD_lo_isRegularOffset;
22064 if (!usePrefetchDecoder) {
22065 U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
22066 usePrefetchDecoder = (info.longOffsetShare >= minShare);
22070 dctx->ddictIsCold = 0;
22072 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
22073 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
22074 if (usePrefetchDecoder) {
22075 #else
22076 (void)usePrefetchDecoder;
22078 #endif
22079 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
22080 return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
22081 #endif
22084 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
22085 /* else */
22086 if (dctx->litBufferLocation == ZSTD_split)
22087 return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
22088 else
22089 return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
22090 #endif
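/* ZSTD_checkContinuity() :
 * When the new dst segment is not contiguous with the previous one, the
 * previously written data becomes the ext-dict (dictEnd) and virtualStart is
 * shifted so that (op - virtualStart) still measures the total history that
 * offsets may legally reference. */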
22095 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
22096 void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
22098 if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
22099 dctx->dictEnd = dctx->previousDstEnd;
22100 dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
22101 dctx->prefixStart = dst;
22102 dctx->previousDstEnd = dst;
22107 size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
22108 void* dst, size_t dstCapacity,
22109 const void* src, size_t srcSize)
22111 size_t dSize;
22112 dctx->isFrameDecompression = 0;
22113 ZSTD_checkContinuity(dctx, dst, dstCapacity);
22114 dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
22115 FORWARD_IF_ERROR(dSize, "");
22116 dctx->previousDstEnd = (char*)dst + dSize;
22117 return dSize;
22121 /* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
22122 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
22123 void* dst, size_t dstCapacity,
22124 const void* src, size_t srcSize)
22126 return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
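/* Typical block-level usage (a sketch only, relying on the experimental block
 * API declared in zstd.h; error handling elided):
 *   ZSTD_DCtx* dctx = ZSTD_createDCtx();
 *   ZSTD_decompressBegin(dctx);
 *   size_t const blockSize = ZSTD_decompressBlock(dctx, dst, dstCapacity, src, srcSize);
 *   if (ZSTD_isError(blockSize)) { ... }
 *   ZSTD_freeDCtx(dctx);
 */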
22128 /**** ended inlining decompress/zstd_decompress_block.c ****/