/*
    LZ4 HC - High Compression Mode of LZ4
    Copyright (C) 2011-2017, Yann Collet.

    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
    met:

    * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above
    copyright notice, this list of conditions and the following disclaimer
    in the documentation and/or other materials provided with the
    distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    You can contact the author at :
    - LZ4 source repository : https://github.com/lz4/lz4
    - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */


/* *************************************
*  Tuning Parameter
***************************************/

/*! HEAPMODE :
 *  Select how the default compression function will allocate its workspace memory,
 *  on the stack (0:fastest), or on the heap (1:requires malloc()).
 *  Since the workspace is rather large, heap mode is recommended.
 */
#ifndef LZ4HC_HEAPMODE
#  define LZ4HC_HEAPMODE 1
#endif
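
/* Illustrative note (not part of the upstream sources) : LZ4HC_HEAPMODE can be
 * overridden at build time, e.g. :
 *     cc -DLZ4HC_HEAPMODE=0 -c lz4hc.c
 * With LZ4HC_HEAPMODE==0, LZ4_compress_HC() keeps its whole LZ4_streamHC_t
 * workspace (roughly a quarter megabyte) on the stack, so the calling thread
 * must have that much stack available. */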

/*=== Dependency ===*/
#define LZ4_HC_STATIC_LINKING_ONLY
#include "lz4hc.h"


/*=== Common LZ4 definitions ===*/
#if defined(__GNUC__)
#  pragma GCC diagnostic ignored "-Wunused-function"
#endif
#if defined (__clang__)
#  pragma clang diagnostic ignored "-Wunused-function"
#endif

/*=== Enums ===*/
typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;


#define LZ4_COMMONDEFS_ONLY
#ifndef LZ4_SRC_INCLUDED
#include "lz4.c"   /* LZ4_count, constants, mem */
#endif

/*=== Constants ===*/
#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
#define LZ4_OPT_NUM (1<<12)


/*=== Macros ===*/
#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
#define MAX(a,b)   ( (a) > (b) ? (a) : (b) )
#define HASH_FUNCTION(i)         (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
#define DELTANEXTMAXD(p)         chainTable[(p) & LZ4HC_MAXD_MASK]    /* flexible, LZ4HC_MAXD dependent */
#define DELTANEXTU16(table, pos) table[(U16)(pos)]                    /* faster */
/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */
#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor

static U32 LZ4HC_hashPtr(const void *ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }


/**************************************
*  HC Compression
**************************************/
static void LZ4HC_clearTables(LZ4HC_CCtx_internal *hc4) {
    MEM_INIT((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
}

static void LZ4HC_init_internal(LZ4HC_CCtx_internal *hc4, const BYTE *start) {
    uptrval startingOffset = (uptrval)(hc4->end - hc4->base);
    if (startingOffset > 1 GB) {
        LZ4HC_clearTables(hc4);
        startingOffset = 0;
    }
    startingOffset += 64 KB;
    hc4->nextToUpdate = (U32) startingOffset;
    hc4->base = start - startingOffset;
    hc4->end = start;
    hc4->dictBase = start - startingOffset;
    hc4->dictLimit = (U32) startingOffset;
    hc4->lowLimit = (U32) startingOffset;
}


/* Update chains up to ip (excluded) */
LZ4_FORCE_INLINE void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4, const BYTE *ip) {
    U16 *const chainTable = hc4->chainTable;
    U32 *const hashTable = hc4->hashTable;
    const BYTE *const base = hc4->base;
    U32 const target = (U32)(ip - base);
    U32 idx = hc4->nextToUpdate;

    while (idx < target) {
        U32 const h = LZ4HC_hashPtr(base + idx);
        size_t delta = idx - hashTable[h];
        if (delta > LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX;
        DELTANEXTU16(chainTable, idx) = (U16)delta;
        hashTable[h] = idx;
        idx++;
    }

    hc4->nextToUpdate = target;
}

/** LZ4HC_countBack() :
 * @return : negative value, nb of common bytes before ip/match */
LZ4_FORCE_INLINE
int LZ4HC_countBack(const BYTE *const ip, const BYTE *const match,
                    const BYTE *const iMin, const BYTE *const mMin) {
    int back = 0;
    int const min = (int)MAX(iMin - ip, mMin - match);
    assert(min <= 0);
    assert(ip >= iMin);
    assert((size_t)(ip - iMin) < (1U << 31));
    assert(match >= mMin);
    assert((size_t)(match - mMin) < (1U << 31));
    while ((back > min)
            && (ip[back - 1] == match[back - 1]))
        back--;
    return back;
}

#if defined(_MSC_VER)
#  define LZ4HC_rotl32(x,r) _rotl(x,r)
#else
#  define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#endif


static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) {
    size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3;
    if (bitsToRotate == 0)
        return pattern;
    return LZ4HC_rotl32(pattern, (int)bitsToRotate);
}

/* LZ4HC_countPattern() :
 * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
static unsigned
LZ4HC_countPattern(const BYTE *ip, const BYTE *const iEnd, U32 const pattern32) {
    const BYTE *const iStart = ip;
    reg_t const pattern = (sizeof(pattern) == 8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;

    while (likely(ip < iEnd - (sizeof(pattern) - 1))) {
        reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
        if (!diff) { ip += sizeof(pattern); continue; }
        ip += LZ4_NbCommonBytes(diff);
        return (unsigned)(ip - iStart);
    }

    if (LZ4_isLittleEndian()) {
        reg_t patternByte = pattern;
        while ((ip < iEnd) && (*ip == (BYTE)patternByte)) {
            ip++;
            patternByte >>= 8;
        }
    } else { /* big endian */
        U32 bitOffset = (sizeof(pattern) * 8) - 8;
        while (ip < iEnd) {
            BYTE const byte = (BYTE)(pattern >> bitOffset);
            if (*ip != byte) break;
            ip ++;
            bitOffset -= 8;
        }
    }

    return (unsigned)(ip - iStart);
}

/* LZ4HC_reverseCountPattern() :
 * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
 * read using natural platform endianness */
static unsigned
LZ4HC_reverseCountPattern(const BYTE *ip, const BYTE *const iLow, U32 pattern) {
    const BYTE *const iStart = ip;

    while (likely(ip >= iLow + 4)) {
        if (LZ4_read32(ip - 4) != pattern) break;
        ip -= 4;
    }
    {
        const BYTE *bytePtr = (const BYTE *)(&pattern) + 3; /* works for any endianness */
        while (likely(ip > iLow)) {
            if (ip[-1] != *bytePtr) break;
            ip--;
            bytePtr--;
        }
    }
    return (unsigned)(iStart - ip);
}

/* LZ4HC_protectDictEnd() :
 * Checks whether the match lies within the last 3 bytes of the dictionary,
 * in which case reading the 4-byte MINMATCH would overrun the dictionary end.
 * @returns true if the match index is okay.
 */
static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) {
    return ((U32)((dictLimit - 1) - matchIndex) >= 3);
}

typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
typedef enum { favorCompressionRatio = 0, favorDecompressionSpeed } HCfavor_e;

LZ4_FORCE_INLINE int
LZ4HC_InsertAndGetWiderMatch(
    LZ4HC_CCtx_internal *hc4,
    const BYTE *const ip,
    const BYTE *const iLowLimit,
    const BYTE *const iHighLimit,
    int longest,
    const BYTE **matchpos,
    const BYTE **startpos,
    const int maxNbAttempts,
    const int patternAnalysis,
    const int chainSwap,
    const dictCtx_directive dict,
    const HCfavor_e favorDecSpeed) {
    U16 *const chainTable = hc4->chainTable;
    U32 *const HashTable = hc4->hashTable;
    const LZ4HC_CCtx_internal *const dictCtx = hc4->dictCtx;
    const BYTE *const base = hc4->base;
    const U32 dictLimit = hc4->dictLimit;
    const BYTE *const lowPrefixPtr = base + dictLimit;
    const U32 ipIndex = (U32)(ip - base);
    const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
    const BYTE *const dictBase = hc4->dictBase;
    int const lookBackLength = (int)(ip - iLowLimit);
    int nbAttempts = maxNbAttempts;
    U32 matchChainPos = 0;
    U32 const pattern = LZ4_read32(ip);
    U32 matchIndex;
    repeat_state_e repeat = rep_untested;
    size_t srcPatternLength = 0;

    DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
    /* First Match */
    LZ4HC_Insert(hc4, ip);
    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
    DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
             matchIndex, lowestMatchIndex);

    while ((matchIndex >= lowestMatchIndex) && (nbAttempts > 0)) {
        int matchLength = 0;
        nbAttempts--;
        assert(matchIndex < ipIndex);
        if (favorDecSpeed && (ipIndex - matchIndex < 8)) {
            /* do nothing */
        } else if (matchIndex >= dictLimit) { /* within current Prefix */
            const BYTE *const matchPtr = base + matchIndex;
            assert(matchPtr >= lowPrefixPtr);
            assert(matchPtr < ip);
            assert(longest >= 1);
            if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {
                if (LZ4_read32(matchPtr) == pattern) {
                    int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
                    matchLength = MINMATCH + (int)LZ4_count(ip + MINMATCH, matchPtr + MINMATCH, iHighLimit);
                    matchLength -= back;
                    if (matchLength > longest) {
                        longest = matchLength;
                        *matchpos = matchPtr + back;
                        *startpos = ip + back;
                    }
                }
            }
        } else { /* lowestMatchIndex <= matchIndex < dictLimit */
            const BYTE *const matchPtr = dictBase + matchIndex;
            if (LZ4_read32(matchPtr) == pattern) {
                const BYTE *const dictStart = dictBase + hc4->lowLimit;
                int back = 0;
                const BYTE *vLimit = ip + (dictLimit - matchIndex);
                if (vLimit > iHighLimit) vLimit = iHighLimit;
                matchLength = (int)LZ4_count(ip + MINMATCH, matchPtr + MINMATCH, vLimit) + MINMATCH;
                if ((ip + matchLength == vLimit) && (vLimit < iHighLimit))
                    matchLength += LZ4_count(ip + matchLength, lowPrefixPtr, iHighLimit);
                back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0;
                matchLength -= back;
                if (matchLength > longest) {
                    longest = matchLength;
                    *matchpos = base + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */
                    *startpos = ip + back;
                }
            }
        }

        if (chainSwap && matchLength == longest) { /* better match => select a better chain */
            assert(lookBackLength == 0); /* search forward only */
            if (matchIndex + (U32)longest <= ipIndex) {
                int const kTrigger = 4;
                U32 distanceToNextMatch = 1;
                int const end = longest - MINMATCH + 1;
                int step = 1;
                int accel = 1 << kTrigger;
                int pos;
                for (pos = 0; pos < end; pos += step) {
                    U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos);
                    step = (accel++ >> kTrigger);
                    if (candidateDist > distanceToNextMatch) {
                        distanceToNextMatch = candidateDist;
                        matchChainPos = (U32)pos;
                        accel = 1 << kTrigger;
                    }
                }
                if (distanceToNextMatch > 1) {
                    if (distanceToNextMatch > matchIndex) break; /* avoid overflow */
                    matchIndex -= distanceToNextMatch;
                    continue;
                }
            }
        }

        {
            U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex);
            if (patternAnalysis && distNextMatch == 1 && matchChainPos == 0) {
                U32 const matchCandidateIdx = matchIndex - 1;
                /* may be a repeated pattern */
                if (repeat == rep_untested) {
                    if (((pattern & 0xFFFF) == (pattern >> 16))
                            & ((pattern & 0xFF) == (pattern >> 24))) {
                        repeat = rep_confirmed;
                        srcPatternLength = LZ4HC_countPattern(ip + sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
                    } else {
                        repeat = rep_not;
                    }
                }
                if ((repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex)
                        && LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx)) {
                    const int extDict = matchCandidateIdx < dictLimit;
                    const BYTE *const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx;
                    if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
                        const BYTE *const dictStart = dictBase + hc4->lowLimit;
                        const BYTE *const iLimit = extDict ? dictBase + dictLimit : iHighLimit;
                        size_t forwardPatternLength = LZ4HC_countPattern(matchPtr + sizeof(pattern), iLimit, pattern) + sizeof(pattern);
                        if (extDict && matchPtr + forwardPatternLength == iLimit) {
                            U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern);
                            forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern);
                        }
                        {
                            const BYTE *const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr;
                            size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
                            size_t currentSegmentLength;
                            if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) {
                                U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern);
                                backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern);
                            }
                            /* Limit backLength so it does not go further back than lowestMatchIndex */
                            backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex);
                            assert(matchCandidateIdx - backLength >= lowestMatchIndex);
                            currentSegmentLength = backLength + forwardPatternLength;
                            /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */
                            if ((currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
                                    && (forwardPatternLength <= srcPatternLength)) { /* haven't reached this position yet */
                                U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
                                if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex))
                                    matchIndex = newMatchIndex;
                                else {
                                    /* Can only happen if started in the prefix */
                                    assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
                                    matchIndex = dictLimit;
                                }
                            } else {
                                U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
                                if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) {
                                    assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
                                    matchIndex = dictLimit;
                                } else {
                                    matchIndex = newMatchIndex;
                                    if (lookBackLength == 0) { /* no back possible */
                                        size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
                                        if ((size_t)longest < maxML) {
                                            assert(base + matchIndex != ip);
                                            if ((size_t)(ip - base) - matchIndex > LZ4_DISTANCE_MAX) break;
                                            assert(maxML < 2 GB);
                                            longest = (int)maxML;
                                            *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
                                            *startpos = ip;
                                        }
                                        {
                                            U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
                                            if (distToNextPattern > matchIndex) break; /* avoid overflow */
                                            matchIndex -= distToNextPattern;
                                        }
                                    }
                                }
                            }
                        }
                        continue;
                    }
                }
            }
        } /* PA optimization */

        /* follow current chain */
        matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos);

    } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */

    if (dict == usingDictCtxHc
            && nbAttempts > 0
            && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) {
        size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base);
        U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
        assert(dictEndOffset <= 1 GB);
        matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
        while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
            const BYTE *const matchPtr = dictCtx->base + dictMatchIndex;

            if (LZ4_read32(matchPtr) == pattern) {
                int mlt;
                int back = 0;
                const BYTE *vLimit = ip + (dictEndOffset - dictMatchIndex);
                if (vLimit > iHighLimit) vLimit = iHighLimit;
                mlt = (int)LZ4_count(ip + MINMATCH, matchPtr + MINMATCH, vLimit) + MINMATCH;
                back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0;
                mlt -= back;
                if (mlt > longest) {
                    longest = mlt;
                    *matchpos = base + matchIndex + back;
                    *startpos = ip + back;
                }
            }

            {
                U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex);
                dictMatchIndex -= nextOffset;
                matchIndex -= nextOffset;
            }
        }
    }

    return longest;
}

LZ4_FORCE_INLINE
int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal *const hc4, /* Index table will be updated */
                                 const BYTE *const ip, const BYTE *const iLimit,
                                 const BYTE **matchpos,
                                 const int maxNbAttempts,
                                 const int patternAnalysis,
                                 const dictCtx_directive dict) {
    const BYTE *uselessPtr = ip;
    /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
     * but this won't be the case here, as we define iLowLimit==ip,
     * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
    return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH - 1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio);
}

/* LZ4HC_encodeSequence() :
 * @return : 0 if ok,
 *           1 if buffer issue detected */
LZ4_FORCE_INLINE int LZ4HC_encodeSequence(
    const BYTE **ip,
    BYTE **op,
    const BYTE **anchor,
    int matchLength,
    const BYTE *const match,
    limitedOutput_directive limit,
    BYTE *oend) {
    size_t length;
    BYTE *const token = (*op)++;

#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
    static const BYTE *start = NULL;
    static U32 totalCost = 0;
    U32 const pos = (start == NULL) ? 0 : (U32)(*anchor - start);
    U32 const ll = (U32)(*ip - *anchor);
    U32 const llAdd = (ll >= 15) ? ((ll - 15) / 255) + 1 : 0;
    U32 const mlAdd = (matchLength >= 19) ? ((matchLength - 19) / 255) + 1 : 0;
    U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
    if (start == NULL) start = *anchor; /* only works for single segment */
    /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
    DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
             pos,
             (U32)(*ip - *anchor), matchLength, (U32)(*ip - match),
             cost, totalCost);
    totalCost += cost;
#endif

    /* Encode Literal length */
    length = (size_t)(*ip - *anchor);
    LZ4_STATIC_ASSERT(notLimited == 0);
    /* Check output limit */
    if (limit && ((*op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
        DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
                 (int)length, (int)(oend - *op));
        return 1;
    }
    if (length >= RUN_MASK) {
        size_t len = length - RUN_MASK;
        *token = (RUN_MASK << ML_BITS);
        for (; len >= 255 ; len -= 255) *(*op)++ = 255;
        *(*op)++ = (BYTE)len;
    } else {
        *token = (BYTE)(length << ML_BITS);
    }

    /* Copy Literals */
    LZ4_wildCopy8(*op, *anchor, (*op) + length);
    *op += length;

    /* Encode Offset */
    assert((*ip - match) <= LZ4_DISTANCE_MAX); /* note : consider providing offset as a value, rather than as a pointer difference */
    LZ4_writeLE16(*op, (U16)(*ip - match));
    *op += 2;

    /* Encode MatchLength */
    assert(matchLength >= MINMATCH);
    length = (size_t)matchLength - MINMATCH;
    if (limit && (*op + (length / 255) + (1 + LASTLITERALS) > oend)) {
        DEBUGLOG(6, "Not enough room to write match length");
        return 1; /* Check output limit */
    }
    if (length >= ML_MASK) {
        *token += ML_MASK;
        length -= ML_MASK;
        for (; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; }
        if (length >= 255) { length -= 255; *(*op)++ = 255; }
        *(*op)++ = (BYTE)length;
    } else {
        *token += (BYTE)(length);
    }

    /* Prepare next loop */
    *ip += matchLength;
    *anchor = *ip;

    return 0;
}

LZ4_FORCE_INLINE int LZ4HC_compress_hashChain(
    LZ4HC_CCtx_internal *const ctx,
    const char *const src,
    char *const dst,
    int *srcSizePtr,
    int const maxOutputSize,
    int maxNbAttempts,
    const limitedOutput_directive limit,
    const dictCtx_directive dict
) {
    const int inputSize = *srcSizePtr;
    const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */

    const BYTE *ip = (const BYTE *) src;
    const BYTE *anchor = ip;
    const BYTE *const iend = ip + inputSize;
    const BYTE *const mflimit = iend - MFLIMIT;
    const BYTE *const matchlimit = (iend - LASTLITERALS);

    BYTE *optr = (BYTE *) dst;
    BYTE *op = (BYTE *) dst;
    BYTE *oend = op + maxOutputSize;

    int ml0, ml, ml2, ml3;
    const BYTE *start0;
    const BYTE *ref0;
    const BYTE *ref = NULL;
    const BYTE *start2 = NULL;
    const BYTE *ref2 = NULL;
    const BYTE *start3 = NULL;
    const BYTE *ref3 = NULL;

    /* init */
    *srcSizePtr = 0;
    if (limit == fillOutput) oend -= LASTLITERALS; /* Hack to support the LZ4 format restriction */
    if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */

    /* Main Loop */
    while (ip <= mflimit) {
        ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict);
        if (ml < MINMATCH) { ip++; continue; }

        /* saved, in case we would skip too much */
        start0 = ip;
        ref0 = ref;
        ml0 = ml;

_Search2:
        if (ip + ml <= mflimit) {
            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
                                               ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
                                               maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
        } else {
            ml2 = ml;
        }

        if (ml2 == ml) { /* No better match => encode ML1 */
            optr = op;
            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
            continue;
        }

        if (start0 < ip) { /* first match was skipped at least once */
            if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */
                ip = start0;
                ref = ref0;
                ml = ml0; /* restore initial ML1 */
            }
        }

        /* Here, start0==ip */
        if ((start2 - ip) < 3) { /* First Match too small : removed */
            ml = ml2;
            ip = start2;
            ref = ref2;
            goto _Search2;
        }

_Search3:
        /* At this stage, we have :
         * ml2 > ml1, and
         * ip1+3 <= ip2 (usually < ip1+ml1) */
        if ((start2 - ip) < OPTIMAL_ML) {
            int correction;
            int new_ml = ml;
            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
            if (ip + new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
            correction = new_ml - (int)(start2 - ip);
            if (correction > 0) {
                start2 += correction;
                ref2 += correction;
                ml2 -= correction;
            }
        }
        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */

        if (start2 + ml2 <= mflimit) {
            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
                                               start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
                                               maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
        } else {
            ml3 = ml2;
        }

        if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */
            /* ip & ref are known; Now for ml */
            if (start2 < ip + ml) ml = (int)(start2 - ip);
            /* Now, encode 2 sequences */
            optr = op;
            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
            ip = start2;
            optr = op;
            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) {
                ml = ml2;
                ref = ref2;
                goto _dest_overflow;
            }
            continue;
        }

        if (start3 < ip + ml + 3) { /* Not enough space for match 2 : remove it */
            if (start3 >= (ip + ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
                if (start2 < ip + ml) {
                    int correction = (int)(ip + ml - start2);
                    start2 += correction;
                    ref2 += correction;
                    ml2 -= correction;
                    if (ml2 < MINMATCH) {
                        start2 = start3;
                        ref2 = ref3;
                        ml2 = ml3;
                    }
                }

                optr = op;
                if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
                ip = start3;
                ref = ref3;
                ml = ml3;

                start0 = start2;
                ref0 = ref2;
                ml0 = ml2;
                goto _Search2;
            }

            start2 = start3;
            ref2 = ref3;
            ml2 = ml3;
            goto _Search3;
        }

        /*
         * OK, now we have 3 ascending matches;
         * let's write the first one ML1.
         * ip & ref are known; Now decide ml.
         */
        if (start2 < ip + ml) {
            if ((start2 - ip) < OPTIMAL_ML) {
                int correction;
                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
                correction = ml - (int)(start2 - ip);
                if (correction > 0) {
                    start2 += correction;
                    ref2 += correction;
                    ml2 -= correction;
                }
            } else {
                ml = (int)(start2 - ip);
            }
        }
        optr = op;
        if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;

        /* ML2 becomes ML1 */
        ip = start2;
        ref = ref2;
        ml = ml2;

        /* ML3 becomes ML2 */
        start2 = start3;
        ref2 = ref3;
        ml2 = ml3;

        /* let's find a new ML3 */
        goto _Search3;
    }

_last_literals:
    /* Encode Last Literals */
    {
        size_t lastRunSize = (size_t)(iend - anchor); /* literals */
        size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
        size_t const totalSize = 1 + llAdd + lastRunSize;
        if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
        if (limit && (op + totalSize > oend)) {
            if (limit == limitedOutput) return 0;
            /* adapt lastRunSize to fill 'dst' */
            lastRunSize = (size_t)(oend - op) - 1 /*token*/;
            llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
            lastRunSize -= llAdd;
        }
        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
        ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */

        if (lastRunSize >= RUN_MASK) {
            size_t accumulator = lastRunSize - RUN_MASK;
            *op++ = (RUN_MASK << ML_BITS);
            for (; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
            *op++ = (BYTE) accumulator;
        } else {
            *op++ = (BYTE)(lastRunSize << ML_BITS);
        }
        memcpy(op, anchor, lastRunSize);
        op += lastRunSize;
    }

    /* End */
    *srcSizePtr = (int)(((const char *)ip) - src);
    return (int)(((char *)op) - dst);

_dest_overflow:
    if (limit == fillOutput) {
        /* Assumption : ip, anchor, ml and ref must be set correctly */
        size_t const ll = (size_t)(ip - anchor);
        size_t const ll_addbytes = (ll + 240) / 255;
        size_t const ll_totalCost = 1 + ll_addbytes + ll;
        BYTE *const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
        DEBUGLOG(6, "Last sequence overflowing");
        op = optr; /* restore correct out pointer */
        if (op + ll_totalCost <= maxLitPos) {
            /* ll validated; now adjust match length */
            size_t const bytesLeftForMl = (size_t)(maxLitPos - (op + ll_totalCost));
            size_t const maxMlSize = MINMATCH + (ML_MASK - 1) + (bytesLeftForMl * 255);
            assert(maxMlSize < INT_MAX);
            assert(ml >= 0);
            if ((size_t)ml > maxMlSize) ml = (int)maxMlSize;
            if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) {
                LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend);
            }
        }
        goto _last_literals;
    }
    /* compression failed */
    return 0;
}


static int LZ4HC_compress_optimal(LZ4HC_CCtx_internal *ctx,
                                  const char *const src, char *dst,
                                  int *srcSizePtr, int dstCapacity,
                                  int const nbSearches, size_t sufficient_len,
                                  const limitedOutput_directive limit, int const fullUpdate,
                                  const dictCtx_directive dict,
                                  HCfavor_e favorDecSpeed);


LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal(
    LZ4HC_CCtx_internal *const ctx,
    const char *const src,
    char *const dst,
    int *const srcSizePtr,
    int const dstCapacity,
    int cLevel,
    const limitedOutput_directive limit,
    const dictCtx_directive dict
) {
    typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
    typedef struct {
        lz4hc_strat_e strat;
        int nbSearches;
        U32 targetLength;
    } cParams_t;
    static const cParams_t clTable[LZ4HC_CLEVEL_MAX + 1] = {
        { lz4hc,     2, 16 },           /*  0, unused */
        { lz4hc,     2, 16 },           /*  1, unused */
        { lz4hc,     2, 16 },           /*  2, unused */
        { lz4hc,     4, 16 },           /*  3 */
        { lz4hc,     8, 16 },           /*  4 */
        { lz4hc,    16, 16 },           /*  5 */
        { lz4hc,    32, 16 },           /*  6 */
        { lz4hc,    64, 16 },           /*  7 */
        { lz4hc,   128, 16 },           /*  8 */
        { lz4hc,   256, 16 },           /*  9 */
        { lz4opt,   96, 64 },           /* 10==LZ4HC_CLEVEL_OPT_MIN */
        { lz4opt,  512, 128 },          /* 11 */
        { lz4opt, 16384, LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
    };

    DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
             ctx, src, *srcSizePtr, limit);

    if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */
    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */

    ctx->end += *srcSizePtr;
    if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */
    cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
    {
        cParams_t const cParam = clTable[cLevel];
        HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
        int result;

        if (cParam.strat == lz4hc) {
            result = LZ4HC_compress_hashChain(ctx,
                                              src, dst, srcSizePtr, dstCapacity,
                                              cParam.nbSearches, limit, dict);
        } else {
            assert(cParam.strat == lz4opt);
            result = LZ4HC_compress_optimal(ctx,
                                            src, dst, srcSizePtr, dstCapacity,
                                            cParam.nbSearches, cParam.targetLength, limit,
                                            cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */
                                            dict, favor);
        }
        if (result <= 0) ctx->dirty = 1;
        return result;
    }
}

static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal *ctxPtr, const BYTE *newBlock);

static int
LZ4HC_compress_generic_noDictCtx(
    LZ4HC_CCtx_internal *const ctx,
    const char *const src,
    char *const dst,
    int *const srcSizePtr,
    int const dstCapacity,
    int cLevel,
    limitedOutput_directive limit
) {
    assert(ctx->dictCtx == NULL);
    return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx);
}

static int
LZ4HC_compress_generic_dictCtx(
    LZ4HC_CCtx_internal *const ctx,
    const char *const src,
    char *const dst,
    int *const srcSizePtr,
    int const dstCapacity,
    int cLevel,
    limitedOutput_directive limit
) {
    const size_t position = (size_t)(ctx->end - ctx->base) - ctx->lowLimit;
    assert(ctx->dictCtx != NULL);
    if (position >= 64 KB) {
        ctx->dictCtx = NULL;
        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
    } else if (position == 0 && *srcSizePtr > 4 KB) {
        memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal));
        LZ4HC_setExternalDict(ctx, (const BYTE *)src);
        ctx->compressionLevel = (short)cLevel;
        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
    } else {
        return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc);
    }
}

static int
LZ4HC_compress_generic(
    LZ4HC_CCtx_internal *const ctx,
    const char *const src,
    char *const dst,
    int *const srcSizePtr,
    int const dstCapacity,
    int cLevel,
    limitedOutput_directive limit
) {
    if (ctx->dictCtx == NULL) {
        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
    } else {
        return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
    }
}


int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); }

#ifndef _MSC_VER /* for some reason, Visual fails the alignment test on 32-bit x86 :
                  * it reports an alignment of 8 bytes,
                  * while actually aligning LZ4_streamHC_t on 4 bytes. */
static size_t LZ4_streamHC_t_alignment(void) {
    typedef struct { char c; LZ4_streamHC_t t; } t_a;
    return sizeof(t_a) - sizeof(LZ4_streamHC_t);
}
#endif

/* state is presumed correctly initialized,
 * in which case its size and alignment have already been validated */
int LZ4_compress_HC_extStateHC_fastReset(void *state, const char *src, char *dst, int srcSize, int dstCapacity, int compressionLevel) {
    LZ4HC_CCtx_internal *const ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
#ifndef _MSC_VER /* for some reason, Visual fails the alignment test on 32-bit x86 :
                  * it reports an alignment of 8 bytes,
                  * while actually aligning LZ4_streamHC_t on 4 bytes. */
    assert(((size_t)state & (LZ4_streamHC_t_alignment() - 1)) == 0); /* check alignment */
#endif
    if (((size_t)(state) & (sizeof(void *) - 1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */
    LZ4_resetStreamHC_fast((LZ4_streamHC_t *)state, compressionLevel);
    LZ4HC_init_internal(ctx, (const BYTE *)src);
    if (dstCapacity < LZ4_compressBound(srcSize))
        return LZ4HC_compress_generic(ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput);
    else
        return LZ4HC_compress_generic(ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited);
}

int LZ4_compress_HC_extStateHC(void *state, const char *src, char *dst, int srcSize, int dstCapacity, int compressionLevel) {
    LZ4_streamHC_t *const ctx = LZ4_initStreamHC(state, sizeof(*ctx));
    if (ctx == NULL) return 0; /* init failure */
    return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel);
}

int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity, int compressionLevel) {
#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
    LZ4_streamHC_t *const statePtr = (LZ4_streamHC_t *)ALLOC(sizeof(LZ4_streamHC_t));
#else
    LZ4_streamHC_t state;
    LZ4_streamHC_t *const statePtr = &state;
#endif
    int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel);
#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
    FREEMEM(statePtr);
#endif
    return cSize;
}
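
/* Usage sketch (illustrative only, not part of the library) :
 *
 *     char dst[LZ4_COMPRESSBOUND(SRC_SIZE)];   // SRC_SIZE : caller-defined constant
 *     int const cSize = LZ4_compress_HC(src, dst, srcSize, (int)sizeof(dst), LZ4HC_CLEVEL_DEFAULT);
 *     if (cSize <= 0) { ... }                  // error : compression failed
 *
 * Sizing dst with LZ4_compressBound() / LZ4_COMPRESSBOUND() guarantees that
 * compression cannot fail for any srcSize <= LZ4_MAX_INPUT_SIZE. */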

/* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */
int LZ4_compress_HC_destSize(void *state, const char *src, char *dst, int *sourceSizePtr, int targetDestSize, int cLevel) {
    LZ4_streamHC_t *const ctx = LZ4_initStreamHC(state, sizeof(*ctx));
    if (ctx == NULL) return 0; /* init failure */
    LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE *) src);
    LZ4_setCompressionLevel(ctx, cLevel);
    return LZ4HC_compress_generic(&ctx->internal_donotuse, src, dst, sourceSizePtr, targetDestSize, cLevel, fillOutput);
}



/**************************************
*  Streaming Functions
**************************************/
/* allocation */
LZ4_streamHC_t *LZ4_createStreamHC(void) {
    LZ4_streamHC_t *const state = (LZ4_streamHC_t *)ALLOC(sizeof(LZ4_streamHC_t));
    if (LZ4_initStreamHC(state, sizeof(*state)) == NULL) {
        free(state);
        return NULL;
    }
    return state;
}

int LZ4_freeStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr) {
    DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr);
    if (!LZ4_streamHCPtr) return 0; /* support free on NULL */
    FREEMEM(LZ4_streamHCPtr);
    return 0;
}

// Skip AddressSanitizer, which strangely breaks compilation with :
// lz4/lz4hc.c: error: writing 2 bytes into a region of size 1 [-Werror=stringop-overflow=]
// |    LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = 0;
ATTRIBUTE_NO_SANITIZE_ADDRESS
LZ4_streamHC_t *LZ4_initStreamHC(void *buffer, size_t size) {
    LZ4_streamHC_t *const LZ4_streamHCPtr = (LZ4_streamHC_t *)buffer;
    if (buffer == NULL) return NULL;
    if (size < sizeof(LZ4_streamHC_t)) return NULL;
#ifndef _MSC_VER /* for some reason, Visual fails the alignment test on 32-bit x86 :
                  * it reports an alignment of 8 bytes,
                  * while actually aligning LZ4_streamHC_t on 4 bytes. */
    if (((size_t)buffer) & (LZ4_streamHC_t_alignment() - 1)) return NULL; /* alignment check */
#endif
    /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
    LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE);
    DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", LZ4_streamHCPtr, (unsigned)size);
    /* end-base will trigger a clearTable on starting compression */
    LZ4_streamHCPtr->internal_donotuse.end = (const BYTE *)(ptrdiff_t) -1;
    LZ4_streamHCPtr->internal_donotuse.base = NULL;
    LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
    LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = 0;
    LZ4_streamHCPtr->internal_donotuse.dirty = 0;
    LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT);
    return LZ4_streamHCPtr;
}

/* just a stub */
void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel) {
    LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
    LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
}

void LZ4_resetStreamHC_fast(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel) {
    DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
    if (LZ4_streamHCPtr->internal_donotuse.dirty) {
        LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
    } else {
        /* preserve end - base : can trigger clearTable's threshold */
        LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.base;
        LZ4_streamHCPtr->internal_donotuse.base = NULL;
        LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
    }
    LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
}

void LZ4_setCompressionLevel(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel) {
    DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel);
    if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT;
    if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX;
    LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel;
}

void LZ4_favorDecompressionSpeed(LZ4_streamHC_t *LZ4_streamHCPtr, int favor) {
    LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor != 0);
}

/* LZ4_loadDictHC() :
 * LZ4_streamHCPtr is presumed properly initialized */
int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
                   const char *dictionary, int dictSize) {
    LZ4HC_CCtx_internal *const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
    DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize);
    assert(LZ4_streamHCPtr != NULL);
    if (dictSize > 64 KB) {
        dictionary += (size_t)dictSize - 64 KB;
        dictSize = 64 KB;
    }
    /* need a full initialization, there are bad side-effects when using resetFast() */
    {
        int const cLevel = ctxPtr->compressionLevel;
        LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
        LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel);
    }
    LZ4HC_init_internal(ctxPtr, (const BYTE *)dictionary);
    ctxPtr->end = (const BYTE *)dictionary + dictSize;
    if (dictSize >= 4) LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
    return dictSize;
}
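
/* Dictionary sketch (illustrative only, not part of the library) :
 *
 *     LZ4_streamHC_t *s = LZ4_createStreamHC();
 *     LZ4_loadDictHC(s, dictBuf, dictSize);    // only the last 64 KB are retained
 *     int const cSize = LZ4_compress_HC_continue(s, src, dst, srcSize, dstCapacity);
 *     LZ4_freeStreamHC(s);
 *
 * dictBuf must remain valid and unmodified while compression proceeds. */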

void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) {
    working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? &(dictionary_stream->internal_donotuse) : NULL;
}

/* compression */

static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal *ctxPtr, const BYTE *newBlock) {
    DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
    if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4)
        LZ4HC_Insert(ctxPtr, ctxPtr->end - 3); /* Referencing remaining dictionary content */

    /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
    ctxPtr->lowLimit = ctxPtr->dictLimit;
    ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
    ctxPtr->dictBase = ctxPtr->base;
    ctxPtr->base = newBlock - ctxPtr->dictLimit;
    ctxPtr->end = newBlock;
    ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */

    /* cannot reference an extDict and a dictCtx at the same time */
    ctxPtr->dictCtx = NULL;
}

static int LZ4_compressHC_continue_generic(LZ4_streamHC_t *LZ4_streamHCPtr,
                                           const char *src, char *dst,
                                           int *srcSizePtr, int dstCapacity,
                                           limitedOutput_directive limit) {
    LZ4HC_CCtx_internal *const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
    DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
             LZ4_streamHCPtr, src, *srcSizePtr, limit);
    assert(ctxPtr != NULL);
    /* auto-init if forgotten */
    if (ctxPtr->base == NULL) LZ4HC_init_internal(ctxPtr, (const BYTE *) src);

    /* Check overflow */
    if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {
        size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
        if (dictSize > 64 KB) dictSize = 64 KB;
        LZ4_loadDictHC(LZ4_streamHCPtr, (const char *)(ctxPtr->end) - dictSize, (int)dictSize);
    }

    /* Check if blocks follow each other */
    if ((const BYTE *)src != ctxPtr->end)
        LZ4HC_setExternalDict(ctxPtr, (const BYTE *)src);

    /* Check overlapping input/dictionary space */
    {
        const BYTE *sourceEnd = (const BYTE *) src + *srcSizePtr;
        const BYTE *const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
        const BYTE *const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
        if ((sourceEnd > dictBegin) && ((const BYTE *)src < dictEnd)) {
            if (sourceEnd > dictEnd) sourceEnd = dictEnd;
            ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
            if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
        }
    }

    return LZ4HC_compress_generic(ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit);
}

int LZ4_compress_HC_continue(LZ4_streamHC_t *LZ4_streamHCPtr, const char *src, char *dst, int srcSize, int dstCapacity) {
    if (dstCapacity < LZ4_compressBound(srcSize))
        return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput);
    else
        return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited);
}
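
/* Streaming sketch (illustrative only, not part of the library) : compress a
 * sequence of dependent blocks, each one able to reference the previous 64 KB :
 *
 *     LZ4_streamHC_t *s = LZ4_createStreamHC();
 *     LZ4_resetStreamHC_fast(s, LZ4HC_CLEVEL_DEFAULT);
 *     while (read_block(in, &inSize)) {                      // caller-defined I/O
 *         int const cSize = LZ4_compress_HC_continue(s, in, out, inSize, outCapacity);
 *         if (cSize <= 0) break;                             // error handling
 *         write_block(out, cSize);                           // caller-defined I/O
 *     }
 *     LZ4_freeStreamHC(s);
 *
 * The previous input block must stay accessible in memory, or be backed up
 * with LZ4_saveDictHC(), for matches into the 64 KB window to remain valid. */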

int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t *LZ4_streamHCPtr, const char *src, char *dst, int *srcSizePtr, int targetDestSize) {
    return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput);
}



/* dictionary saving */

int LZ4_saveDictHC(LZ4_streamHC_t *LZ4_streamHCPtr, char *safeBuffer, int dictSize) {
    LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
    int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
    DEBUGLOG(4, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
    if (dictSize > 64 KB) dictSize = 64 KB;
    if (dictSize < 4) dictSize = 0;
    if (dictSize > prefixSize) dictSize = prefixSize;
    memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
    {
        U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
        streamPtr->end = (const BYTE *)safeBuffer + dictSize;
        streamPtr->base = streamPtr->end - endIndex;
        streamPtr->dictLimit = endIndex - (U32)dictSize;
        streamPtr->lowLimit = endIndex - (U32)dictSize;
        if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit;
    }
    return dictSize;
}


/***************************************************
*  Deprecated Functions
***************************************************/

/* These functions currently generate deprecation warnings */

/* Wrappers for deprecated compression functions */
int LZ4_compressHC(const char *src, char *dst, int srcSize) { return LZ4_compress_HC(src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
int LZ4_compressHC_limitedOutput(const char *src, char *dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
int LZ4_compressHC2(const char *src, char *dst, int srcSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
int LZ4_compressHC2_limitedOutput(const char *src, char *dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
int LZ4_compressHC_withStateHC(void *state, const char *src, char *dst, int srcSize) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
int LZ4_compressHC_limitedOutput_withStateHC(void *state, const char *src, char *dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, 0); }
int LZ4_compressHC2_withStateHC(void *state, const char *src, char *dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
int LZ4_compressHC2_limitedOutput_withStateHC(void *state, const char *src, char *dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
int LZ4_compressHC_continue(LZ4_streamHC_t *ctx, const char *src, char *dst, int srcSize) { return LZ4_compress_HC_continue(ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
int LZ4_compressHC_limitedOutput_continue(LZ4_streamHC_t *ctx, const char *src, char *dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue(ctx, src, dst, srcSize, maxDstSize); }


/* Deprecated streaming functions */
int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }

/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t)
 * @return : 0 on success, !=0 if error */
int LZ4_resetStreamStateHC(void *state, char *inputBuffer) {
    LZ4_streamHC_t *const hc4 = LZ4_initStreamHC(state, sizeof(*hc4));
    if (hc4 == NULL) return 1; /* init failed */
    LZ4HC_init_internal(&hc4->internal_donotuse, (const BYTE *)inputBuffer);
    return 0;
}

void *LZ4_createHC(const char *inputBuffer) {
    LZ4_streamHC_t *const hc4 = LZ4_createStreamHC();
    if (hc4 == NULL) return NULL; /* not enough memory */
    LZ4HC_init_internal(&hc4->internal_donotuse, (const BYTE *)inputBuffer);
    return hc4;
}

int LZ4_freeHC(void *LZ4HC_Data) {
    if (!LZ4HC_Data) return 0; /* support free on NULL */
    FREEMEM(LZ4HC_Data);
    return 0;
}

int LZ4_compressHC2_continue(void *LZ4HC_Data, const char *src, char *dst, int srcSize, int cLevel) {
    return LZ4HC_compress_generic(&((LZ4_streamHC_t *)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited);
}

int LZ4_compressHC2_limitedOutput_continue(void *LZ4HC_Data, const char *src, char *dst, int srcSize, int dstCapacity, int cLevel) {
    return LZ4HC_compress_generic(&((LZ4_streamHC_t *)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);
}

char *LZ4_slideInputBufferHC(void *LZ4HC_Data) {
    LZ4_streamHC_t *ctx = (LZ4_streamHC_t *)LZ4HC_Data;
    const BYTE *bufferStart = ctx->internal_donotuse.base + ctx->internal_donotuse.lowLimit;
    LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel);
    /* avoid const char * -> char * conversion warning :( */
    return (char *)(uptrval)bufferStart;
}


/* ================================================
 * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX])
 * ===============================================*/
typedef struct {
    int price;
    int off;
    int mlen;
    int litlen;
} LZ4HC_optimal_t;

/* price in bytes */
LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) {
    int price = litlen;
    assert(litlen >= 0);
    if (litlen >= (int)RUN_MASK)
        price += 1 + ((litlen - (int)RUN_MASK) / 255);
    return price;
}


/* requires mlen >= MINMATCH */
LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) {
    int price = 1 + 2; /* token + 16-bit offset */
    assert(litlen >= 0);
    assert(mlen >= MINMATCH);

    price += LZ4HC_literalsPrice(litlen);

    if (mlen >= (int)(ML_MASK + MINMATCH))
        price += 1 + ((mlen - (int)(ML_MASK + MINMATCH)) / 255);

    return price;
}
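
/* Worked example (illustrative) : litlen = 20, mlen = 40
 *   LZ4HC_literalsPrice(20)     = 20 + 1 = 21   (20 >= RUN_MASK==15 : one extra length byte)
 *   LZ4HC_sequencePrice(20, 40) = 3 + 21 + 1 = 25 bytes
 * i.e. 1 token + 2 offset bytes + 21 bytes for the literal run,
 * plus 1 extra matchlength byte since 40 >= ML_MASK + MINMATCH == 19. */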

typedef struct {
    int off;
    int len;
} LZ4HC_match_t;

LZ4_FORCE_INLINE LZ4HC_match_t
LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal *const ctx,
                      const BYTE *ip, const BYTE *const iHighLimit,
                      int minLen, int nbSearches,
                      const dictCtx_directive dict,
                      const HCfavor_e favorDecSpeed) {
    LZ4HC_match_t match = { 0, 0 };
    const BYTE *matchPtr = NULL;
    /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
     * but this won't be the case here, as we define iLowLimit==ip,
     * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
    int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed);
    if (matchLength <= minLen) return match;
    if (favorDecSpeed) {
        if ((matchLength > 18) & (matchLength <= 36)) matchLength = 18; /* favor shortcut */
    }
    match.len = matchLength;
    match.off = (int)(ip - matchPtr);
    return match;
}


static int LZ4HC_compress_optimal(LZ4HC_CCtx_internal *ctx,
                                  const char *const src,
                                  char *dst,
                                  int *srcSizePtr,
                                  int dstCapacity,
                                  int const nbSearches,
                                  size_t sufficient_len,
                                  const limitedOutput_directive limit,
                                  int const fullUpdate,
                                  const dictCtx_directive dict,
                                  const HCfavor_e favorDecSpeed) {
    int retval = 0;
#define TRAILING_LITERALS 3
#ifdef LZ4HC_HEAPMODE
    LZ4HC_optimal_t *const opt = (LZ4HC_optimal_t *)malloc(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS));
#else
    LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */
#endif

    const BYTE *ip = (const BYTE *) src;
    const BYTE *anchor = ip;
    const BYTE *const iend = ip + *srcSizePtr;
    const BYTE *const mflimit = iend - MFLIMIT;
    const BYTE *const matchlimit = iend - LASTLITERALS;
    BYTE *op = (BYTE *) dst;
    BYTE *opSaved = (BYTE *) dst;
    BYTE *oend = op + dstCapacity;
    int ovml = MINMATCH; /* overflow - last sequence */
    const BYTE *ovref = NULL;

    /* init */
#ifdef LZ4HC_HEAPMODE
    if (opt == NULL) goto _return_label;
#endif
    DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
    *srcSizePtr = 0;
    if (limit == fillOutput) oend -= LASTLITERALS; /* Hack to support the LZ4 format restriction */
    if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM - 1;

    /* Main Loop */
    while (ip <= mflimit) {
        int const llen = (int)(ip - anchor);
        int best_mlen, best_off;
        int cur, last_match_pos = 0;

        LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH - 1, nbSearches, dict, favorDecSpeed);
        if (firstMatch.len == 0) { ip++; continue; }

        if ((size_t)firstMatch.len > sufficient_len) {
            /* good enough solution : immediate encoding */
            int const firstML = firstMatch.len;
            const BYTE *const matchPos = ip - firstMatch.off;
            opSaved = op;
            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend)) { /* updates ip, op and anchor */
                ovml = firstML;
                ovref = matchPos;
                goto _dest_overflow;
            }
            continue;
        }

        /* set prices for first positions (literals) */
        {
            int rPos;
            for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
                int const cost = LZ4HC_literalsPrice(llen + rPos);
                opt[rPos].mlen = 1;
                opt[rPos].off = 0;
                opt[rPos].litlen = llen + rPos;
                opt[rPos].price = cost;
                DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
                         rPos, cost, opt[rPos].litlen);
            }
        }
        /* set prices using initial match */
        {
            int mlen = MINMATCH;
            int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */
            int const offset = firstMatch.off;
            assert(matchML < LZ4_OPT_NUM);
            for (; mlen <= matchML ; mlen++) {
                int const cost = LZ4HC_sequencePrice(llen, mlen);
                opt[mlen].mlen = mlen;
                opt[mlen].off = offset;
                opt[mlen].litlen = llen;
                opt[mlen].price = cost;
                DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
                         mlen, cost, mlen);
            }
        }
        last_match_pos = firstMatch.len;
        {
            int addLit;
            for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
                opt[last_match_pos + addLit].mlen = 1; /* literal */
                opt[last_match_pos + addLit].off = 0;
                opt[last_match_pos + addLit].litlen = addLit;
                opt[last_match_pos + addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
                DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
                         last_match_pos + addLit, opt[last_match_pos + addLit].price, addLit);
            }
        }

        /* check further positions */
        for (cur = 1; cur < last_match_pos; cur++) {
            const BYTE *const curPtr = ip + cur;
            LZ4HC_match_t newMatch;

            if (curPtr > mflimit) break;
            DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
                     cur, opt[cur].price, opt[cur + 1].price, cur + 1);
            if (fullUpdate) {
                /* not useful to search here if next position has same (or lower) cost */
                if ((opt[cur + 1].price <= opt[cur].price)
                        /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
                        && (opt[cur + MINMATCH].price < opt[cur].price + 3/*min seq price*/))
                    continue;
            } else {
                /* not useful to search here if next position has same (or lower) cost */
                if (opt[cur + 1].price <= opt[cur].price) continue;
            }

            DEBUGLOG(7, "search at rPos:%u", cur);
            if (fullUpdate)
                newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH - 1, nbSearches, dict, favorDecSpeed);
            else
                /* only test matches of minimum length; slightly faster, but misses a few bytes */
                newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed);
            if (!newMatch.len) continue;

            if (((size_t)newMatch.len > sufficient_len)
                    || (newMatch.len + cur >= LZ4_OPT_NUM)) {
                /* immediate encoding */
                best_mlen = newMatch.len;
                best_off = newMatch.off;
                last_match_pos = cur + 1;
                goto encode;
            }

            /* before match : set price with literals at beginning */
            {
                int const baseLitlen = opt[cur].litlen;
                int litlen;
                for (litlen = 1; litlen < MINMATCH; litlen++) {
                    int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen + litlen);
                    int const pos = cur + litlen;
                    if (price < opt[pos].price) {
                        opt[pos].mlen = 1; /* literal */
                        opt[pos].off = 0;
                        opt[pos].litlen = baseLitlen + litlen;
                        opt[pos].price = price;
                        DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
                                 pos, price, opt[pos].litlen);
                    }
                }
            }

            /* set prices using match at position = cur */
            {
                int const matchML = newMatch.len;
                int ml = MINMATCH;

                assert(cur + newMatch.len < LZ4_OPT_NUM);
                for (; ml <= matchML ; ml++) {
                    int const pos = cur + ml;
                    int const offset = newMatch.off;
                    int price;
                    int ll;
                    DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)",
                             pos, last_match_pos);
                    if (opt[cur].mlen == 1) {
                        ll = opt[cur].litlen;
                        price = ((cur > ll) ? opt[cur - ll].price : 0)
                                + LZ4HC_sequencePrice(ll, ml);
                    } else {
                        ll = 0;
                        price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
                    }

                    assert((U32)favorDecSpeed <= 1);
                    if (pos > last_match_pos + TRAILING_LITERALS
                            || price <= opt[pos].price - (int)favorDecSpeed) {
                        DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)",
                                 pos, price, ml);
                        assert(pos < LZ4_OPT_NUM);
                        if ((ml == matchML) /* last pos of last match */
                                && (last_match_pos < pos))
                            last_match_pos = pos;
                        opt[pos].mlen = ml;
                        opt[pos].off = offset;
                        opt[pos].litlen = ll;
                        opt[pos].price = price;
                    }
                }
            }
            /* complete following positions with literals */
            {
                int addLit;
                for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
                    opt[last_match_pos + addLit].mlen = 1; /* literal */
                    opt[last_match_pos + addLit].off = 0;
                    opt[last_match_pos + addLit].litlen = addLit;
                    opt[last_match_pos + addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
                    DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos + addLit, opt[last_match_pos + addLit].price, addLit);
                }
            }
        } /* for (cur = 1; cur <= last_match_pos; cur++) */

        assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS);
        best_mlen = opt[last_match_pos].mlen;
        best_off = opt[last_match_pos].off;
        cur = last_match_pos - best_mlen;

encode: /* cur, last_match_pos, best_mlen, best_off must be set */
        assert(cur < LZ4_OPT_NUM);
        assert(last_match_pos >= 1); /* == 1 when only one candidate */
        DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos);
        {
            int candidate_pos = cur;
            int selected_matchLength = best_mlen;
            int selected_offset = best_off;
            while (1) { /* from end to beginning */
                int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */
                int const next_offset = opt[candidate_pos].off;
                DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength);
                opt[candidate_pos].mlen = selected_matchLength;
                opt[candidate_pos].off = selected_offset;
                selected_matchLength = next_matchLength;
                selected_offset = next_offset;
                if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */
                assert(next_matchLength > 0); /* can be 1, means literal */
                candidate_pos -= next_matchLength;
            }
        }

        /* encode all recorded sequences in order */
        {
            int rPos = 0; /* relative position (to ip) */
            while (rPos < last_match_pos) {
                int const ml = opt[rPos].mlen;
                int const offset = opt[rPos].off;
                if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */
                rPos += ml;
                assert(ml >= MINMATCH);
                assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
                opSaved = op;
                if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend)) { /* updates ip, op and anchor */
                    ovml = ml;
                    ovref = ip - offset;
                    goto _dest_overflow;
                }
            }
        }
    } /* while (ip <= mflimit) */

_last_literals:
    /* Encode Last Literals */
    {
        size_t lastRunSize = (size_t)(iend - anchor); /* literals */
        size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
        size_t const totalSize = 1 + llAdd + lastRunSize;
        if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
        if (limit && (op + totalSize > oend)) {
            if (limit == limitedOutput) { /* Check output limit */
                retval = 0;
                goto _return_label;
            }
            /* adapt lastRunSize to fill 'dst' */
            lastRunSize = (size_t)(oend - op) - 1 /*token*/;
            llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
            lastRunSize -= llAdd;
        }
        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
        ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */

        if (lastRunSize >= RUN_MASK) {
            size_t accumulator = lastRunSize - RUN_MASK;
            *op++ = (RUN_MASK << ML_BITS);
            for (; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
            *op++ = (BYTE) accumulator;
        } else {
            *op++ = (BYTE)(lastRunSize << ML_BITS);
        }
        memcpy(op, anchor, lastRunSize);
        op += lastRunSize;
    }

    /* End */
    *srcSizePtr = (int)(((const char *)ip) - src);
    retval = (int)((char *)op - dst);
    goto _return_label;

_dest_overflow:
    if (limit == fillOutput) {
        /* Assumption : ip, anchor, ovml and ovref must be set correctly */
        size_t const ll = (size_t)(ip - anchor);
        size_t const ll_addbytes = (ll + 240) / 255;
        size_t const ll_totalCost = 1 + ll_addbytes + ll;
        BYTE *const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
        DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend - 1 - opSaved));
        op = opSaved; /* restore correct out pointer */
        if (op + ll_totalCost <= maxLitPos) {
            /* ll validated; now adjust match length */
            size_t const bytesLeftForMl = (size_t)(maxLitPos - (op + ll_totalCost));
            size_t const maxMlSize = MINMATCH + (ML_MASK - 1) + (bytesLeftForMl * 255);
            assert(maxMlSize < INT_MAX);
            assert(ovml >= 0);
            if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize;
            if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) {
                DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml);
                DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor);
                LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend);
                DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor);
            }
        }
        goto _last_literals;
    }
_return_label:
#ifdef LZ4HC_HEAPMODE
    free(opt);
#endif
    return retval;
}