1 /* LzFind.c -- Match finder for LZ algorithms
2 2009-04-22 : Igor Pavlov : Public domain */
9 #define kEmptyHashValue 0
10 #define kMaxValForNormalize ((uint32_t)0xFFFFFFFF)
11 #define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
12 #define kNormalizeMask (~(kNormalizeStepMin - 1))
13 #define kMaxHistorySize ((uint32_t)3 << 30)
15 #define kStartMaxLen 3
17 static void LzInWindow_Free(struct CMatchFinder
*p
, struct ISzAlloc
*alloc
)
21 alloc
->Free(alloc
, p
->bufferBase
);
26 /* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
28 static int LzInWindow_Create(struct CMatchFinder
*p
, uint32_t keepSizeReserv
, struct ISzAlloc
*alloc
)
30 uint32_t blockSize
= p
->keepSizeBefore
+ p
->keepSizeAfter
+ keepSizeReserv
;
33 p
->blockSize
= blockSize
;
36 if (p
->bufferBase
== 0 || p
->blockSize
!= blockSize
)
38 LzInWindow_Free(p
, alloc
);
39 p
->blockSize
= blockSize
;
40 p
->bufferBase
= (uint8_t *)alloc
->Alloc(alloc
, (size_t)blockSize
);
42 return (p
->bufferBase
!= 0);
45 uint8_t *MatchFinder_GetPointerToCurrentPos(struct CMatchFinder
*p
) { return p
->buffer
; }
46 static uint8_t MatchFinder_GetIndexByte(struct CMatchFinder
*p
, int32_t bindex
) { return p
->buffer
[bindex
]; }
48 static uint32_t MatchFinder_GetNumAvailableBytes(struct CMatchFinder
*p
) { return p
->streamPos
- p
->pos
; }
50 void MatchFinder_ReduceOffsets(struct CMatchFinder
*p
, uint32_t subValue
)
52 p
->posLimit
-= subValue
;
54 p
->streamPos
-= subValue
;
57 static void MatchFinder_ReadBlock(struct CMatchFinder
*p
)
59 if (p
->streamEndWasReached
|| p
->result
!= SZ_OK
)
63 uint32_t curSize
= 0xFFFFFFFF - p
->streamPos
;
64 if (curSize
> p
->directInputRem
)
65 curSize
= (uint32_t)p
->directInputRem
;
66 p
->directInputRem
-= curSize
;
67 p
->streamPos
+= curSize
;
68 if (p
->directInputRem
== 0)
69 p
->streamEndWasReached
= 1;
74 uint8_t *dest
= p
->buffer
+ (p
->streamPos
- p
->pos
);
75 size_t size
= (p
->bufferBase
+ p
->blockSize
- dest
);
78 p
->result
= p
->stream
->Read(p
->stream
, dest
, &size
);
79 if (p
->result
!= SZ_OK
)
83 p
->streamEndWasReached
= 1;
86 p
->streamPos
+= (uint32_t)size
;
87 if (p
->streamPos
- p
->pos
> p
->keepSizeAfter
)
92 void MatchFinder_MoveBlock(struct CMatchFinder
*p
)
94 memmove(p
->bufferBase
,
95 p
->buffer
- p
->keepSizeBefore
,
96 (size_t)(p
->streamPos
- p
->pos
+ p
->keepSizeBefore
));
97 p
->buffer
= p
->bufferBase
+ p
->keepSizeBefore
;
100 int MatchFinder_NeedMove(struct CMatchFinder
*p
)
104 /* if (p->streamEndWasReached) return 0; */
105 return ((size_t)(p
->bufferBase
+ p
->blockSize
- p
->buffer
) <= p
->keepSizeAfter
);
108 void MatchFinder_ReadIfRequired(struct CMatchFinder
*p
)
110 if (p
->streamEndWasReached
)
112 if (p
->keepSizeAfter
>= p
->streamPos
- p
->pos
)
113 MatchFinder_ReadBlock(p
);
116 static void MatchFinder_CheckAndMoveAndRead(struct CMatchFinder
*p
)
118 if (MatchFinder_NeedMove(p
))
119 MatchFinder_MoveBlock(p
);
120 MatchFinder_ReadBlock(p
);
123 static void MatchFinder_SetDefaultSettings(struct CMatchFinder
*p
)
131 #define kCrcPoly 0xEDB88320
133 void MatchFinder_Construct(struct CMatchFinder
*p
)
139 MatchFinder_SetDefaultSettings(p
);
141 for (i
= 0; i
< 256; i
++)
145 for (j
= 0; j
< 8; j
++)
146 r
= (r
>> 1) ^ (kCrcPoly
& ~((r
& 1) - 1));
151 static void MatchFinder_FreeThisClassMemory(struct CMatchFinder
*p
, struct ISzAlloc
*alloc
)
153 alloc
->Free(alloc
, p
->hash
);
157 void MatchFinder_Free(struct CMatchFinder
*p
, struct ISzAlloc
*alloc
)
159 MatchFinder_FreeThisClassMemory(p
, alloc
);
160 LzInWindow_Free(p
, alloc
);
163 static CLzRef
* AllocRefs(uint32_t num
, struct ISzAlloc
*alloc
)
165 size_t sizeInuint8_ts
= (size_t)num
* sizeof(CLzRef
);
166 if (sizeInuint8_ts
/ sizeof(CLzRef
) != num
)
168 return (CLzRef
*)alloc
->Alloc(alloc
, sizeInuint8_ts
);
171 int MatchFinder_Create(struct CMatchFinder
*p
, uint32_t historySize
,
172 uint32_t keepAddBufferBefore
, uint32_t matchMaxLen
, uint32_t keepAddBufferAfter
,
173 struct ISzAlloc
*alloc
)
176 if (historySize
> kMaxHistorySize
)
178 MatchFinder_Free(p
, alloc
);
181 sizeReserv
= historySize
>> 1;
182 if (historySize
> ((uint32_t)2 << 30))
183 sizeReserv
= historySize
>> 2;
184 sizeReserv
+= (keepAddBufferBefore
+ matchMaxLen
+ keepAddBufferAfter
) / 2 + (1 << 19);
186 p
->keepSizeBefore
= historySize
+ keepAddBufferBefore
+ 1;
187 p
->keepSizeAfter
= matchMaxLen
+ keepAddBufferAfter
;
188 /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
189 if (LzInWindow_Create(p
, sizeReserv
, alloc
))
191 uint32_t newCyclicBufferSize
= historySize
+ 1;
193 p
->matchMaxLen
= matchMaxLen
;
195 p
->fixedHashSize
= 0;
196 if (p
->numHashBytes
== 2)
200 hs
= historySize
- 1;
206 hs
|= 0xFFFF; /* don't change it! It's required for Deflate */
209 if (p
->numHashBytes
== 3)
217 if (p
->numHashBytes
> 2) p
->fixedHashSize
+= kHash2Size
;
218 if (p
->numHashBytes
> 3) p
->fixedHashSize
+= kHash3Size
;
219 if (p
->numHashBytes
> 4) p
->fixedHashSize
+= kHash4Size
;
220 hs
+= p
->fixedHashSize
;
224 uint32_t prevSize
= p
->hashSizeSum
+ p
->numSons
;
226 p
->historySize
= historySize
;
228 p
->cyclicBufferSize
= newCyclicBufferSize
;
229 p
->numSons
= (p
->btMode
? newCyclicBufferSize
* 2 : newCyclicBufferSize
);
230 newSize
= p
->hashSizeSum
+ p
->numSons
;
231 if (p
->hash
!= 0 && prevSize
== newSize
)
233 MatchFinder_FreeThisClassMemory(p
, alloc
);
234 p
->hash
= AllocRefs(newSize
, alloc
);
237 p
->son
= p
->hash
+ p
->hashSizeSum
;
242 MatchFinder_Free(p
, alloc
);
246 static void MatchFinder_SetLimits(struct CMatchFinder
*p
)
248 uint32_t limit
= kMaxValForNormalize
- p
->pos
;
249 uint32_t limit2
= p
->cyclicBufferSize
- p
->cyclicBufferPos
;
252 limit2
= p
->streamPos
- p
->pos
;
253 if (limit2
<= p
->keepSizeAfter
)
259 limit2
-= p
->keepSizeAfter
;
263 uint32_t lenLimit
= p
->streamPos
- p
->pos
;
264 if (lenLimit
> p
->matchMaxLen
)
265 lenLimit
= p
->matchMaxLen
;
266 p
->lenLimit
= lenLimit
;
268 p
->posLimit
= p
->pos
+ limit
;
271 void MatchFinder_Init(struct CMatchFinder
*p
)
274 for (i
= 0; i
< p
->hashSizeSum
; i
++)
275 p
->hash
[i
] = kEmptyHashValue
;
276 p
->cyclicBufferPos
= 0;
277 p
->buffer
= p
->bufferBase
;
278 p
->pos
= p
->streamPos
= p
->cyclicBufferSize
;
280 p
->streamEndWasReached
= 0;
281 MatchFinder_ReadBlock(p
);
282 MatchFinder_SetLimits(p
);
285 static uint32_t MatchFinder_GetSubValue(struct CMatchFinder
*p
)
287 return (p
->pos
- p
->historySize
- 1) & kNormalizeMask
;
290 void MatchFinder_Normalize3(uint32_t subValue
, CLzRef
*items
, uint32_t numItems
)
293 for (i
= 0; i
< numItems
; i
++)
295 uint32_t value
= items
[i
];
296 if (value
<= subValue
)
297 value
= kEmptyHashValue
;
304 static void MatchFinder_Normalize(struct CMatchFinder
*p
)
306 uint32_t subValue
= MatchFinder_GetSubValue(p
);
307 MatchFinder_Normalize3(subValue
, p
->hash
, p
->hashSizeSum
+ p
->numSons
);
308 MatchFinder_ReduceOffsets(p
, subValue
);
311 static void MatchFinder_CheckLimits(struct CMatchFinder
*p
)
313 if (p
->pos
== kMaxValForNormalize
)
314 MatchFinder_Normalize(p
);
315 if (!p
->streamEndWasReached
&& p
->keepSizeAfter
== p
->streamPos
- p
->pos
)
316 MatchFinder_CheckAndMoveAndRead(p
);
317 if (p
->cyclicBufferPos
== p
->cyclicBufferSize
)
318 p
->cyclicBufferPos
= 0;
319 MatchFinder_SetLimits(p
);
322 static uint32_t * Hc_GetMatchesSpec(uint32_t lenLimit
, uint32_t curMatch
, uint32_t pos
, const uint8_t *cur
, CLzRef
*son
,
323 uint32_t _cyclicBufferPos
, uint32_t _cyclicBufferSize
, uint32_t cutValue
,
324 uint32_t *distances
, uint32_t maxLen
)
326 son
[_cyclicBufferPos
] = curMatch
;
329 uint32_t delta
= pos
- curMatch
;
330 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
333 const uint8_t *pb
= cur
- delta
;
334 curMatch
= son
[_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)];
335 if (pb
[maxLen
] == cur
[maxLen
] && *pb
== *cur
)
338 while (++len
!= lenLimit
)
339 if (pb
[len
] != cur
[len
])
343 *distances
++ = maxLen
= len
;
344 *distances
++ = delta
- 1;
353 uint32_t * GetMatchesSpec1(uint32_t lenLimit
, uint32_t curMatch
, uint32_t pos
, const uint8_t *cur
, CLzRef
*son
,
354 uint32_t _cyclicBufferPos
, uint32_t _cyclicBufferSize
, uint32_t cutValue
,
355 uint32_t *distances
, uint32_t maxLen
)
357 CLzRef
*ptr0
= son
+ (_cyclicBufferPos
<< 1) + 1;
358 CLzRef
*ptr1
= son
+ (_cyclicBufferPos
<< 1);
359 uint32_t len0
= 0, len1
= 0;
362 uint32_t delta
= pos
- curMatch
;
363 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
365 *ptr0
= *ptr1
= kEmptyHashValue
;
369 CLzRef
*pair
= son
+ ((_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)) << 1);
370 const uint8_t *pb
= cur
- delta
;
371 uint32_t len
= (len0
< len1
? len0
: len1
);
372 if (pb
[len
] == cur
[len
])
374 if (++len
!= lenLimit
&& pb
[len
] == cur
[len
])
375 while (++len
!= lenLimit
)
376 if (pb
[len
] != cur
[len
])
380 *distances
++ = maxLen
= len
;
381 *distances
++ = delta
- 1;
390 if (pb
[len
] < cur
[len
])
408 static void SkipMatchesSpec(uint32_t lenLimit
, uint32_t curMatch
, uint32_t pos
, const uint8_t *cur
, CLzRef
*son
,
409 uint32_t _cyclicBufferPos
, uint32_t _cyclicBufferSize
, uint32_t cutValue
)
411 CLzRef
*ptr0
= son
+ (_cyclicBufferPos
<< 1) + 1;
412 CLzRef
*ptr1
= son
+ (_cyclicBufferPos
<< 1);
413 uint32_t len0
= 0, len1
= 0;
416 uint32_t delta
= pos
- curMatch
;
417 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
419 *ptr0
= *ptr1
= kEmptyHashValue
;
423 CLzRef
*pair
= son
+ ((_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)) << 1);
424 const uint8_t *pb
= cur
- delta
;
425 uint32_t len
= (len0
< len1
? len0
: len1
);
426 if (pb
[len
] == cur
[len
])
428 while (++len
!= lenLimit
)
429 if (pb
[len
] != cur
[len
])
440 if (pb
[len
] < cur
[len
])
459 ++p->cyclicBufferPos; \
461 if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
463 #define MOVE_POS_RET MOVE_POS return offset;
465 static void MatchFinder_MovePos(struct CMatchFinder
*p
) { MOVE_POS
; }
467 #define GET_MATCHES_HEADER2(minLen, ret_op) \
468 uint32_t lenLimit; uint32_t hashValue; const uint8_t *cur; uint32_t curMatch; \
469 lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
472 #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
473 #define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
475 #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
477 #define GET_MATCHES_FOOTER(offset, maxLen) \
478 offset = (uint32_t)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
479 distances + offset, maxLen) - distances); MOVE_POS_RET;
481 #define SKIP_FOOTER \
482 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
484 static uint32_t Bt2_MatchFinder_GetMatches(struct CMatchFinder
*p
, uint32_t *distances
)
487 GET_MATCHES_HEADER(2)
489 curMatch
= p
->hash
[hashValue
];
490 p
->hash
[hashValue
] = p
->pos
;
492 GET_MATCHES_FOOTER(offset
, 1)
495 uint32_t Bt3Zip_MatchFinder_GetMatches(struct CMatchFinder
*p
, uint32_t *distances
)
498 GET_MATCHES_HEADER(3)
500 curMatch
= p
->hash
[hashValue
];
501 p
->hash
[hashValue
] = p
->pos
;
503 GET_MATCHES_FOOTER(offset
, 2)
506 static uint32_t Bt3_MatchFinder_GetMatches(struct CMatchFinder
*p
, uint32_t *distances
)
508 uint32_t hash2Value
, delta2
, maxLen
, offset
;
509 GET_MATCHES_HEADER(3)
513 delta2
= p
->pos
- p
->hash
[hash2Value
];
514 curMatch
= p
->hash
[kFix3HashSize
+ hashValue
];
516 p
->hash
[hash2Value
] =
517 p
->hash
[kFix3HashSize
+ hashValue
] = p
->pos
;
522 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
524 for (; maxLen
!= lenLimit
; maxLen
++)
525 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
527 distances
[0] = maxLen
;
528 distances
[1] = delta2
- 1;
530 if (maxLen
== lenLimit
)
532 SkipMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
));
536 GET_MATCHES_FOOTER(offset
, maxLen
)
539 static uint32_t Bt4_MatchFinder_GetMatches(struct CMatchFinder
*p
, uint32_t *distances
)
541 uint32_t hash2Value
, hash3Value
, delta2
, delta3
, maxLen
, offset
;
542 GET_MATCHES_HEADER(4)
546 delta2
= p
->pos
- p
->hash
[ hash2Value
];
547 delta3
= p
->pos
- p
->hash
[kFix3HashSize
+ hash3Value
];
548 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
550 p
->hash
[ hash2Value
] =
551 p
->hash
[kFix3HashSize
+ hash3Value
] =
552 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
556 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
558 distances
[0] = maxLen
= 2;
559 distances
[1] = delta2
- 1;
562 if (delta2
!= delta3
&& delta3
< p
->cyclicBufferSize
&& *(cur
- delta3
) == *cur
)
565 distances
[offset
+ 1] = delta3
- 1;
571 for (; maxLen
!= lenLimit
; maxLen
++)
572 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
574 distances
[offset
- 2] = maxLen
;
575 if (maxLen
== lenLimit
)
577 SkipMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
));
583 GET_MATCHES_FOOTER(offset
, maxLen
)
586 static uint32_t Hc4_MatchFinder_GetMatches(struct CMatchFinder
*p
, uint32_t *distances
)
588 uint32_t hash2Value
, hash3Value
, delta2
, delta3
, maxLen
, offset
;
589 GET_MATCHES_HEADER(4)
593 delta2
= p
->pos
- p
->hash
[ hash2Value
];
594 delta3
= p
->pos
- p
->hash
[kFix3HashSize
+ hash3Value
];
595 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
597 p
->hash
[ hash2Value
] =
598 p
->hash
[kFix3HashSize
+ hash3Value
] =
599 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
603 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
605 distances
[0] = maxLen
= 2;
606 distances
[1] = delta2
- 1;
609 if (delta2
!= delta3
&& delta3
< p
->cyclicBufferSize
&& *(cur
- delta3
) == *cur
)
612 distances
[offset
+ 1] = delta3
- 1;
618 for (; maxLen
!= lenLimit
; maxLen
++)
619 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
621 distances
[offset
- 2] = maxLen
;
622 if (maxLen
== lenLimit
)
624 p
->son
[p
->cyclicBufferPos
] = curMatch
;
630 offset
= (uint32_t)(Hc_GetMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
),
631 distances
+ offset
, maxLen
) - (distances
));
635 uint32_t Hc3Zip_MatchFinder_GetMatches(struct CMatchFinder
*p
, uint32_t *distances
)
638 GET_MATCHES_HEADER(3)
640 curMatch
= p
->hash
[hashValue
];
641 p
->hash
[hashValue
] = p
->pos
;
642 offset
= (uint32_t)(Hc_GetMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
),
643 distances
, 2) - (distances
));
647 static void Bt2_MatchFinder_Skip(struct CMatchFinder
*p
, uint32_t num
)
653 curMatch
= p
->hash
[hashValue
];
654 p
->hash
[hashValue
] = p
->pos
;
660 void Bt3Zip_MatchFinder_Skip(struct CMatchFinder
*p
, uint32_t num
)
666 curMatch
= p
->hash
[hashValue
];
667 p
->hash
[hashValue
] = p
->pos
;
673 static void Bt3_MatchFinder_Skip(struct CMatchFinder
*p
, uint32_t num
)
680 curMatch
= p
->hash
[kFix3HashSize
+ hashValue
];
681 p
->hash
[hash2Value
] =
682 p
->hash
[kFix3HashSize
+ hashValue
] = p
->pos
;
688 static void Bt4_MatchFinder_Skip(struct CMatchFinder
*p
, uint32_t num
)
692 uint32_t hash2Value
, hash3Value
;
695 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
696 p
->hash
[ hash2Value
] =
697 p
->hash
[kFix3HashSize
+ hash3Value
] = p
->pos
;
698 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
704 static void Hc4_MatchFinder_Skip(struct CMatchFinder
*p
, uint32_t num
)
708 uint32_t hash2Value
, hash3Value
;
711 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
712 p
->hash
[ hash2Value
] =
713 p
->hash
[kFix3HashSize
+ hash3Value
] =
714 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
715 p
->son
[p
->cyclicBufferPos
] = curMatch
;
721 void Hc3Zip_MatchFinder_Skip(struct CMatchFinder
*p
, uint32_t num
)
727 curMatch
= p
->hash
[hashValue
];
728 p
->hash
[hashValue
] = p
->pos
;
729 p
->son
[p
->cyclicBufferPos
] = curMatch
;
735 void MatchFinder_CreateVTable(struct CMatchFinder
*p
, struct IMatchFinder
*vTable
)
737 vTable
->Init
= (Mf_Init_Func
)MatchFinder_Init
;
738 vTable
->GetIndexByte
= (Mf_GetIndexByte_Func
)MatchFinder_GetIndexByte
;
739 vTable
->GetNumAvailableBytes
= (Mf_GetNumAvailableBytes_Func
)MatchFinder_GetNumAvailableBytes
;
740 vTable
->GetPointerToCurrentPos
= (Mf_GetPointerToCurrentPos_Func
)MatchFinder_GetPointerToCurrentPos
;
743 vTable
->GetMatches
= (Mf_GetMatches_Func
)Hc4_MatchFinder_GetMatches
;
744 vTable
->Skip
= (Mf_Skip_Func
)Hc4_MatchFinder_Skip
;
746 else if (p
->numHashBytes
== 2)
748 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt2_MatchFinder_GetMatches
;
749 vTable
->Skip
= (Mf_Skip_Func
)Bt2_MatchFinder_Skip
;
751 else if (p
->numHashBytes
== 3)
753 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt3_MatchFinder_GetMatches
;
754 vTable
->Skip
= (Mf_Skip_Func
)Bt3_MatchFinder_Skip
;
758 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt4_MatchFinder_GetMatches
;
759 vTable
->Skip
= (Mf_Skip_Func
)Bt4_MatchFinder_Skip
;