1 /* lzo1x_9x.c -- implementation of the LZO1X-999 compression algorithm
3 This file is part of the LZO real-time data compression library.
5 Copyright (C) 2008 Markus Franz Xaver Johannes Oberhumer
6 Copyright (C) 2007 Markus Franz Xaver Johannes Oberhumer
7 Copyright (C) 2006 Markus Franz Xaver Johannes Oberhumer
8 Copyright (C) 2005 Markus Franz Xaver Johannes Oberhumer
9 Copyright (C) 2004 Markus Franz Xaver Johannes Oberhumer
10 Copyright (C) 2003 Markus Franz Xaver Johannes Oberhumer
11 Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer
12 Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer
13 Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
14 Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
15 Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
16 Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
17 Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
20 The LZO library is free software; you can redistribute it and/or
21 modify it under the terms of the GNU General Public License as
22 published by the Free Software Foundation; either version 2 of
23 the License, or (at your option) any later version.
25 The LZO library is distributed in the hope that it will be useful,
26 but WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 GNU General Public License for more details.
30 You should have received a copy of the GNU General Public License
31 along with the LZO library; see the file COPYING.
32 If not, write to the Free Software Foundation, Inc.,
33 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
35 Markus F.X.J. Oberhumer
36 <markus@oberhumer.com>
37 http://www.oberhumer.com/opensource/lzo/
41 #if !defined(LZO1X) && !defined(LZO1Y) && !defined(LZO1Z)
46 # include "config1x.h"
48 # include "config1y.h"
50 # include "config1z.h"
56 /***********************************************************************
58 ************************************************************************/
60 #define N M4_MAX_OFFSET /* size of ring buffer */
61 #define THRESHOLD 1 /* lower limit for match length */
62 #define F 2048 /* upper limit for match length */
64 #define SWD_BEST_OFF (LZO_MAX3( M2_MAX_LEN, M3_MAX_LEN, M4_MAX_LEN ) + 1)
67 # define LZO_COMPRESS_T lzo1x_999_t
68 # define lzo_swd_t lzo1x_999_swd_t
70 # define LZO_COMPRESS_T lzo1y_999_t
71 # define lzo_swd_t lzo1y_999_swd_t
72 # define lzo1x_999_compress_internal lzo1y_999_compress_internal
73 # define lzo1x_999_compress_dict lzo1y_999_compress_dict
74 # define lzo1x_999_compress_level lzo1y_999_compress_level
75 # define lzo1x_999_compress lzo1y_999_compress
77 # define LZO_COMPRESS_T lzo1z_999_t
78 # define lzo_swd_t lzo1z_999_swd_t
79 # define lzo1x_999_compress_internal lzo1z_999_compress_internal
80 # define lzo1x_999_compress_dict lzo1z_999_compress_dict
81 # define lzo1x_999_compress_level lzo1z_999_compress_level
82 # define lzo1x_999_compress lzo1z_999_compress
89 ((((((lzo_xint)b[p]<<3)^b[p+1])<<3)^b[p+2]) & (SWD_HSIZE-1))
91 #if 0 && defined(LZO_UNALIGNED_OK_4) && defined(LZO_ABI_LITTLE_ENDIAN)
93 (((* (lzo_uint32p) &b[p]) ^ ((* (lzo_uint32p) &b[p])>>10)) & (SWD_HSIZE-1))
96 #include "lzo_mchw.ch"
99 /* this is a public functions, but there is no prototype in a header file */
101 lzo1x_999_compress_internal ( const lzo_bytep in
, lzo_uint in_len
,
102 lzo_bytep out
, lzo_uintp out_len
,
104 const lzo_bytep dict
, lzo_uint dict_len
,
107 lzo_uint good_length
,
109 lzo_uint nice_length
,
114 /***********************************************************************
116 ************************************************************************/
119 code_match ( LZO_COMPRESS_T
*c
, lzo_bytep op
, lzo_uint m_len
, lzo_uint m_off
)
121 lzo_uint x_len
= m_len
;
122 lzo_uint x_off
= m_off
;
124 c
->match_bytes
+= (unsigned long) m_len
;
128 static lzo_uint last_m_len = 0, last_m_off = 0;
129 static lzo_uint prev_m_off[4];
130 static int prev_m_off_ptr = 0;
133 //if (m_len >= 3 && m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET)
134 if (m_len >= 3 && m_len <= M2_MAX_LEN)
136 //if (m_len == last_m_len && m_off == last_m_off)
137 //printf("last_m_len + last_m_off\n");
139 if (m_off == last_m_off)
140 printf("last_m_off\n");
143 for (i = 0; i < 4; i++)
144 if (m_off == prev_m_off[i])
145 printf("prev_m_off %d: %5ld\n",i,(long)m_off);
149 last_m_off = prev_m_off[prev_m_off_ptr] = m_off;
150 prev_m_off_ptr = (prev_m_off_ptr + 1) & 3;
157 assert(m_off
<= M1_MAX_OFFSET
);
158 assert(c
->r1_lit
> 0); assert(c
->r1_lit
< 4);
161 *op
++ = LZO_BYTE(M1_MARKER
| (m_off
>> 6));
162 *op
++ = LZO_BYTE(m_off
<< 2);
164 *op
++ = LZO_BYTE(M1_MARKER
| ((m_off
& 3) << 2));
165 *op
++ = LZO_BYTE(m_off
>> 2);
170 else if (m_len
<= M2_MAX_LEN
&& (m_off
<= M2_MAX_OFFSET
|| m_off
== c
->last_m_off
))
172 else if (m_len
<= M2_MAX_LEN
&& m_off
<= M2_MAX_OFFSET
)
178 *op
++ = LZO_BYTE(((m_len
- 1) << 5) | ((m_off
& 7) << 2));
179 *op
++ = LZO_BYTE(m_off
>> 3);
180 assert(op
[-2] >= M2_MARKER
);
183 *op
++ = LZO_BYTE(((m_len
+ 1) << 4) | ((m_off
& 3) << 2));
184 *op
++ = LZO_BYTE(m_off
>> 2);
185 assert(op
[-2] >= M2_MARKER
);
187 if (m_off
== c
->last_m_off
)
188 *op
++ = LZO_BYTE(((m_len
- 1) << 5) | (0x700 >> 6));
192 *op
++ = LZO_BYTE(((m_len
- 1) << 5) | (m_off
>> 6));
193 *op
++ = LZO_BYTE(m_off
<< 2);
198 else if (m_len
== M2_MIN_LEN
&& m_off
<= MX_MAX_OFFSET
&& c
->r1_lit
>= 4)
201 assert(m_off
> M2_MAX_OFFSET
);
202 m_off
-= 1 + M2_MAX_OFFSET
;
204 *op
++ = LZO_BYTE(M1_MARKER
| (m_off
>> 6));
205 *op
++ = LZO_BYTE(m_off
<< 2);
207 *op
++ = LZO_BYTE(M1_MARKER
| ((m_off
& 3) << 2));
208 *op
++ = LZO_BYTE(m_off
>> 2);
212 else if (m_off
<= M3_MAX_OFFSET
)
216 if (m_len
<= M3_MAX_LEN
)
217 *op
++ = LZO_BYTE(M3_MARKER
| (m_len
- 2));
221 *op
++ = M3_MARKER
| 0;
228 *op
++ = LZO_BYTE(m_len
);
231 *op
++ = LZO_BYTE(m_off
>> 6);
232 *op
++ = LZO_BYTE(m_off
<< 2);
234 *op
++ = LZO_BYTE(m_off
<< 2);
235 *op
++ = LZO_BYTE(m_off
>> 6);
244 assert(m_off
> 0x4000); assert(m_off
<= 0xbfff);
246 k
= (m_off
& 0x4000) >> 11;
247 if (m_len
<= M4_MAX_LEN
)
248 *op
++ = LZO_BYTE(M4_MARKER
| k
| (m_len
- 2));
252 *op
++ = LZO_BYTE(M4_MARKER
| k
| 0);
259 *op
++ = LZO_BYTE(m_len
);
262 *op
++ = LZO_BYTE(m_off
>> 6);
263 *op
++ = LZO_BYTE(m_off
<< 2);
265 *op
++ = LZO_BYTE(m_off
<< 2);
266 *op
++ = LZO_BYTE(m_off
>> 6);
271 c
->last_m_len
= x_len
;
272 c
->last_m_off
= x_off
;
278 STORE_RUN ( LZO_COMPRESS_T
*c
, lzo_bytep op
, const lzo_bytep ii
, lzo_uint t
)
280 c
->lit_bytes
+= (unsigned long) t
;
282 if (op
== c
->out
&& t
<= 238)
284 *op
++ = LZO_BYTE(17 + t
);
289 op
[-1] |= LZO_BYTE(t
);
291 op
[-2] |= LZO_BYTE(t
);
297 *op
++ = LZO_BYTE(t
- 3);
302 lzo_uint tt
= t
- 18;
311 *op
++ = LZO_BYTE(tt
);
314 do *op
++ = *ii
++; while (--t
> 0);
321 code_run ( LZO_COMPRESS_T
*c
, lzo_bytep op
, const lzo_bytep ii
,
322 lzo_uint lit
, lzo_uint m_len
)
327 op
= STORE_RUN(c
,op
,ii
,lit
);
342 /***********************************************************************
344 ************************************************************************/
347 len_of_coded_match ( lzo_uint m_len
, lzo_uint m_off
, lzo_uint lit
)
354 return (m_off
<= M1_MAX_OFFSET
&& lit
> 0 && lit
< 4) ? 2 : -1;
355 if (m_len
<= M2_MAX_LEN
&& m_off
<= M2_MAX_OFFSET
)
357 if (m_len
== M2_MIN_LEN
&& m_off
<= MX_MAX_OFFSET
&& lit
>= 4)
359 if (m_off
<= M3_MAX_OFFSET
)
361 if (m_len
<= M3_MAX_LEN
)
371 if (m_off
<= M4_MAX_OFFSET
)
373 if (m_len
<= M4_MAX_LEN
)
388 min_gain(lzo_uint ahead
, lzo_uint lit1
, lzo_uint lit2
, int l1
, int l2
, int l3
)
390 lzo_int lazy_match_min_gain
= 0;
393 lazy_match_min_gain
+= ahead
;
401 lazy_match_min_gain
+= (lit2
<= 3) ? 0 : 2;
403 lazy_match_min_gain
+= (lit2
<= 18) ? 0 : 1;
405 lazy_match_min_gain
+= (l2
- l1
) * 2;
407 lazy_match_min_gain
-= (ahead
- l3
) * 2;
409 if (lazy_match_min_gain
< 0)
410 lazy_match_min_gain
= 0;
414 if (lazy_match_min_gain
== 0)
415 lazy_match_min_gain
= 1;
418 return lazy_match_min_gain
;
422 /***********************************************************************
424 ************************************************************************/
428 void assert_match( const lzo_swd_p swd
, lzo_uint m_len
, lzo_uint m_off
)
430 const LZO_COMPRESS_T
*c
= swd
->c
;
434 if (m_off
<= (lzo_uint
) (c
->bp
- c
->in
))
436 assert(c
->bp
- m_off
+ m_len
< c
->ip
);
437 assert(lzo_memcmp(c
->bp
, c
->bp
- m_off
, m_len
) == 0);
441 assert(swd
->dict
!= NULL
);
442 d_off
= m_off
- (lzo_uint
) (c
->bp
- c
->in
);
443 assert(d_off
<= swd
->dict_len
);
446 assert(lzo_memcmp(c
->bp
, swd
->dict_end
- d_off
, d_off
) == 0);
447 assert(c
->in
+ m_len
- d_off
< c
->ip
);
448 assert(lzo_memcmp(c
->bp
+ d_off
, c
->in
, m_len
- d_off
) == 0);
452 assert(lzo_memcmp(c
->bp
, swd
->dict_end
- d_off
, m_len
) == 0);
457 # define assert_match(a,b,c) ((void)0)
461 #if defined(SWD_BEST_OFF)
464 better_match ( const lzo_swd_p swd
, lzo_uint
*m_len
, lzo_uint
*m_off
)
467 const LZO_COMPRESS_T
*c
= swd
->c
;
470 if (*m_len
<= M2_MIN_LEN
)
473 if (*m_off
== c
->last_m_off
&& *m_len
<= M2_MAX_LEN
)
476 if (*m_len
>= M2_MIN_LEN
+ 1 && *m_len
<= M2_MAX_LEN
+ 1 &&
477 c
->last_m_off
&& swd
->best_off
[*m_len
-1] == c
->last_m_off
)
480 *m_off
= swd
->best_off
[*m_len
];
486 if (*m_off
<= M2_MAX_OFFSET
)
491 if (*m_off
> M2_MAX_OFFSET
&&
492 *m_len
>= M2_MIN_LEN
+ 1 && *m_len
<= M2_MAX_LEN
+ 1 &&
493 swd
->best_off
[*m_len
-1] && swd
->best_off
[*m_len
-1] <= M2_MAX_OFFSET
)
496 *m_off
= swd
->best_off
[*m_len
];
503 if (*m_off
> M3_MAX_OFFSET
&&
504 *m_len
>= M4_MAX_LEN
+ 1 && *m_len
<= M2_MAX_LEN
+ 2 &&
505 swd
->best_off
[*m_len
-2] && swd
->best_off
[*m_len
-2] <= M2_MAX_OFFSET
)
508 *m_off
= swd
->best_off
[*m_len
];
515 if (*m_off
> M3_MAX_OFFSET
&&
516 *m_len
>= M4_MAX_LEN
+ 1 && *m_len
<= M3_MAX_LEN
+ 1 &&
517 swd
->best_off
[*m_len
-1] && swd
->best_off
[*m_len
-1] <= M3_MAX_OFFSET
)
520 *m_off
= swd
->best_off
[*m_len
];
528 /***********************************************************************
530 ************************************************************************/
533 lzo1x_999_compress_internal ( const lzo_bytep in
, lzo_uint in_len
,
534 lzo_bytep out
, lzo_uintp out_len
,
536 const lzo_bytep dict
, lzo_uint dict_len
,
539 lzo_uint good_length
,
541 lzo_uint nice_length
,
548 lzo_uint m_len
, m_off
;
550 LZO_COMPRESS_T
* const c
= &cc
;
551 lzo_swd_p
const swd
= (lzo_swd_p
) wrkmem
;
556 LZO_COMPILE_TIME_ASSERT(LZO1X_999_MEM_COMPRESS
>= SIZEOF_LZO_SWD_T
)
558 LZO_COMPILE_TIME_ASSERT(LZO1Y_999_MEM_COMPRESS
>= SIZEOF_LZO_SWD_T
)
560 LZO_COMPILE_TIME_ASSERT(LZO1Z_999_MEM_COMPRESS
>= SIZEOF_LZO_SWD_T
)
565 /* setup parameter defaults */
566 /* number of lazy match tries */
569 /* reduce lazy match search if we already have a match with this length */
570 if (good_length
<= 0)
572 /* do not try a lazy match if we already have a match with this length */
575 /* stop searching for longer matches than this one */
576 if (nice_length
<= 0)
578 /* don't search more positions than this */
580 max_chain
= SWD_MAX_CHAIN
;
584 c
->in_end
= in
+ in_len
;
587 c
->m1a_m
= c
->m1b_m
= c
->m2_m
= c
->m3_m
= c
->m4_m
= 0;
588 c
->lit1_r
= c
->lit2_r
= c
->lit3_r
= 0;
591 ii
= c
->ip
; /* point to start of literal run */
593 c
->r1_lit
= c
->r1_m_len
= 0;
595 r
= init_match(c
,swd
,dict
,dict_len
,flags
);
599 swd
->max_chain
= max_chain
;
601 swd
->nice_length
= nice_length
;
603 r
= find_match(c
,swd
,0,0);
612 c
->codesize
= pd(op
, out
);
617 assert(c
->bp
== c
->ip
- c
->look
);
621 assert(ii
+ lit
== c
->bp
);
622 assert(swd
->b_char
== *(c
->bp
));
625 (m_len
== 2 && (m_off
> M1_MAX_OFFSET
|| lit
== 0 || lit
>= 4)) ||
627 /* Do not accept this match for compressed-data compatibility
628 * with LZO v1.01 and before
629 * [ might be a problem for decompress() and optimize() ]
631 (m_len
== 2 && op
== out
) ||
633 (op
== out
&& lit
== 0))
638 else if (m_len
== M2_MIN_LEN
)
640 /* compression ratio improves if we code a literal in some cases */
641 if (m_off
> MX_MAX_OFFSET
&& lit
>= 4)
649 swd
->max_chain
= max_chain
;
650 r
= find_match(c
,swd
,1,0);
656 #if defined(SWD_BEST_OFF)
657 if (swd
->use_best_off
)
658 better_match(swd
,&m_len
,&m_off
);
660 assert_match(swd
,m_len
,m_off
);
664 /* shall we try a lazy match ? */
666 if (try_lazy
<= 0 || m_len
>= max_lazy
)
674 /* yes, try a lazy match */
675 l1
= len_of_coded_match(m_len
,m_off
,lit
);
678 max_ahead
= LZO_MIN((lzo_uint
)try_lazy
, (lzo_uint
)l1
- 1);
680 max_ahead
= LZO_MIN3(try_lazy
, l1
, m_len
- 1);
685 while (ahead
< max_ahead
&& c
->look
> m_len
)
687 lzo_int lazy_match_min_gain
;
689 if (m_len
>= good_length
)
690 swd
->max_chain
= max_chain
>> 2;
692 swd
->max_chain
= max_chain
;
693 r
= find_match(c
,swd
,1,0);
698 assert(ii
+ lit
+ ahead
== c
->bp
);
701 if (m_off
== c
->last_m_off
&& c
->m_off
!= c
->last_m_off
)
702 if (m_len
>= M2_MIN_LEN
&& m_len
<= M2_MAX_LEN
)
705 if (c
->m_len
< m_len
)
708 if (c
->m_len
== m_len
&& c
->m_off
>= m_off
)
711 #if defined(SWD_BEST_OFF)
712 if (swd
->use_best_off
)
713 better_match(swd
,&c
->m_len
,&c
->m_off
);
715 l2
= len_of_coded_match(c
->m_len
,c
->m_off
,lit
+ahead
);
719 if (c
->m_len
== m_len
&& l2
>= l1
)
725 /* compressed-data compatibility [see above] */
726 l3
= (op
== out
) ? -1 : len_of_coded_match(ahead
,m_off
,lit
);
728 l3
= len_of_coded_match(ahead
,m_off
,lit
);
731 lazy_match_min_gain
= min_gain(ahead
,lit
,lit
+ahead
,l1
,l2
,l3
);
732 if (c
->m_len
>= m_len
+ lazy_match_min_gain
)
735 assert_match(swd
,c
->m_len
,c
->m_off
);
739 /* code previous run */
740 op
= code_run(c
,op
,ii
,lit
,ahead
);
742 /* code shortened match */
743 op
= code_match(c
,op
,ahead
,m_off
);
748 assert(ii
+ lit
== c
->bp
);
750 goto lazy_match_done
;
755 assert(ii
+ lit
+ ahead
== c
->bp
);
758 op
= code_run(c
,op
,ii
,lit
,m_len
);
762 op
= code_match(c
,op
,m_len
,m_off
);
763 swd
->max_chain
= max_chain
;
764 r
= find_match(c
,swd
,m_len
,1+ahead
);
771 /* store final run */
773 op
= STORE_RUN(c
,op
,ii
,lit
);
775 #if defined(LZO_EOF_CODE)
776 *op
++ = M4_MARKER
| 1;
781 c
->codesize
= pd(op
, out
);
782 assert(c
->textsize
== in_len
);
784 *out_len
= pd(op
, out
);
786 if (c
->cb
&& c
->cb
->nprogress
)
787 (*c
->cb
->nprogress
)(c
->cb
, c
->textsize
, c
->codesize
, 0);
790 printf("%ld %ld -> %ld %ld: %ld %ld %ld %ld %ld %ld: %ld %ld %ld %ld\n",
791 (long) c
->textsize
, (long) in_len
, (long) c
->codesize
,
792 c
->match_bytes
, c
->m1a_m
, c
->m1b_m
, c
->m2_m
, c
->m3_m
, c
->m4_m
,
793 c
->lit_bytes
, c
->lit1_r
, c
->lit2_r
, c
->lit3_r
, c
->lazy
);
795 assert(c
->lit_bytes
+ c
->match_bytes
== in_len
);
801 /***********************************************************************
803 ************************************************************************/
806 lzo1x_999_compress_level ( const lzo_bytep in
, lzo_uint in_len
,
807 lzo_bytep out
, lzo_uintp out_len
,
809 const lzo_bytep dict
, lzo_uint dict_len
,
811 int compression_level
)
816 lzo_uint good_length
;
818 lzo_uint nice_length
;
822 { 0, 0, 0, 8, 4, 0 }, /* faster compression */
823 { 0, 0, 0, 16, 8, 0 },
824 { 0, 0, 0, 32, 16, 0 },
826 { 1, 4, 4, 16, 16, 0 },
827 { 1, 8, 16, 32, 32, 0 },
828 { 1, 8, 16, 128, 128, 0 },
830 { 2, 8, 32, 128, 256, 0 },
831 { 2, 32, 128, F
, 2048, 1 },
832 { 2, F
, F
, F
, 4096, 1 } /* max. compression */
835 if (compression_level
< 1 || compression_level
> 9)
838 compression_level
-= 1;
839 return lzo1x_999_compress_internal(in
, in_len
, out
, out_len
, wrkmem
,
841 c
[compression_level
].try_lazy
,
842 c
[compression_level
].good_length
,
843 c
[compression_level
].max_lazy
,
845 c
[compression_level
].nice_length
,
849 c
[compression_level
].max_chain
,
850 c
[compression_level
].flags
);
854 /***********************************************************************
856 ************************************************************************/
859 lzo1x_999_compress_dict ( const lzo_bytep in
, lzo_uint in_len
,
860 lzo_bytep out
, lzo_uintp out_len
,
862 const lzo_bytep dict
, lzo_uint dict_len
)
864 return lzo1x_999_compress_level(in
, in_len
, out
, out_len
, wrkmem
,
865 dict
, dict_len
, 0, 8);
869 lzo1x_999_compress ( const lzo_bytep in
, lzo_uint in_len
,
870 lzo_bytep out
, lzo_uintp out_len
,
873 return lzo1x_999_compress_level(in
, in_len
, out
, out_len
, wrkmem
,
874 NULL
, 0, (lzo_callback_p
) 0, 8);