1 // Ozunreduce / Old ZIP Unreduce - A single-header-file C/C++ library for
2 // decompressing ("expanding") ZIP "Reduce" (methods 2 through 5) compression
4 // This file, normally named ozunreduce.h, hereinafter referred to as "this
5 // file" or "this software", is an independent software library. It is okay to
6 // distribute this file by itself.
8 // This software was written from scratch by Jason Summers, using public
9 // specifications from the PKZIP/PKWARE APPNOTE.TXT files.
11 // For an overview of how to use this software, see the example code at the
14 // More information might be found at:
15 // * <https://entropymine.com/oldunzip/>
16 // * <https://github.com/jsummers/oldunzip>
19 ================================ TERMS OF USE ================================
20 These terms of use apply only to this file, and not to any other files that
21 might appear alongside it.
23 This software is dual-licensed. Choose the license you prefer:
24 ------------------------------------------------------------------------------
25 Licence option #1: MIT
26 Copyright (C) 2019 Jason Summers
28 Permission is hereby granted, free of charge, to any person obtaining a copy
29 of this software and associated documentation files (the "Software"), to deal
30 in the Software without restriction, including without limitation the rights
31 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
32 copies of the Software, and to permit persons to whom the Software is
33 furnished to do so, subject to the following conditions:
35 The above copyright notice and this permission notice shall be included in
36 all copies or substantial portions of the Software.
38 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
39 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
40 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
41 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
42 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
43 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
45 ------------------------------------------------------------------------------
46 Licence option #2: Public domain
48 This is free and unencumbered software released into the public domain.
50 Anyone is free to copy, modify, publish, use, compile, sell, or
51 distribute this software, either in source code form or as a compiled
52 binary, for any purpose, commercial or non-commercial, and by any
55 In jurisdictions that recognize copyright laws, the author or authors
56 of this software dedicate any and all copyright interest in the
57 software to the public domain. We make this dedication for the benefit
58 of the public at large and to the detriment of our heirs and
59 successors. We intend this dedication to be an overt act of
60 relinquishment in perpetuity of all present and future rights to this
61 software under copyright law.
63 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
64 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
65 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
66 IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
67 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
68 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
69 OTHER DEALINGS IN THE SOFTWARE.
71 For more information, please refer to <http://unlicense.org/>
72 ==============================================================================
75 #define OZUR_VERSION 20191011
78 #define OZUR_UINT8 unsigned char
82 #define OZUR_OFF_T off_t
86 #define OZUR_API(ty) static ty
89 #define OZUR_ERRCODE_OK 0
90 #define OZUR_ERRCODE_GENERIC_ERROR 1
91 #define OZUR_ERRCODE_BAD_CDATA 2
92 #define OZUR_ERRCODE_READ_FAILED 6
93 #define OZUR_ERRCODE_WRITE_FAILED 7
94 #define OZUR_ERRCODE_INSUFFICIENT_CDATA 8
96 struct ozur_follower_item
{
97 OZUR_UINT8 count
; // N - 0<=count<=32
98 OZUR_UINT8 nbits
; // B(N) - Valid if count>0
99 OZUR_UINT8 values
[32]; // S - First 'count' values are valid
102 struct ozur_ctx_type
;
103 typedef struct ozur_ctx_type ozur_ctx
;
104 typedef size_t (*ozur_cb_read_type
)(ozur_ctx
*ozur
, OZUR_UINT8
*buf
, size_t size
);
105 typedef size_t (*ozur_cb_write_type
)(ozur_ctx
*ozur
, const OZUR_UINT8
*buf
, size_t size
);
106 typedef void (*ozur_cb_post_follower_sets_type
)(ozur_ctx
*ozur
);
108 struct ozur_ctx_type
{
109 // Fields the user can or must set:
111 unsigned int cmpr_factor
;
112 OZUR_OFF_T cmpr_size
;
113 OZUR_OFF_T uncmpr_size
;
114 ozur_cb_read_type cb_read
;
115 ozur_cb_write_type cb_write
;
116 ozur_cb_post_follower_sets_type cb_post_follower_sets
; // Optional hook
118 // Fields the user can read:
120 OZUR_OFF_T uncmpr_nbytes_written
;
121 OZUR_OFF_T cmpr_nbytes_consumed
;
123 // Fields private to the library:
124 OZUR_OFF_T cmpr_nbytes_read
; // (Number of bytes read, not necessarily consumed.)
125 OZUR_OFF_T uncmpr_nbytes_emitted
; // (Number of output bytes decoded, not necessarily flushed.)
126 unsigned int bitreader_buf
;
127 unsigned int bitreader_nbits_in_buf
;
129 unsigned int var_Len
;
130 OZUR_UINT8 last_char
;
132 struct ozur_follower_item followers
[256];
134 #define OZUR_CIRCBUF_SIZE 4096 // Must be at least 4096
135 OZUR_UINT8 circbuf
[OZUR_CIRCBUF_SIZE
];
136 size_t inbuf_nbytes_consumed
;
137 size_t inbuf_nbytes_total
;
138 #define OZUR_INBUF_SIZE 1024
139 OZUR_UINT8 inbuf
[OZUR_INBUF_SIZE
];
142 static void ozur_set_error(ozur_ctx
*ozur
, int error_code
)
144 // Only record the first error.
145 if (ozur
->error_code
==0) {
146 ozur
->error_code
= error_code
;
150 static void ozur_refill_inbuf(ozur_ctx
*ozur
)
153 size_t nbytes_to_read
;
155 ozur
->inbuf_nbytes_total
= 0;
156 ozur
->inbuf_nbytes_consumed
= 0;
158 if((ozur
->cmpr_size
- ozur
->cmpr_nbytes_read
) > OZUR_INBUF_SIZE
) {
159 nbytes_to_read
= OZUR_INBUF_SIZE
;
162 nbytes_to_read
= (size_t)(ozur
->cmpr_size
- ozur
->cmpr_nbytes_read
);
164 if(nbytes_to_read
<1 || nbytes_to_read
>OZUR_INBUF_SIZE
) return;
166 ret
= ozur
->cb_read(ozur
, ozur
->inbuf
, nbytes_to_read
);
167 if(ret
!= nbytes_to_read
) {
168 ozur_set_error(ozur
, OZUR_ERRCODE_READ_FAILED
);
171 ozur
->cmpr_nbytes_read
+= (OZUR_OFF_T
)nbytes_to_read
;
172 ozur
->inbuf_nbytes_total
= nbytes_to_read
;
175 static OZUR_UINT8
ozur_nextbyte(ozur_ctx
*ozur
)
179 if(ozur
->error_code
) return 0;
181 if(ozur
->cmpr_nbytes_consumed
>= ozur
->cmpr_size
) {
182 ozur_set_error(ozur
, OZUR_ERRCODE_INSUFFICIENT_CDATA
);
185 // Another byte should be available, somewhere.
186 if(ozur
->inbuf_nbytes_consumed
>= ozur
->inbuf_nbytes_total
) {
187 // No bytes left in inbuf. Refill it.
188 ozur_refill_inbuf(ozur
);
189 if(ozur
->inbuf_nbytes_total
<1) return 0;
192 x
= ozur
->inbuf
[ozur
->inbuf_nbytes_consumed
++];
193 ozur
->cmpr_nbytes_consumed
++;
197 static OZUR_UINT8
ozur_bitreader_getbits(ozur_ctx
*ozur
, unsigned int nbits
)
201 if(nbits
<1 || nbits
>8) return 0;
203 if(ozur
->bitreader_nbits_in_buf
< nbits
) {
206 b
= ozur_nextbyte(ozur
);
207 if(ozur
->error_code
) return 0;
208 ozur
->bitreader_buf
|= ((unsigned int)b
)<<ozur
->bitreader_nbits_in_buf
;
209 ozur
->bitreader_nbits_in_buf
+= 8;
212 n
= (OZUR_UINT8
)(ozur
->bitreader_buf
& (0xff >> (8-nbits
)));
213 ozur
->bitreader_buf
>>= nbits
;
214 ozur
->bitreader_nbits_in_buf
-= nbits
;
218 // "the minimal number of bits required to encode the value of x-1".
219 // Assumes 1 <= x <= 32.
220 static OZUR_UINT8
ozur_func_B(ozur_ctx
*ozur
, OZUR_UINT8 x
)
229 static void ozur_part1_readfollowersets(ozur_ctx
*ozur
)
233 for(k
=255; k
>=0; k
--) {
235 struct ozur_follower_item
*f_i
;
237 f_i
= &ozur
->followers
[k
];
239 f_i
->count
= ozur_bitreader_getbits(ozur
, 6);
240 if(ozur
->error_code
) goto done
;
241 if(f_i
->count
> 32) {
242 ozur_set_error(ozur
, OZUR_ERRCODE_BAD_CDATA
);
247 f_i
->nbits
= ozur_func_B(ozur
, f_i
->count
);
250 for(z
=0; z
<(unsigned int)f_i
->count
; z
++) {
251 f_i
->values
[z
] = ozur_bitreader_getbits(ozur
, 8);
252 if(ozur
->error_code
) goto done
;
259 static OZUR_UINT8
ozur_part1_getnextbyte(ozur_ctx
*ozur
)
261 OZUR_UINT8 outbyte
= 0;
262 struct ozur_follower_item
*f_i
;
264 f_i
= &ozur
->followers
[(unsigned int)ozur
->last_char
];
266 if(f_i
->count
==0) { // Follower set is empty
267 outbyte
= ozur_bitreader_getbits(ozur
, 8);
269 else { // Follower set not empty
272 bitval
= ozur_bitreader_getbits(ozur
, 1);
274 outbyte
= ozur_bitreader_getbits(ozur
, 8);
279 var_I
= (unsigned int)ozur_bitreader_getbits(ozur
, (unsigned int)f_i
->nbits
);
280 outbyte
= f_i
->values
[var_I
];
284 ozur
->last_char
= outbyte
;
288 // Write the bytes in the circular buffer, up to the current position.
289 // Does not change the state of the buffer.
290 // This must only be called just before setting the buffer pos to 0,
291 // or at the end of input.
292 static void ozur_flush(ozur_ctx
*ozur
)
297 if(ozur
->error_code
) return;
298 n
= ozur
->circbuf_pos
;
299 if(n
<1 || n
>OZUR_CIRCBUF_SIZE
) return;
300 ret
= ozur
->cb_write(ozur
, ozur
->circbuf
, n
);
302 ozur_set_error(ozur
, OZUR_ERRCODE_WRITE_FAILED
);
305 ozur
->uncmpr_nbytes_written
+= (OZUR_OFF_T
)ret
;
308 static void ozur_emit_byte(ozur_ctx
*ozur
, OZUR_UINT8 x
)
310 ozur
->circbuf
[ozur
->circbuf_pos
++] = x
;
311 if(ozur
->circbuf_pos
>= OZUR_CIRCBUF_SIZE
) {
313 ozur
->circbuf_pos
= 0;
315 ozur
->uncmpr_nbytes_emitted
++;
318 static void ozur_emit_copy_of_prev_bytes(ozur_ctx
*ozur
,
319 size_t nbytes_to_look_back
, size_t nbytes
)
324 // Maximum possible is (255>>4)*255 + 255 + 1 = 4096
325 if(nbytes_to_look_back
>4096) {
326 ozur_set_error(ozur
, OZUR_ERRCODE_GENERIC_ERROR
);
329 // Maximum possible is 255 + 127 + 3 = 385
330 if(nbytes
>nbytes_to_look_back
) {
331 ozur_set_error(ozur
, OZUR_ERRCODE_BAD_CDATA
);
335 src_pos
= (ozur
->circbuf_pos
+ OZUR_CIRCBUF_SIZE
- nbytes_to_look_back
) %
338 for(i
=0; i
<nbytes
; i
++) {
339 ozur_emit_byte(ozur
, ozur
->circbuf
[src_pos
++]);
340 if(src_pos
>= OZUR_CIRCBUF_SIZE
) {
346 // Process one byte of output from part 1.
347 static void ozur_part2(ozur_ctx
*ozur
, OZUR_UINT8 var_C
)
349 size_t nbytes_to_look_back
;
350 size_t nbytes_to_copy
;
352 switch(ozur
->state
) {
358 ozur_emit_byte(ozur
, var_C
);
365 ozur
->var_Len
= (unsigned int)(ozur
->var_V
& (0xff>>ozur
->cmpr_factor
));
366 ozur
->state
= (ozur
->var_Len
==(unsigned int)(0xff>>ozur
->cmpr_factor
)) ? 2 : 3; // F()
369 ozur_emit_byte(ozur
, 144);
375 ozur
->var_Len
+= (unsigned int)var_C
;
380 nbytes_to_look_back
= (size_t)(ozur
->var_V
>>(8-ozur
->cmpr_factor
)) * 256 +
381 (size_t)var_C
+ 1; // D()
382 nbytes_to_copy
= (size_t)ozur
->var_Len
+ 3;
383 ozur_emit_copy_of_prev_bytes(ozur
, nbytes_to_look_back
, nbytes_to_copy
);
389 OZUR_API(void) ozur_run(ozur_ctx
*ozur
)
391 if(ozur
->cmpr_factor
<1 || ozur
->cmpr_factor
>4 ||
392 !ozur
->cb_read
|| !ozur
->cb_write
)
394 ozur_set_error(ozur
, OZUR_ERRCODE_GENERIC_ERROR
);
398 // Part 1 is undoing the "probabilistic" compression.
399 // It starts with a header, then we'll repeatedly get 1 byte of output from
400 // Part 1, and use it as input to Part 2.
401 ozur_part1_readfollowersets(ozur
);
402 if(ozur
->error_code
) goto done
;
404 if(ozur
->cb_post_follower_sets
) {
405 ozur
->cb_post_follower_sets(ozur
);
411 if(ozur
->error_code
) goto done
;
412 if(ozur
->uncmpr_nbytes_emitted
>= ozur
->uncmpr_size
) break; // Have enough output data
414 outbyte
= ozur_part1_getnextbyte(ozur
);
415 if(ozur
->error_code
) goto done
;
417 // Part 2 is undoing "compress repeated byte sequences" --
418 // apparently a kind of LZ77.
419 ozur_part2(ozur
, outbyte
);
426 #if 0 // Example code
428 #include <stdio.h> // Used by the example code, not by the library
430 // You must at least define the off_t or OZUR_OFF_T symbol, either manually,
431 // or by #including the appopriate standard header file.
433 #include <sys/types.h>
435 // The library is expected to be used by just one of your .c/.cpp files. There
436 // are no provisions to make it convenient to use from multiple files.
437 #include "ozunreduce.h"
441 #ifdef OZUR_TESTING_EXAMPLE_CODE
443 // Define a custom struct for context data used by callbacks
444 struct example_ozur_uctxtype
{
449 // Implement your read and write callbacks
450 static size_t example_ozur_read(ozur_ctx
*ozur
, OZUR_UINT8
*buf
, size_t size
)
452 struct example_ozur_uctxtype
*uctx
= (struct example_ozur_uctxtype
*)ozur
->userdata
;
453 return (size_t)fread(buf
, 1, size
, uctx
->infile
);
456 static size_t example_ozur_write(ozur_ctx
*ozur
, const OZUR_UINT8
*buf
, size_t size
)
458 struct example_ozur_uctxtype
*uctx
= (struct example_ozur_uctxtype
*)ozur
->userdata
;
459 return (size_t)fwrite(buf
, 1, size
, uctx
->outfile
);
462 // An example function that uses the library.
463 // infile: The ZIP file. Seek to the start of compressed data before calling
465 // outfile: The file to write uncompressed data to.
466 // cmpr_size, uncmpr_size: Compressed and uncompressed file sizes, from the
467 // ZIP file directory data.
468 // cmpr_factor: The compression factor, from 1 to 4. This is the ZIP
469 // "compression method" field, minus 1.
470 static void ozur_example_code(FILE *infile
, FILE *outfile
,
471 OZUR_OFF_T cmpr_size
, OZUR_OFF_T uncmpr_size
, unsigned int cmpr_factor
)
473 ozur_ctx
*ozur
= NULL
;
474 struct example_ozur_uctxtype uctx
;
476 // Prepare your context data
477 uctx
.infile
= infile
;
478 uctx
.outfile
= outfile
;
480 // Allocate an ozur_ctx object, and (**important!**) initialize it to all
482 // The object is about 16KB in size, so putting it on the stack is an
483 // option, but make sure to initialize it.
484 ozur
= calloc(1, sizeof(ozur_ctx
));
487 // Set some required fields
488 ozur
->userdata
= (void*)&uctx
;
489 ozur
->cmpr_size
= cmpr_size
;
490 ozur
->uncmpr_size
= uncmpr_size
;
491 ozur
->cmpr_factor
= cmpr_factor
;
492 // cb_read must supply all of the bytes requested. Returning any other number
493 // is considered a failure.
494 ozur
->cb_read
= example_ozur_read
;
495 // cb_write must consume all of the bytes supplied.
496 ozur
->cb_write
= example_ozur_write
;
498 // Do the decompression
500 if(ozur
->error_code
!= OZUR_ERRCODE_OK
) {
501 printf("Decompression failed (code %d)\n", ozur
->error_code
);
504 // Free any resources you allocated. The library does not require any
505 // cleanup of its own.
509 #endif // End of example code