zoo: Various improvements
[deark.git] / foreign / ozunreduce.h
blob0877603557a4cea224d3b1c9772f9593de7c05a5
1 // Ozunreduce / Old ZIP Unreduce - A single-header-file C/C++ library for
2 // decompressing ("expanding") ZIP "Reduce" (methods 2 through 5) compression
3 //
4 // This file, normally named ozunreduce.h, hereinafter referred to as "this
5 // file" or "this software", is an independent software library. It is okay to
6 // distribute this file by itself.
7 //
8 // This software was written from scratch by Jason Summers, using public
9 // specifications from the PKZIP/PKWARE APPNOTE.TXT files.
11 // For an overview of how to use this software, see the example code at the
12 // end of this file.
14 // More information might be found at:
15 // * <https://entropymine.com/oldunzip/>
16 // * <https://github.com/jsummers/oldunzip>
19 ================================ TERMS OF USE ================================
20 These terms of use apply only to this file, and not to any other files that
21 might appear alongside it.
23 This software is dual-licensed. Choose the license you prefer:
24 ------------------------------------------------------------------------------
25 Licence option #1: MIT
26 Copyright (C) 2019 Jason Summers
28 Permission is hereby granted, free of charge, to any person obtaining a copy
29 of this software and associated documentation files (the "Software"), to deal
30 in the Software without restriction, including without limitation the rights
31 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
32 copies of the Software, and to permit persons to whom the Software is
33 furnished to do so, subject to the following conditions:
35 The above copyright notice and this permission notice shall be included in
36 all copies or substantial portions of the Software.
38 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
39 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
40 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
41 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
42 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
43 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
44 THE SOFTWARE.
45 ------------------------------------------------------------------------------
46 Licence option #2: Public domain
48 This is free and unencumbered software released into the public domain.
50 Anyone is free to copy, modify, publish, use, compile, sell, or
51 distribute this software, either in source code form or as a compiled
52 binary, for any purpose, commercial or non-commercial, and by any
53 means.
55 In jurisdictions that recognize copyright laws, the author or authors
56 of this software dedicate any and all copyright interest in the
57 software to the public domain. We make this dedication for the benefit
58 of the public at large and to the detriment of our heirs and
59 successors. We intend this dedication to be an overt act of
60 relinquishment in perpetuity of all present and future rights to this
61 software under copyright law.
63 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
64 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
65 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
66 IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
67 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
68 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
69 OTHER DEALINGS IN THE SOFTWARE.
71 For more information, please refer to <http://unlicense.org/>
72 ==============================================================================
75 #define OZUR_VERSION 20191011
77 #ifndef OZUR_UINT8
78 #define OZUR_UINT8 unsigned char
79 #endif
81 #ifndef OZUR_OFF_T
82 #define OZUR_OFF_T off_t
83 #endif
85 #ifndef OZUR_API
86 #define OZUR_API(ty) static ty
87 #endif
89 #define OZUR_ERRCODE_OK 0
90 #define OZUR_ERRCODE_GENERIC_ERROR 1
91 #define OZUR_ERRCODE_BAD_CDATA 2
92 #define OZUR_ERRCODE_READ_FAILED 6
93 #define OZUR_ERRCODE_WRITE_FAILED 7
94 #define OZUR_ERRCODE_INSUFFICIENT_CDATA 8
96 struct ozur_follower_item {
97 OZUR_UINT8 count; // N - 0<=count<=32
98 OZUR_UINT8 nbits; // B(N) - Valid if count>0
99 OZUR_UINT8 values[32]; // S - First 'count' values are valid
102 struct ozur_ctx_type;
103 typedef struct ozur_ctx_type ozur_ctx;
104 typedef size_t (*ozur_cb_read_type)(ozur_ctx *ozur, OZUR_UINT8 *buf, size_t size);
105 typedef size_t (*ozur_cb_write_type)(ozur_ctx *ozur, const OZUR_UINT8 *buf, size_t size);
106 typedef void (*ozur_cb_post_follower_sets_type)(ozur_ctx *ozur);
108 struct ozur_ctx_type {
109 // Fields the user can or must set:
110 void *userdata;
111 unsigned int cmpr_factor;
112 OZUR_OFF_T cmpr_size;
113 OZUR_OFF_T uncmpr_size;
114 ozur_cb_read_type cb_read;
115 ozur_cb_write_type cb_write;
116 ozur_cb_post_follower_sets_type cb_post_follower_sets; // Optional hook
118 // Fields the user can read:
119 int error_code;
120 OZUR_OFF_T uncmpr_nbytes_written;
121 OZUR_OFF_T cmpr_nbytes_consumed;
123 // Fields private to the library:
124 OZUR_OFF_T cmpr_nbytes_read; // (Number of bytes read, not necessarily consumed.)
125 OZUR_OFF_T uncmpr_nbytes_emitted; // (Number of output bytes decoded, not necessarily flushed.)
126 unsigned int bitreader_buf;
127 unsigned int bitreader_nbits_in_buf;
128 int state;
129 unsigned int var_Len;
130 OZUR_UINT8 last_char;
131 OZUR_UINT8 var_V;
132 struct ozur_follower_item followers[256];
133 size_t circbuf_pos;
134 #define OZUR_CIRCBUF_SIZE 4096 // Must be at least 4096
135 OZUR_UINT8 circbuf[OZUR_CIRCBUF_SIZE];
136 size_t inbuf_nbytes_consumed;
137 size_t inbuf_nbytes_total;
138 #define OZUR_INBUF_SIZE 1024
139 OZUR_UINT8 inbuf[OZUR_INBUF_SIZE];
142 static void ozur_set_error(ozur_ctx *ozur, int error_code)
144 // Only record the first error.
145 if (ozur->error_code==0) {
146 ozur->error_code = error_code;
150 static void ozur_refill_inbuf(ozur_ctx *ozur)
152 size_t ret;
153 size_t nbytes_to_read;
155 ozur->inbuf_nbytes_total = 0;
156 ozur->inbuf_nbytes_consumed = 0;
158 if((ozur->cmpr_size - ozur->cmpr_nbytes_read) > OZUR_INBUF_SIZE) {
159 nbytes_to_read = OZUR_INBUF_SIZE;
161 else {
162 nbytes_to_read = (size_t)(ozur->cmpr_size - ozur->cmpr_nbytes_read);
164 if(nbytes_to_read<1 || nbytes_to_read>OZUR_INBUF_SIZE) return;
166 ret = ozur->cb_read(ozur, ozur->inbuf, nbytes_to_read);
167 if(ret != nbytes_to_read) {
168 ozur_set_error(ozur, OZUR_ERRCODE_READ_FAILED);
169 return;
171 ozur->cmpr_nbytes_read += (OZUR_OFF_T)nbytes_to_read;
172 ozur->inbuf_nbytes_total = nbytes_to_read;
175 static OZUR_UINT8 ozur_nextbyte(ozur_ctx *ozur)
177 OZUR_UINT8 x;
179 if(ozur->error_code) return 0;
181 if(ozur->cmpr_nbytes_consumed >= ozur->cmpr_size) {
182 ozur_set_error(ozur, OZUR_ERRCODE_INSUFFICIENT_CDATA);
183 return 0;
185 // Another byte should be available, somewhere.
186 if(ozur->inbuf_nbytes_consumed >= ozur->inbuf_nbytes_total) {
187 // No bytes left in inbuf. Refill it.
188 ozur_refill_inbuf(ozur);
189 if(ozur->inbuf_nbytes_total<1) return 0;
192 x = ozur->inbuf[ozur->inbuf_nbytes_consumed++];
193 ozur->cmpr_nbytes_consumed++;
194 return x;
197 static OZUR_UINT8 ozur_bitreader_getbits(ozur_ctx *ozur, unsigned int nbits)
199 OZUR_UINT8 n;
201 if(nbits<1 || nbits>8) return 0;
203 if(ozur->bitreader_nbits_in_buf < nbits) {
204 OZUR_UINT8 b;
206 b = ozur_nextbyte(ozur);
207 if(ozur->error_code) return 0;
208 ozur->bitreader_buf |= ((unsigned int)b)<<ozur->bitreader_nbits_in_buf;
209 ozur->bitreader_nbits_in_buf += 8;
212 n = (OZUR_UINT8)(ozur->bitreader_buf & (0xff >> (8-nbits)));
213 ozur->bitreader_buf >>= nbits;
214 ozur->bitreader_nbits_in_buf -= nbits;
215 return n;
218 // "the minimal number of bits required to encode the value of x-1".
219 // Assumes 1 <= x <= 32.
220 static OZUR_UINT8 ozur_func_B(ozur_ctx *ozur, OZUR_UINT8 x)
222 if(x<=2) return 1;
223 if(x<=4) return 2;
224 if(x<=8) return 3;
225 if(x<=16) return 4;
226 return 5;
229 static void ozur_part1_readfollowersets(ozur_ctx *ozur)
231 int k;
233 for(k=255; k>=0; k--) {
234 unsigned int z;
235 struct ozur_follower_item *f_i;
237 f_i = &ozur->followers[k];
239 f_i->count = ozur_bitreader_getbits(ozur, 6);
240 if(ozur->error_code) goto done;
241 if(f_i->count > 32) {
242 ozur_set_error(ozur, OZUR_ERRCODE_BAD_CDATA);
243 goto done;
246 if(f_i->count > 0) {
247 f_i->nbits = ozur_func_B(ozur, f_i->count);
250 for(z=0; z<(unsigned int)f_i->count; z++) {
251 f_i->values[z] = ozur_bitreader_getbits(ozur, 8);
252 if(ozur->error_code) goto done;
255 done:
259 static OZUR_UINT8 ozur_part1_getnextbyte(ozur_ctx *ozur)
261 OZUR_UINT8 outbyte = 0;
262 struct ozur_follower_item *f_i;
264 f_i = &ozur->followers[(unsigned int)ozur->last_char];
266 if(f_i->count==0) { // Follower set is empty
267 outbyte = ozur_bitreader_getbits(ozur, 8);
269 else { // Follower set not empty
270 OZUR_UINT8 bitval;
272 bitval = ozur_bitreader_getbits(ozur, 1);
273 if(bitval) {
274 outbyte = ozur_bitreader_getbits(ozur, 8);
276 else {
277 unsigned int var_I;
279 var_I = (unsigned int)ozur_bitreader_getbits(ozur, (unsigned int)f_i->nbits);
280 outbyte = f_i->values[var_I];
284 ozur->last_char = outbyte;
285 return outbyte;
288 // Write the bytes in the circular buffer, up to the current position.
289 // Does not change the state of the buffer.
290 // This must only be called just before setting the buffer pos to 0,
291 // or at the end of input.
292 static void ozur_flush(ozur_ctx *ozur)
294 size_t ret;
295 size_t n;
297 if(ozur->error_code) return;
298 n = ozur->circbuf_pos;
299 if(n<1 || n>OZUR_CIRCBUF_SIZE) return;
300 ret = ozur->cb_write(ozur, ozur->circbuf, n);
301 if(ret != n) {
302 ozur_set_error(ozur, OZUR_ERRCODE_WRITE_FAILED);
303 return;
305 ozur->uncmpr_nbytes_written += (OZUR_OFF_T)ret;
308 static void ozur_emit_byte(ozur_ctx *ozur, OZUR_UINT8 x)
310 ozur->circbuf[ozur->circbuf_pos++] = x;
311 if(ozur->circbuf_pos >= OZUR_CIRCBUF_SIZE) {
312 ozur_flush(ozur);
313 ozur->circbuf_pos = 0;
315 ozur->uncmpr_nbytes_emitted++;
318 static void ozur_emit_copy_of_prev_bytes(ozur_ctx *ozur,
319 size_t nbytes_to_look_back, size_t nbytes)
321 size_t i;
322 size_t src_pos;
324 // Maximum possible is (255>>4)*255 + 255 + 1 = 4096
325 if(nbytes_to_look_back>4096) {
326 ozur_set_error(ozur, OZUR_ERRCODE_GENERIC_ERROR);
327 return;
329 // Maximum possible is 255 + 127 + 3 = 385
330 if(nbytes>nbytes_to_look_back) {
331 ozur_set_error(ozur, OZUR_ERRCODE_BAD_CDATA);
332 return;
335 src_pos = (ozur->circbuf_pos + OZUR_CIRCBUF_SIZE - nbytes_to_look_back) %
336 OZUR_CIRCBUF_SIZE;
338 for(i=0; i<nbytes; i++) {
339 ozur_emit_byte(ozur, ozur->circbuf[src_pos++]);
340 if(src_pos >= OZUR_CIRCBUF_SIZE) {
341 src_pos = 0;
346 // Process one byte of output from part 1.
347 static void ozur_part2(ozur_ctx *ozur, OZUR_UINT8 var_C)
349 size_t nbytes_to_look_back;
350 size_t nbytes_to_copy;
352 switch(ozur->state) {
353 case 0:
354 if(var_C==144) {
355 ozur->state = 1;
357 else {
358 ozur_emit_byte(ozur, var_C);
360 break;
362 case 1:
363 if(var_C) {
364 ozur->var_V = var_C;
365 ozur->var_Len = (unsigned int)(ozur->var_V & (0xff>>ozur->cmpr_factor));
366 ozur->state = (ozur->var_Len==(unsigned int)(0xff>>ozur->cmpr_factor)) ? 2 : 3; // F()
368 else {
369 ozur_emit_byte(ozur, 144);
370 ozur->state = 0;
372 break;
374 case 2:
375 ozur->var_Len += (unsigned int)var_C;
376 ozur->state = 3;
377 break;
379 case 3:
380 nbytes_to_look_back = (size_t)(ozur->var_V>>(8-ozur->cmpr_factor)) * 256 +
381 (size_t)var_C + 1; // D()
382 nbytes_to_copy = (size_t)ozur->var_Len + 3;
383 ozur_emit_copy_of_prev_bytes(ozur, nbytes_to_look_back, nbytes_to_copy);
384 ozur->state = 0;
385 break;
389 OZUR_API(void) ozur_run(ozur_ctx *ozur)
391 if(ozur->cmpr_factor<1 || ozur->cmpr_factor>4 ||
392 !ozur->cb_read || !ozur->cb_write)
394 ozur_set_error(ozur, OZUR_ERRCODE_GENERIC_ERROR);
395 goto done;
398 // Part 1 is undoing the "probabilistic" compression.
399 // It starts with a header, then we'll repeatedly get 1 byte of output from
400 // Part 1, and use it as input to Part 2.
401 ozur_part1_readfollowersets(ozur);
402 if(ozur->error_code) goto done;
404 if(ozur->cb_post_follower_sets) {
405 ozur->cb_post_follower_sets(ozur);
408 while(1) {
409 OZUR_UINT8 outbyte;
411 if(ozur->error_code) goto done;
412 if(ozur->uncmpr_nbytes_emitted >= ozur->uncmpr_size) break; // Have enough output data
414 outbyte = ozur_part1_getnextbyte(ozur);
415 if(ozur->error_code) goto done;
417 // Part 2 is undoing "compress repeated byte sequences" --
418 // apparently a kind of LZ77.
419 ozur_part2(ozur, outbyte);
422 done:
423 ozur_flush(ozur);
426 #if 0 // Example code
428 #include <stdio.h> // Used by the example code, not by the library
430 // You must at least define the off_t or OZUR_OFF_T symbol, either manually,
431 // or by #including the appopriate standard header file.
433 #include <sys/types.h>
435 // The library is expected to be used by just one of your .c/.cpp files. There
436 // are no provisions to make it convenient to use from multiple files.
437 #include "ozunreduce.h"
439 #endif
441 #ifdef OZUR_TESTING_EXAMPLE_CODE
443 // Define a custom struct for context data used by callbacks
444 struct example_ozur_uctxtype {
445 FILE *infile;
446 FILE *outfile;
449 // Implement your read and write callbacks
450 static size_t example_ozur_read(ozur_ctx *ozur, OZUR_UINT8 *buf, size_t size)
452 struct example_ozur_uctxtype *uctx = (struct example_ozur_uctxtype*)ozur->userdata;
453 return (size_t)fread(buf, 1, size, uctx->infile);
456 static size_t example_ozur_write(ozur_ctx *ozur, const OZUR_UINT8 *buf, size_t size)
458 struct example_ozur_uctxtype *uctx = (struct example_ozur_uctxtype*)ozur->userdata;
459 return (size_t)fwrite(buf, 1, size, uctx->outfile);
462 // An example function that uses the library.
463 // infile: The ZIP file. Seek to the start of compressed data before calling
464 // this function.
465 // outfile: The file to write uncompressed data to.
466 // cmpr_size, uncmpr_size: Compressed and uncompressed file sizes, from the
467 // ZIP file directory data.
468 // cmpr_factor: The compression factor, from 1 to 4. This is the ZIP
469 // "compression method" field, minus 1.
470 static void ozur_example_code(FILE *infile, FILE *outfile,
471 OZUR_OFF_T cmpr_size, OZUR_OFF_T uncmpr_size, unsigned int cmpr_factor)
473 ozur_ctx *ozur = NULL;
474 struct example_ozur_uctxtype uctx;
476 // Prepare your context data
477 uctx.infile = infile;
478 uctx.outfile = outfile;
480 // Allocate an ozur_ctx object, and (**important!**) initialize it to all
481 // zero bytes.
482 // The object is about 16KB in size, so putting it on the stack is an
483 // option, but make sure to initialize it.
484 ozur = calloc(1, sizeof(ozur_ctx));
485 if(!ozur) return;
487 // Set some required fields
488 ozur->userdata = (void*)&uctx;
489 ozur->cmpr_size = cmpr_size;
490 ozur->uncmpr_size = uncmpr_size;
491 ozur->cmpr_factor = cmpr_factor;
492 // cb_read must supply all of the bytes requested. Returning any other number
493 // is considered a failure.
494 ozur->cb_read = example_ozur_read;
495 // cb_write must consume all of the bytes supplied.
496 ozur->cb_write = example_ozur_write;
498 // Do the decompression
499 ozur_run(ozur);
500 if(ozur->error_code != OZUR_ERRCODE_OK) {
501 printf("Decompression failed (code %d)\n", ozur->error_code);
504 // Free any resources you allocated. The library does not require any
505 // cleanup of its own.
506 free(ozur);
509 #endif // End of example code