4 #if defined(_WIN32) || defined(_WIN64)
14 #define MIO0_VERSION "0.1"
16 #define GET_BIT(buf, bit) ((buf)[(bit) / 8] & (1 << (7 - ((bit) % 8))))
28 #define LOOKBACK_COUNT 256
29 #define LOOKBACK_INIT_SIZE 128
30 static lookback
*lookback_init(void)
32 lookback
*lb
= malloc(LOOKBACK_COUNT
* sizeof(*lb
));
33 for (int i
= 0; i
< LOOKBACK_COUNT
; i
++) {
34 lb
[i
].allocated
= LOOKBACK_INIT_SIZE
;
35 lb
[i
].indexes
= malloc(lb
[i
].allocated
* sizeof(*lb
[i
].indexes
));
42 static void lookback_free(lookback
*lb
)
44 for (int i
= 0; i
< LOOKBACK_COUNT
; i
++) {
50 static inline void lookback_push(lookback
*lkbk
, unsigned char val
, int index
)
52 lookback
*lb
= &lkbk
[val
];
53 if (lb
->count
== lb
->allocated
) {
55 lb
->indexes
= realloc(lb
->indexes
, lb
->allocated
* sizeof(*lb
->indexes
));
57 lb
->indexes
[lb
->count
++] = index
;
60 static void PUT_BIT(unsigned char *buf
, int bit
, int val
)
62 unsigned char mask
= 1 << (7 - (bit
% 8));
63 unsigned int offset
= bit
/ 8;
64 buf
[offset
] = (buf
[offset
] & ~(mask
)) | (val
? mask
: 0);
67 // used to find longest matching stream in buffer
69 // start_offset: offset in buf to look back from
70 // max_search: max number of bytes to find
71 // found_offset: returned offset found (0 if none found)
72 // returns max length of matching stream (0 if none found)
73 static int find_longest(const unsigned char *buf
, int start_offset
, int max_search
, int *found_offset
, lookback
*lkbk
)
81 const unsigned char first
= buf
[start_offset
];
82 lookback
*lb
= &lkbk
[first
];
87 // |--------------raw-data-----------------|
91 // check at most the past 4096 values
92 farthest
= MAX(start_offset
- 4096, 0);
93 // find starting index
94 for (lb_idx
= lb
->start
; lb_idx
< lb
->count
&& lb
->indexes
[lb_idx
] < farthest
; lb_idx
++) {}
96 for ( ; lb_idx
< lb
->count
&& lb
->indexes
[lb_idx
] < start_offset
; lb_idx
++) {
97 off
= lb
->indexes
[lb_idx
];
98 // check at most requested max or up until start
99 search_len
= MIN(max_search
, start_offset
- off
);
100 for (i
= 0; i
< search_len
; i
++) {
101 if (buf
[start_offset
+ i
] != buf
[off
+ i
]) {
106 // if matched up until start, continue matching in already matched parts
107 if (cur_length
== search_len
) {
108 // check at most requested max less current length
109 search_len
= max_search
- cur_length
;
110 for (i
= 0; i
< search_len
; i
++) {
111 if (buf
[start_offset
+ cur_length
+ i
] != buf
[off
+ i
]) {
117 if (cur_length
> best_length
) {
118 best_offset
= start_offset
- off
;
119 best_length
= cur_length
;
123 // return best reverse offset and length (may be 0)
124 *found_offset
= best_offset
;
128 // decode MIO0 header
129 // returns 1 if valid header, 0 otherwise
130 int mio0_decode_header(const unsigned char *buf
, mio0_header_t
*head
)
132 if (!memcmp(buf
, "MIO0", 4)) {
133 head
->dest_size
= read_u32_be(&buf
[4]);
134 head
->comp_offset
= read_u32_be(&buf
[8]);
135 head
->uncomp_offset
= read_u32_be(&buf
[12]);
141 void mio0_encode_header(unsigned char *buf
, const mio0_header_t
*head
)
143 memcpy(buf
, "MIO0", 4);
144 write_u32_be(&buf
[4], head
->dest_size
);
145 write_u32_be(&buf
[8], head
->comp_offset
);
146 write_u32_be(&buf
[12], head
->uncomp_offset
);
149 int mio0_decode(const unsigned char *in
, unsigned char *out
, unsigned int *end
)
152 unsigned int bytes_written
= 0;
159 valid
= mio0_decode_header(in
, &head
);
160 // verify MIO0 header
166 while (bytes_written
< head
.dest_size
) {
167 if (GET_BIT(&in
[MIO0_HEADER_LENGTH
], bit_idx
)) {
168 // 1 - pull uncompressed data
169 out
[bytes_written
] = in
[head
.uncomp_offset
+ uncomp_idx
];
173 // 0 - read compressed data
177 const unsigned char *vals
= &in
[head
.comp_offset
+ comp_idx
];
179 length
= ((vals
[0] & 0xF0) >> 4) + 3;
180 idx
= ((vals
[0] & 0x0F) << 8) + vals
[1] + 1;
181 for (i
= 0; i
< length
; i
++) {
182 out
[bytes_written
] = out
[bytes_written
- idx
];
190 *end
= head
.uncomp_offset
+ uncomp_idx
;
193 return bytes_written
;
196 int mio0_encode(const unsigned char *in
, unsigned int length
, unsigned char *out
)
198 unsigned char *bit_buf
;
199 unsigned char *comp_buf
;
200 unsigned char *uncomp_buf
;
201 unsigned int bit_length
;
202 unsigned int comp_offset
;
203 unsigned int uncomp_offset
;
204 unsigned int bytes_proc
= 0;
211 // initialize lookback buffer
212 lookbacks
= lookback_init();
214 // allocate some temporary buffers worst case size
215 bit_buf
= malloc((length
+ 7) / 8); // 1-bit/byte
216 comp_buf
= malloc(length
); // 16-bits/2bytes
217 uncomp_buf
= malloc(length
); // all uncompressed
218 memset(bit_buf
, 0, (length
+ 7) / 8);
221 // special case for first byte
222 lookback_push(lookbacks
, in
[0], 0);
223 uncomp_buf
[uncomp_idx
] = in
[0];
226 PUT_BIT(bit_buf
, bit_idx
++, 1);
227 while (bytes_proc
< length
) {
229 int max_length
= MIN(length
- bytes_proc
, 18);
230 int longest_match
= find_longest(in
, bytes_proc
, max_length
, &offset
, lookbacks
);
231 // push current byte before checking next longer match
232 lookback_push(lookbacks
, in
[bytes_proc
], bytes_proc
);
233 if (longest_match
> 2) {
234 int lookahead_offset
;
235 // lookahead to next byte to see if longer match
236 int lookahead_length
= MIN(length
- bytes_proc
- 1, 18);
237 int lookahead_match
= find_longest(in
, bytes_proc
+ 1, lookahead_length
, &lookahead_offset
, lookbacks
);
238 // better match found, use uncompressed + lookahead compressed
239 if ((longest_match
+ 1) < lookahead_match
) {
241 uncomp_buf
[uncomp_idx
] = in
[bytes_proc
];
243 PUT_BIT(bit_buf
, bit_idx
, 1);
245 longest_match
= lookahead_match
;
246 offset
= lookahead_offset
;
248 lookback_push(lookbacks
, in
[bytes_proc
], bytes_proc
);
250 // first byte already pushed above
251 for (int i
= 1; i
< longest_match
; i
++) {
252 lookback_push(lookbacks
, in
[bytes_proc
+ i
], bytes_proc
+ i
);
255 comp_buf
[comp_idx
] = (((longest_match
- 3) & 0x0F) << 4) |
256 (((offset
- 1) >> 8) & 0x0F);
257 comp_buf
[comp_idx
+ 1] = (offset
- 1) & 0xFF;
259 PUT_BIT(bit_buf
, bit_idx
, 0);
260 bytes_proc
+= longest_match
;
263 uncomp_buf
[uncomp_idx
] = in
[bytes_proc
];
265 PUT_BIT(bit_buf
, bit_idx
, 1);
271 // compute final sizes and offsets
272 // +7 so int division accounts for all bits
273 bit_length
= ((bit_idx
+ 7) / 8);
274 // compressed data after control bits and aligned to 4-byte boundary
275 comp_offset
= ALIGN(MIO0_HEADER_LENGTH
+ bit_length
, 4);
276 uncomp_offset
= comp_offset
+ comp_idx
;
277 bytes_written
= uncomp_offset
+ uncomp_idx
;
280 memcpy(out
, "MIO0", 4);
281 write_u32_be(&out
[4], length
);
282 write_u32_be(&out
[8], comp_offset
);
283 write_u32_be(&out
[12], uncomp_offset
);
285 memcpy(&out
[MIO0_HEADER_LENGTH
], bit_buf
, bit_length
);
286 memcpy(&out
[comp_offset
], comp_buf
, comp_idx
);
287 memcpy(&out
[uncomp_offset
], uncomp_buf
, uncomp_idx
);
289 // free allocated buffers
293 lookback_free(lookbacks
);
295 return bytes_written
;
298 static FILE *mio0_open_out_file(const char *out_file
) {
299 if (strcmp(out_file
, "-") == 0) {
300 #if defined(_WIN32) || defined(_WIN64)
301 _setmode(_fileno(stdout
), _O_BINARY
);
305 return fopen(out_file
, "wb");
309 int mio0_decode_file(const char *in_file
, unsigned long offset
, const char *out_file
)
314 unsigned char *in_buf
= NULL
;
315 unsigned char *out_buf
= NULL
;
323 in
= fopen(in_file
, "rb");
328 // allocate buffer to read from offset to end of file
329 fseek(in
, 0, SEEK_END
);
330 file_size
= ftell(in
);
331 in_buf
= malloc(file_size
- offset
);
332 fseek(in
, offset
, SEEK_SET
);
335 bytes_read
= fread(in_buf
, 1, file_size
- offset
, in
);
336 if (bytes_read
!= file_size
- offset
) {
342 valid
= mio0_decode_header(in_buf
, &head
);
347 out_buf
= malloc(head
.dest_size
);
349 // decompress MIO0 encoded data
350 bytes_decoded
= mio0_decode(in_buf
, out_buf
, NULL
);
351 if (bytes_decoded
< 0) {
357 out
= mio0_open_out_file(out_file
);
363 // write data to file
364 bytes_written
= fwrite(out_buf
, 1, bytes_decoded
, out
);
365 if (bytes_written
!= bytes_decoded
) {
385 int mio0_encode_file(const char *in_file
, const char *out_file
)
389 unsigned char *in_buf
= NULL
;
390 unsigned char *out_buf
= NULL
;
397 in
= fopen(in_file
, "rb");
402 // allocate buffer to read entire contents of files
403 fseek(in
, 0, SEEK_END
);
404 file_size
= ftell(in
);
405 fseek(in
, 0, SEEK_SET
);
406 in_buf
= malloc(file_size
);
409 bytes_read
= fread(in_buf
, 1, file_size
, in
);
410 if (bytes_read
!= file_size
) {
415 // allocate worst case length
416 out_buf
= malloc(MIO0_HEADER_LENGTH
+ ((file_size
+7)/8) + file_size
);
418 // compress data in MIO0 format
419 bytes_encoded
= mio0_encode(in_buf
, file_size
, out_buf
);
422 out
= mio0_open_out_file(out_file
);
428 // write data to file
429 bytes_written
= fwrite(out_buf
, 1, bytes_encoded
, out
);
430 if (bytes_written
!= bytes_encoded
) {
450 // mio0 standalone executable
451 #ifdef MIO0_STANDALONE
460 static arg_config default_config
=
468 static void print_usage(void)
470 ERROR("Usage: mio0 [-c / -d] [-o OFFSET] FILE [OUTPUT]\n"
472 "mio0 v" MIO0_VERSION
": MIO0 compression and decompression tool\n"
474 "Optional arguments:\n"
475 " -c compress raw data into MIO0 (default: compress)\n"
476 " -d decompress MIO0 into raw data\n"
477 " -o OFFSET starting offset in FILE (default: 0)\n"
481 " [OUTPUT] output file (default: FILE.out), \"-\" for stdout\n");
485 // parse command line arguments
486 static void parse_arguments(int argc
, char *argv
[], arg_config
*config
)
494 for (i
= 1; i
< argc
; i
++) {
495 if (argv
[i
][0] == '-' && argv
[i
][1] != '\0') {
496 switch (argv
[i
][1]) {
498 config
->compress
= 1;
501 config
->compress
= 0;
507 config
->offset
= strtoul(argv
[i
], NULL
, 0);
514 switch (file_count
) {
516 config
->in_filename
= argv
[i
];
519 config
->out_filename
= argv
[i
];
528 if (file_count
< 1) {
533 int main(int argc
, char *argv
[])
535 char out_filename
[FILENAME_MAX
];
539 // get configuration from arguments
540 config
= default_config
;
541 parse_arguments(argc
, argv
, &config
);
542 if (config
.out_filename
== NULL
) {
543 config
.out_filename
= out_filename
;
544 sprintf(config
.out_filename
, "%s.out", config
.in_filename
);
548 if (config
.compress
) {
549 ret_val
= mio0_encode_file(config
.in_filename
, config
.out_filename
);
551 ret_val
= mio0_decode_file(config
.in_filename
, config
.offset
, config
.out_filename
);
556 ERROR("Error opening input file \"%s\"\n", config
.in_filename
);
559 ERROR("Error reading from input file \"%s\"\n", config
.in_filename
);
562 ERROR("Error decoding MIO0 data. Wrong offset (0x%X)?\n", config
.offset
);
565 ERROR("Error opening output file \"%s\"\n", config
.out_filename
);
568 ERROR("Error writing bytes to output file \"%s\"\n", config
.out_filename
);
574 #endif // MIO0_STANDALONE