Minor refactoring, related to lzah
[deark.git] / src / deark-util.c
blobbff2800b36e3548748f6bf753e78aadff642f436
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // deark-util.c: Most of the main library functions
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
10 #include "deark-version.h"
12 #define DE_MAX_SUBMODULE_NESTING_LEVEL 10
14 char *de_get_version_string(char *buf, size_t bufsize)
16 char extver[32];
18 if((DE_VERSION_NUMBER&0x000000ffU) == 0)
19 de_strlcpy(extver, "", sizeof(extver));
20 else
21 de_snprintf(extver, sizeof(extver), "-%u", DE_VERSION_NUMBER&0x000000ff);
23 de_snprintf(buf, bufsize, "%u.%u.%u%s%s",
24 (DE_VERSION_NUMBER&0xff000000U)>>24,
25 (DE_VERSION_NUMBER&0x00ff0000U)>>16,
26 (DE_VERSION_NUMBER&0x0000ff00U)>>8,
27 extver, DE_VERSION_SUFFIX);
29 return buf;
32 unsigned int de_get_version_int(void)
34 return DE_VERSION_NUMBER;
// Copies src to dst, truncating if necessary so that at most dstlen bytes
// (including the terminating NUL) are written. dst is always NUL-terminated,
// unless dstlen is 0, in which case nothing is written at all.
void de_strlcpy(char *dst, const char *src, size_t dstlen)
{
	size_t n;

	// With dstlen==0, "dstlen-1" would wrap around to SIZE_MAX and the copy
	// would be unbounded. There is no room for anything, so do nothing.
	if(dstlen==0) return;

	n = de_strlen(src);
	if(n>dstlen-1) n=dstlen-1;
	de_memcpy(dst, src, n);
	dst[n]='\0';
}
// Case-insensitive comparison of two ASCII strings: 'A'..'Z' are folded to
// lowercase before each byte comparison.
// (We deliberately don't rely on strcasecmp/_stricmp, for portability and
// consistent behavior.)
// If has_n is nonzero, at most n bytes are compared.
// Returns -1, 0, or 1.
static int de_strcasecmp_internal(const char *a, const char *b,
	int has_n, size_t n)
{
	size_t idx;

	for(idx=0; !has_n || idx<n; idx++) {
		unsigned char ca = (unsigned char)a[idx];
		unsigned char cb = (unsigned char)b[idx];

		if(ca==0 && cb==0) return 0; // Both strings ended together

		if(ca>='A' && ca<='Z') ca = (unsigned char)(ca + 32);
		if(cb>='A' && cb<='Z') cb = (unsigned char)(cb + 32);

		if(ca!=cb) {
			return (ca<cb) ? -1 : 1;
		}
	}

	// Reached the length limit without finding a difference.
	return 0;
}
// Case-insensitive (ASCII only) comparison of two NUL-terminated strings.
// Returns -1, 0, or 1.
int de_strcasecmp(const char *a, const char *b)
{
	const int no_length_limit = 0;

	return de_strcasecmp_internal(a, b, no_length_limit, 0);
}
// Like de_strcasecmp(), but compares at most n bytes.
int de_strncasecmp(const char *a, const char *b, size_t n)
{
	const int use_length_limit = 1;

	return de_strcasecmp_internal(a, b, use_length_limit, n);
}
// A NULL-tolerant wrapper for strchr(): returns NULL if s is NULL, otherwise
// whatever strchr(s, c) returns.
char *de_strchr(const char *s, int c)
{
	return s ? strchr(s, c) : NULL;
}
// printf-style formatting into buf (of size buflen). This is just the
// variadic front end for de_vsnprintf().
void de_snprintf(char *buf, size_t buflen, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	de_vsnprintf(buf, buflen, fmt, args);
	va_end(args);
}
95 static void de_puts_advanced(deark *c, unsigned int flags, const char *s)
97 size_t s_len;
98 size_t s_pos = 0;
99 char *tmps = NULL;
100 size_t tmps_pos = 0;
101 int hlmode = 0;
102 unsigned int special_code;
103 u32 param1 = 0;
105 s_len = de_strlen(s);
106 tmps = de_malloc(c, (i64)s_len+1);
108 // Search for characters that enable/disable highlighting,
109 // and split the string at them.
110 while(s_pos < s_len) {
111 if(s[s_pos]=='\x01' || s[s_pos]=='\x02' || s[s_pos]=='\x03') {
112 // Found a special code
114 if(s[s_pos]=='\x02' && s[s_pos+1]=='\x01' && hlmode) {
115 // Optimization: UNHL followed immediately by HL is a no-op.
116 special_code = 0;
118 else if(s[s_pos]=='\x01') {
119 special_code = DE_MSGCODE_HL;
120 hlmode = 1;
122 else if(s[s_pos]=='\x03') {
123 special_code = DE_MSGCODE_RGBSAMPLE;
124 if(s_pos + 7 <= s_len) {
125 param1 = DE_MAKE_RGB(
126 ((s[s_pos+1]&0x0f)<<4) | (s[s_pos+2]&0x0f),
127 ((s[s_pos+3]&0x0f)<<4) | (s[s_pos+4]&0x0f),
128 ((s[s_pos+5]&0x0f)<<4) | (s[s_pos+6]&0x0f));
131 else {
132 special_code = DE_MSGCODE_UNHL;
133 hlmode = 0;
136 // Print what we have of the string before the special code
137 if(tmps_pos>0) {
138 tmps[tmps_pos] = '\0';
139 c->msgfn(c, flags, tmps);
141 tmps_pos = 0;
143 // "Print" the special code
144 if(special_code && c->specialmsgfn) {
145 c->specialmsgfn(c, flags, special_code, param1);
148 // Advance past the special code
149 if(special_code==0)
150 s_pos += 2;
151 else if(special_code==DE_MSGCODE_RGBSAMPLE)
152 s_pos += 7;
153 else
154 s_pos += 1;
156 else {
157 tmps[tmps_pos++] = s[s_pos++];
161 // Unset highlight, if it somehow got left on.
162 if(hlmode && c->specialmsgfn) {
163 c->specialmsgfn(c, flags, DE_MSGCODE_UNHL, 0);
166 tmps[tmps_pos] = '\0';
167 c->msgfn(c, flags, tmps);
168 de_free(c, tmps);
171 void de_puts(deark *c, unsigned int flags, const char *s)
173 size_t k;
175 if(!c || !c->msgfn) {
176 fputs(s, stderr);
177 return;
180 // Scan the printable string for "magic" byte sequences that represent
181 // text color changes, etc. It's admittedly a little ugly that we have to
182 // do this.
184 // We could invent and use any byte sequences we want for this, as long as
185 // they will not otherwise occur in "printable" output.
186 // I.e., if it's valid UTF-8, it must contain a character we classify as
187 // "nonprintable". We could even use actual ANSI escape sequences, since
188 // Esc is a nonprintable character (but that would have little benefit,
189 // and feel kinda wrong, since this part of the code isn't supposed to
190 // know about ANSI escape sequences).
191 // Short sequences are preferable, because they're simpler to detect, and
192 // because these bytes count against some of our size limits.
193 // Valid UTF-8 is probably best, because someday we might want this scheme
194 // to be compatible with something else (such as ucstrings).
195 // So, we're simply using:
196 // U+0001 : DE_CODEPOINT_HL
197 // U+0002 : DE_CODEPOINT_UNHL
198 // U+0003 : DE_CODEPOINT_RGBSAMPLE (followed by 6 bytes for the RGB color)
200 for(k=0; s[k]; k++) {
201 if(s[k]=='\x01' || s[k]=='\x02' || s[k]=='\x03') {
202 de_puts_advanced(c, flags, s);
203 return;
207 c->msgfn(c, flags, s);
210 static void de_vprintf(deark *c, unsigned int flags, const char *fmt, va_list ap)
212 char buf[1024];
214 de_vsnprintf(buf, sizeof(buf), fmt, ap);
215 de_puts(c, flags, buf);
218 void de_printf(deark *c, unsigned int flags, const char *fmt, ...)
220 va_list ap;
222 va_start(ap, fmt);
223 de_vprintf(c, flags, fmt, ap);
224 va_end(ap);
227 static void de_vdbg_internal(deark *c, const char *fmt, va_list ap)
229 char bars_and_spaces[128];
230 size_t bpos;
231 int nspaces;
232 int nbars;
233 const char *dprefix = "DEBUG: ";
235 if(c) {
236 if(c->dprefix) dprefix = c->dprefix;
238 nbars = c->module_nesting_level - 1;
239 if(nbars>10) nbars=10;
241 nspaces = c->dbg_indent_amount;
242 if(nspaces>50) nspaces=50;
244 else {
245 nbars = 0;
246 nspaces = 0;
249 bpos = 0;
250 while(nbars>0) {
251 // One or more vertical lines, to indicate module nesting
252 bars_and_spaces[bpos++] = '\xe2'; // U+2502 Box drawings light vertical
253 bars_and_spaces[bpos++] = '\x94';
254 bars_and_spaces[bpos++] = '\x82';
255 nbars--;
257 while(nspaces>0) {
258 bars_and_spaces[bpos++] = ' ';
259 nspaces--;
261 bars_and_spaces[bpos] = '\0';
263 de_printf(c, DE_MSGTYPE_DEBUG, "%s%s", dprefix, bars_and_spaces);
264 de_vprintf(c, DE_MSGTYPE_DEBUG, fmt, ap);
265 de_puts(c, DE_MSGTYPE_DEBUG, "\n");
268 void de_dbg(deark *c, const char *fmt, ...)
270 va_list ap;
272 if(c && c->debug_level<1) return;
273 va_start(ap, fmt);
274 de_vdbg_internal(c, fmt, ap);
275 va_end(ap);
278 void de_dbg2(deark *c, const char *fmt, ...)
280 va_list ap;
282 if(c && c->debug_level<2) return;
283 va_start(ap, fmt);
284 de_vdbg_internal(c, fmt, ap);
285 va_end(ap);
288 void de_dbg3(deark *c, const char *fmt, ...)
290 va_list ap;
292 if(c && c->debug_level<3) return;
293 va_start(ap, fmt);
294 de_vdbg_internal(c, fmt, ap);
295 va_end(ap);
298 void de_dbgx(deark *c, int lv, const char *fmt, ...)
300 va_list ap;
302 if(c && c->debug_level<lv) return;
303 va_start(ap, fmt);
304 de_vdbg_internal(c, fmt, ap);
305 va_end(ap);
308 void de_dbg_indent(deark *c, int n)
310 c->dbg_indent_amount += n;
313 void de_dbg_indent_save(deark *c, int *saved_indent_level)
315 *saved_indent_level = c->dbg_indent_amount;
318 void de_dbg_indent_restore(deark *c, int saved_indent_level)
320 c->dbg_indent_amount = saved_indent_level;
323 static int get_ndigits_for_offset(i64 n)
325 int nd;
327 if(n<10) nd=1;
328 else if(n<100) nd=2;
329 else if(n<1000) nd=3;
330 else if(n<10000) nd=4;
331 else nd=5;
332 return nd;
335 struct hexdump_ctx;
336 typedef void (*hexdump_printline_fn)(deark *c, struct hexdump_ctx *hctx);
338 struct hexdump_ctx {
339 // same for each row:
340 const char *prefix;
341 const char *prefix_sep; // ":"
342 unsigned int flags;
343 hexdump_printline_fn printlinefn;
344 char offset_fmtstr[32];
346 // per row
347 i64 row_offset;
348 i64 bytesthisrow; // num bytes used in .rowbuf
349 u8 rowbuf[16];
350 char outbuf_sz[200];
353 static void do_hexdump_row(deark *c, struct hexdump_ctx *hctx)
355 char offset_formatted[32];
356 char linebuf[3*16+32];
357 char asciibuf[64];
358 int asciibufpos;
359 int linebufpos;
360 i64 k;
362 linebufpos = 0;
363 asciibufpos = 0;
364 asciibuf[asciibufpos++] = '\"';
365 for(k=0; k<hctx->bytesthisrow; k++) {
366 u8 b;
367 b = hctx->rowbuf[k];
368 linebuf[linebufpos++] = de_get_hexchar(b/16);
369 linebuf[linebufpos++] = de_get_hexchar(b%16);
370 linebuf[linebufpos++] = ' ';
371 if(b>=32 && b<=126) {
372 asciibuf[asciibufpos++] = (char)b;
374 else {
375 asciibuf[asciibufpos++] = '\x01'; // DE_CODEPOINT_HL
376 asciibuf[asciibufpos++] = '.';
377 // We'll often turn off highlighting only to turn it back on
378 // again for the next character. The OFF+ON sequences will be
379 // optimized out later, though, so there's no reason to worry
380 // about that here.
381 asciibuf[asciibufpos++] = '\x02'; // DE_CODEPOINT_UNHL
385 // Pad and terminate the hex values
386 while(linebufpos<48) {
387 linebuf[linebufpos++] = ' ';
389 linebuf[linebufpos] = '\0';
391 // Terminate or erase the ASCII representation
392 if(hctx->flags&0x1) {
393 asciibuf[asciibufpos++] = '\"';
394 asciibuf[asciibufpos++] = '\0';
396 else {
397 asciibuf[0] = '\0';
400 // Careful: With a variable format string, the compiler won't be able to
401 // detect errors.
402 de_snprintf(offset_formatted, sizeof(offset_formatted), hctx->offset_fmtstr,
403 (i64)hctx->row_offset);
405 de_snprintf(hctx->outbuf_sz, sizeof(hctx->outbuf_sz), "%s%s%s: %s%s",
406 hctx->prefix, hctx->prefix_sep, offset_formatted, linebuf, asciibuf);
407 hctx->printlinefn(c, hctx);
410 // If prefix is NULL, a default will be used.
411 // flags:
412 // 0x1 = Include an ASCII representation
413 static void de_hexdump_internal(deark *c, struct hexdump_ctx *hctx,
414 dbuf *f, i64 pos1,
415 i64 nbytes_avail, i64 max_nbytes_to_dump)
417 i64 pos = pos1;
418 i64 len;
419 int ndigits_for_offset;
420 int was_truncated = 0;
422 if(hctx->flags & 0x2) {
423 // Don't print a prefix
424 hctx->prefix = "";
425 hctx->prefix_sep = "";
427 else {
428 hctx->prefix_sep = ":";
431 if(nbytes_avail > max_nbytes_to_dump) {
432 len = max_nbytes_to_dump;
433 was_truncated = 1;
435 else {
436 len = nbytes_avail;
439 // Construct a format string to use for byte offsets.
440 if(was_truncated) {
441 // If we're truncating, the highest offset we'll print is the number
442 // of data bytes that we'll dump.
443 ndigits_for_offset = get_ndigits_for_offset(len);
445 else {
446 if(len<1) return;
448 // If we're not truncating, the highest offset we'll print is the
449 // highest byte offset that is a multiple of 16.
450 ndigits_for_offset = get_ndigits_for_offset(((len-1)/16)*16);
452 de_snprintf(hctx->offset_fmtstr, sizeof(hctx->offset_fmtstr), "%%%d"I64_FMT, ndigits_for_offset);
454 while(1) { // For each row...
455 if(pos >= pos1+len) break;
457 hctx->row_offset = pos-pos1;
459 hctx->bytesthisrow = (pos1+len)-pos;
460 if(hctx->bytesthisrow>16) hctx->bytesthisrow=16;
462 dbuf_read(f, hctx->rowbuf, pos, hctx->bytesthisrow);
464 do_hexdump_row(c, hctx);
466 pos += hctx->bytesthisrow;
468 if(was_truncated) {
469 de_snprintf(hctx->outbuf_sz, sizeof(hctx->outbuf_sz),
470 "%s%s%"I64_FMT": ...", hctx->prefix, hctx->prefix_sep, len);
471 hctx->printlinefn(c, hctx);
475 static void hexdump_printline_dbg(deark *c, struct hexdump_ctx *hctx)
477 de_dbg(c, "%s", hctx->outbuf_sz);
480 // If prefix is NULL (and the no_prefix flag is not set), a default will be used.
481 // flags:
482 // 0x1 = Include an ASCII representation
483 // 0x2 = No prefix
484 void de_dbg_hexdump(deark *c, dbuf *f, i64 pos1,
485 i64 nbytes_avail, i64 max_nbytes_to_dump,
486 const char *prefix1, unsigned int flags)
488 struct hexdump_ctx hctx;
490 hctx.flags = flags;
491 hctx.prefix = (prefix1) ? prefix1 : "data";
492 hctx.printlinefn = hexdump_printline_dbg;
494 de_hexdump_internal(c, &hctx, f, pos1, nbytes_avail, max_nbytes_to_dump);
497 static void hexdump_printline_ext(deark *c, struct hexdump_ctx *hctx)
499 de_printf(c, DE_MSGTYPE_MESSAGE, "%s\n", hctx->outbuf_sz);
502 // Print a hexdump in the style of the "hexdump" module.
503 void de_hexdump2(deark *c, dbuf *f, i64 pos1, i64 nbytes_avail,
504 i64 max_nbytes_to_dump, unsigned int flags)
506 struct hexdump_ctx hctx;
508 hctx.flags = flags | 0x2;
509 hctx.prefix = NULL;
510 hctx.printlinefn = hexdump_printline_ext;
511 de_hexdump_internal(c, &hctx, f, pos1, nbytes_avail, max_nbytes_to_dump);
514 // This is such a common thing to do, that it's worth having a function for it.
515 void de_dbg_dimensions(deark *c, i64 w, i64 h)
517 de_dbg(c, "dimensions: %"I64_FMT DE_CHAR_TIMES "%"I64_FMT, w, h);
520 // Generates a "magic" code that, when included in the debug output, will
521 // (in some circumstances) display a small sample of the given color.
522 // Caller supplies csamp[16].
523 // Returns a pointer to csamp, for convenience.
524 char *de_get_colorsample_code(deark *c, de_color clr, char *csamp,
525 size_t csamplen)
527 unsigned int r, g, b;
529 if(csamplen<8) {
530 csamp[0]='\0';
531 return csamp;
534 r = (unsigned int)DE_COLOR_R(clr);
535 g = (unsigned int)DE_COLOR_G(clr);
536 b = (unsigned int)DE_COLOR_B(clr);
538 // Only the low 4 bits are significant. We add 16 so that the bits can't
539 // all be 0; since we can't have NUL bytes in this NUL-terminated string.
540 // Also, it's nice if the values are all <= 127, to make them UTF-8
541 // compatible.
542 csamp[0] = '\x03'; // refer to DE_CODEPOINT_RGBSAMPLE
543 csamp[1] = 16 + (r>>4)%16;
544 csamp[2] = 16 + r%16;
545 csamp[3] = 16 + (g>>4)%16;
546 csamp[4] = 16 + g%16;
547 csamp[5] = 16 + (b>>4)%16;
548 csamp[6] = 16 + b%16;
549 csamp[7] = '\0';
550 return csamp;
553 // Print debugging output for an 8-bit RGB palette entry.
554 void de_dbg_pal_entry2(deark *c, i64 idx, de_color clr,
555 const char *txt_before, const char *txt_in, const char *txt_after)
557 int r,g,b,a;
558 char astr[32];
559 char csamp[16];
561 if(c->debug_level<2) return;
562 if(!txt_before) txt_before="";
563 if(!txt_in) txt_in="";
564 if(!txt_after) txt_after="";
565 r = (int)DE_COLOR_R(clr);
566 g = (int)DE_COLOR_G(clr);
567 b = (int)DE_COLOR_B(clr);
568 a = (int)DE_COLOR_A(clr);
569 if(a!=0xff) {
570 de_snprintf(astr, sizeof(astr), ",A=%d", a);
572 else {
573 astr[0] = '\0';
576 de_get_colorsample_code(c, clr, csamp, sizeof(csamp));
577 de_dbg2(c, "pal[%3d] = %s(%3d,%3d,%3d%s%s)%s%s", (int)idx, txt_before,
578 r, g, b, astr, txt_in, csamp, txt_after);
581 void de_dbg_pal_entry(deark *c, i64 idx, de_color clr)
583 if(c->debug_level<2) return;
584 de_dbg_pal_entry2(c, idx, clr, NULL, NULL, NULL);
587 void de_verr(deark *c, const char *fmt, va_list ap)
589 if(c) {
590 c->error_count++;
593 de_puts(c, DE_MSGTYPE_ERROR, "Error: ");
594 de_vprintf(c, DE_MSGTYPE_ERROR, fmt, ap);
595 de_puts(c, DE_MSGTYPE_ERROR, "\n");
598 // c can be NULL
599 void de_err(deark *c, const char *fmt, ...)
601 va_list ap;
603 va_start(ap, fmt);
604 de_verr(c, fmt, ap);
605 va_end(ap);
608 void de_vwarn(deark *c, const char *fmt, va_list ap)
610 if(!c->show_warnings) return;
611 de_puts(c, DE_MSGTYPE_WARNING, "Warning: ");
612 de_vprintf(c, DE_MSGTYPE_WARNING, fmt, ap);
613 de_puts(c, DE_MSGTYPE_WARNING, "\n");
616 void de_warn(deark *c, const char *fmt, ...)
618 va_list ap;
620 if(!c->show_warnings) return;
621 va_start(ap, fmt);
622 de_vwarn(c, fmt, ap);
623 va_end(ap);
626 // For "informational" messages: Those that will be suppressed by -noinfo.
627 void de_info(deark *c, const char *fmt, ...)
629 va_list ap;
631 if(!c->show_infomessages) return;
632 va_start(ap, fmt);
633 de_vprintf(c, DE_MSGTYPE_MESSAGE, fmt, ap);
634 va_end(ap);
635 de_puts(c, DE_MSGTYPE_MESSAGE, "\n");
638 // For "payload" messages, that won't be suppressed by options like -q.
639 // (Note that there is nothing wrong with using de_printf or de_puts instead of
640 // this.)
641 void de_msg(deark *c, const char *fmt, ...)
643 va_list ap;
645 va_start(ap, fmt);
646 de_vprintf(c, DE_MSGTYPE_MESSAGE, fmt, ap);
647 va_end(ap);
648 de_puts(c, DE_MSGTYPE_MESSAGE, "\n");
651 // c can be NULL.
652 void de_fatalerror(deark *c)
654 if(c && c->fatalerrorfn) {
655 c->fatalerrorfn(c);
657 de_exitprocess(1);
660 void de_internal_err_fatal(deark *c, const char *fmt, ...)
662 va_list ap;
664 de_puts(c, DE_MSGTYPE_ERROR, "Internal error: ");
665 va_start(ap, fmt);
666 de_vprintf(c, DE_MSGTYPE_ERROR, fmt, ap);
667 va_end(ap);
668 de_puts(c, DE_MSGTYPE_ERROR, "\n");
669 de_fatalerror(c);
672 void de_internal_err_nonfatal(deark *c, const char *fmt, ...)
674 va_list ap;
675 char buf[200];
677 va_start(ap, fmt);
678 de_vsnprintf(buf, sizeof(buf), fmt, ap);
679 va_end(ap);
680 de_err(c, "Internal: %s", buf);
683 // TODO: Make de_malloc use de_mallocarray internally, instead of vice versa.
684 void *de_mallocarray(deark *c, i64 nmemb, size_t membsize)
686 if(nmemb>500000000 || nmemb<0 || membsize>500000000) {
687 de_err(c, "Out of memory");
688 de_fatalerror(c);
689 return NULL;
692 return de_malloc(c, nmemb*(i64)membsize);
695 // Memory returned is always zeroed.
696 // c can be NULL.
697 // Always succeeds; never returns NULL.
698 void *de_malloc(deark *c, i64 n)
700 void *m;
701 if(n==0) n=1;
702 if(n<0 || n>500000000) {
703 de_err(c, "Out of memory (%d bytes requested)",(int)n);
704 de_fatalerror(c);
705 return NULL;
708 m = calloc((size_t)n,1);
709 if(!m) {
710 de_err(c, "Memory allocation failed (%d bytes)",(int)n);
711 de_fatalerror(c);
712 return NULL;
714 return m;
717 // TODO: Make de_realloc use de_reallocarray internally, instead of vice versa.
718 void *de_reallocarray(deark *c, void *m, i64 oldnmemb, size_t membsize,
719 i64 newnmemb)
722 if(newnmemb>500000000 || newnmemb<0 || oldnmemb<0 || membsize>500000000) {
723 de_err(c, "Out of memory");
724 de_fatalerror(c);
725 return NULL;
728 return de_realloc(c, m,
729 oldnmemb*(i64)membsize,
730 newnmemb*(i64)membsize);
733 // If you know oldsize, you can provide it, and newly-allocated bytes will be zeroed.
734 // Otherwise, set oldsize==newsize, and newly-allocated bytes won't be zeroed.
735 // If oldmem is NULL, this behaves the same as de_malloc, and all bytes are zeroed.
736 void *de_realloc(deark *c, void *oldmem, i64 oldsize, i64 newsize)
738 void *newmem;
740 if(!oldmem) {
741 return de_malloc(c, newsize);
744 newmem = realloc(oldmem, (size_t)newsize);
745 if(!newmem) {
746 de_err(c, "Memory reallocation failed (%d bytes)",(int)newsize);
747 free(oldmem);
748 de_fatalerror(c);
749 return NULL;
752 if(oldsize<newsize) {
753 // zero out any newly-allocated bytes
754 de_zeromem(&((u8*)newmem)[oldsize], (size_t)(newsize-oldsize));
757 return newmem;
760 void de_free(deark *c, void *m)
762 free(m);
765 // Returns the index into c->module_info[], or -1 if no found.
766 int de_get_module_idx_by_id(deark *c, const char *module_id)
768 int i;
769 int k;
771 if(!module_id) return -1;
773 for(i=0; i<c->num_modules; i++) {
774 if(!de_strcmp(c->module_info[i].id, module_id)) {
775 return i;
777 for(k=0; k<DE_MAX_MODULE_ALIASES; k++) {
778 if(!c->module_info[i].id_alias[k]) break;
779 if(!de_strcmp(c->module_info[i].id_alias[k], module_id)) {
780 return i;
784 return -1;
787 struct deark_module_info *de_get_module_by_id(deark *c, const char *module_id)
789 int idx;
791 idx = de_get_module_idx_by_id(c, module_id);
792 if(idx<0) return NULL;
793 return &c->module_info[idx];
796 int de_run_module(deark *c, struct deark_module_info *mi, de_module_params *mparams,
797 enum de_moddisp_enum moddisp)
799 enum de_moddisp_enum old_moddisp;
800 struct de_detection_data_struct *old_detection_data;
802 if(!mi) return 0;
803 if(!mi->run_fn) return 0;
804 // Note that c->module_nesting_level is 0 when we are not in a module,
805 // 1 when in the top-level module, 2 for a first-level submodule, etc.
806 if(c->module_nesting_level >= 1+DE_MAX_SUBMODULE_NESTING_LEVEL) {
807 de_err(c, "Max module nesting level exceeded");
808 return 0;
811 old_moddisp = c->module_disposition;
812 c->module_disposition = moddisp;
814 old_detection_data = c->detection_data;
815 if(c->module_nesting_level > 0) {
816 c->detection_data = NULL;
819 if(c->module_nesting_level>0 && c->debug_level>=3) {
820 de_dbg3(c, "[using %s module]", mi->id);
822 c->module_nesting_level++;
823 mi->run_fn(c, mparams);
824 c->module_nesting_level--;
825 c->module_disposition = old_moddisp;
826 c->detection_data = old_detection_data;
827 return 1;
830 int de_run_module_by_id(deark *c, const char *id, de_module_params *mparams)
832 struct deark_module_info *module_to_use;
834 module_to_use = de_get_module_by_id(c, id);
835 if(!module_to_use) {
836 de_err(c, "Unknown or unsupported format \"%s\"", id);
837 return 0;
840 return de_run_module(c, module_to_use, mparams, DE_MODDISP_INTERNAL);
843 int de_run_module_by_id_on_slice(deark *c, const char *id, de_module_params *mparams,
844 dbuf *f, i64 pos, i64 len)
846 dbuf *old_ifile;
847 int ret;
849 old_ifile = c->infile;
851 if(pos==0 && len==f->len) {
852 // Optimization: We don't need a subfile in this case
853 c->infile = f;
854 ret = de_run_module_by_id(c, id, mparams);
856 else {
857 c->infile = dbuf_open_input_subfile(f, pos, len);
858 ret = de_run_module_by_id(c, id, mparams);
859 dbuf_close(c->infile);
862 c->infile = old_ifile;
863 return ret;
866 // Same as de_run_module_by_id_on_slice(), but takes just ->codes
867 // as a parameter, instead of a full de_module_params struct.
868 int de_run_module_by_id_on_slice2(deark *c, const char *id, const char *codes,
869 dbuf *f, i64 pos, i64 len)
871 de_module_params *mparams = NULL;
872 int ret;
874 mparams = de_malloc(c, sizeof(de_module_params));
875 mparams->in_params.codes = codes;
876 ret = de_run_module_by_id_on_slice(c, id, mparams, f, pos, len);
877 de_free(c, mparams);
878 return ret;
881 const char *de_get_ext_option(deark *c, const char *name)
883 int i;
885 for(i=0; i<c->num_ext_options; i++) {
886 if(!de_strcmp(c->ext_option[i].name, name)) {
887 return c->ext_option[i].val;
890 return NULL; // Option name not found.
893 // Returns
894 // 0 if false, ("0", "n...", "f...", etc.)
895 // 1 if true (empty value, "1", "y...", "t...", etc.)
896 // defaultval (which can be any integer) if not set, or value is malformed.
897 int de_get_ext_option_bool(deark *c, const char *name, int defaultval)
899 const char *val;
901 val = de_get_ext_option(c, name);
902 if(!val) return defaultval;
903 if(val[0]=='\0' || val[0]=='1' || val[0]=='y' || val[0]=='Y' ||
904 val[0]=='t' || val[0]=='T')
906 return 1;
908 if(val[0]=='0' || val[0]=='n' || val[0]=='N' || val[0]=='f' ||
909 val[0]=='F')
911 return 0;
913 return defaultval;
// A wrapper for atoi(): converts the initial part of 'string' to an int.
int de_atoi(const char *string)
{
	const int value = atoi(string);

	return value;
}
921 i64 de_atoi64(const char *string)
923 return de_strtoll(string, NULL, 10);
926 i64 de_min_int(i64 n1, i64 n2)
928 return (n1<n2) ? n1 : n2;
931 i64 de_max_int(i64 n1, i64 n2)
933 return (n1>n2) ? n1 : n2;
936 i64 de_pad_to_2(i64 x)
938 return (x&0x1) ? x+1 : x;
941 i64 de_pad_to_4(i64 x)
943 return ((x+3)/4)*4;
946 // Returns x^2.
947 // Valid for x=0 to 62. If x is invalid, returns 1 (=2^0).
948 i64 de_pow2(i64 x)
950 if(x<0 || x>62) return 1;
951 return (i64)1 << (unsigned int)x;
954 i64 de_pad_to_n(i64 x, i64 n)
956 i64 r;
957 if(n<2)
958 return x;
959 r = x%n;
960 if(r==0)
961 return x;
962 return x - r + n;
965 i64 de_log2_rounded_up(i64 n)
967 i64 i;
969 if(n<=2) return 1;
970 for(i=2; i<32; i++) {
971 if(n <= (((i64)1)<<i)) return i;
973 return 32;
976 char *de_print_base2_fixed(char *buf, size_t buf_len, u64 n, UI bitcount)
978 UI x;
979 size_t bpos = 0;
981 if(buf_len<(size_t)bitcount+1) {
982 goto done;
985 for(x=0; x<bitcount; x++) {
986 buf[bpos++] = (n & (1ULL<<(bitcount-1-x))) ? '1' : '0';
988 done:
989 buf[bpos] = '\0';
990 return buf;
// Shared empty string, returned when there is no filename extension.
static const char g_empty_string[] = "";

// Returns a pointer to the filename extension in sz (the part after the
// "."), or to an empty string if there is none.
// The "." must follow the last "/" or "\", and the search starts at the
// second-to-last character, so a "." in the final position is not found.
const char *de_get_sz_ext(const char *sz)
{
	int len;
	int pos;

	if(!sz) return g_empty_string;

	len = (int)de_strlen(sz);
	if(len<2) return g_empty_string;

	// Scan backward for the last ".", stopping at a path separator.
	for(pos=len-2; pos>=0; pos--) {
		const char ch = sz[pos];

		if(ch=='.') {
			return &sz[pos+1];
		}
		if(ch=='/' || ch=='\\') {
			break;
		}
	}

	return g_empty_string;
}
1019 const char *de_get_input_file_ext(deark *c)
1021 if(c->suppress_detection_by_filename) return g_empty_string;
1023 if(!c->input_filename) return g_empty_string;
1025 // If we skipped over the first part of the file, assume we're reading
1026 // an embedded format that's not indicated by the file extension.
1027 if(c->slice_start_req) return g_empty_string;
1029 return de_get_sz_ext(c->input_filename);
// Returns 1 if the extension of the filename sz matches ext (ignoring
// ASCII case), otherwise 0.
int de_sz_has_ext(const char *sz, const char *ext)
{
	const char *actual_ext = de_get_sz_ext(sz);

	return (de_strcasecmp(actual_ext, ext)==0) ? 1 : 0;
}
1042 int de_input_file_has_ext(deark *c, const char *ext)
1044 const char *e;
1046 e = de_get_input_file_ext(c);
1047 if(!de_strcasecmp(e, ext))
1048 return 1;
1049 return 0;
1052 int de_havemodcode(deark *c, de_module_params *mparams, int code)
1054 if(mparams &&
1055 mparams->in_params.codes &&
1056 de_strchr(mparams->in_params.codes, code))
1058 return 1;
1060 return 0;
1063 // An finfo object holds metadata to be used when writing an output file.
1064 // It is passed to dbuf_create_output_file(), and related functions.
1065 // It does not have to remain valid after that function returns.
1066 // It is allowed to be reused.
1067 de_finfo *de_finfo_create(deark *c)
1069 de_finfo *fi;
1070 fi = de_malloc(c, sizeof(de_finfo));
1071 return fi;
1074 void de_finfo_destroy(deark *c, de_finfo *fi)
1076 if(!fi) return;
1077 if(fi->file_name_internal) ucstring_destroy(fi->file_name_internal);
1078 if(fi->name_other) ucstring_destroy(fi->name_other);
1079 de_free(c, fi);
1082 static i32 de_char_to_valid_fn_char(deark *c, i32 ch)
1084 if(ch>=32 && ch<=126 && ch!='/' && ch!='\\' && ch!=':'
1085 && ch!='*' && ch!='?' && ch!='\"' && ch!='<' &&
1086 ch!='>' && ch!='|')
1088 // These are the valid ASCII characters in Windows filenames.
1089 // TODO: We could behave differently on different platforms.
1090 return ch;
1092 else if(ch>=160 && ch<=0x10ffff) {
1093 // TODO: A lot of Unicode characters probably don't belong in filenames.
1094 // Maybe we need a whitelist or blacklist.
1095 // (is_printable_uchar() exists, but isn't quite right.)
1096 return ch;
1098 return '_';
1101 // Sanitize a filename that is either also going to be processed by
1102 // sanitize_filename2(), or is known to contain no slashes.
1103 static void sanitize_filename1(deark *c, de_ucstring *s)
1105 // Don't allow "."
1106 if(s->len==1 && s->str[0]=='.') {
1107 s->str[0] = '_';
1109 // Don't allow ".."
1110 if(s->len==2 && s->str[0]=='.' && s->str[1]=='.') {
1111 s->str[0] = '_';
1115 // Sanitize a filename that may contain slashes.
1116 // Just some basic sanitization, not expected to be perfect.
1117 // Note that this name will be written to a ZIP file, not used directly as a
1118 // filename.
1119 static void sanitize_filename2(deark *c, de_ucstring *s)
1121 i64 i;
1123 // Don't allow an initial "/"
1124 if(s->len>=1 && s->str[0]=='/') {
1125 s->str[0] = '_';
1128 // Don't allow consecutive slashes
1129 for(i=0; i<s->len-1; i++) {
1130 if(s->str[i]=='/' && s->str[i+1]=='/') {
1131 s->str[i] = '_';
1135 // Don't allow a component to be ".."
1136 for(i=0; i<s->len-1; i++) {
1137 if(s->str[i]=='.' && s->str[i+1]=='.') {
1138 int test1 = 0; // Is ".." at the beginning of a component?
1139 int test2 = 0; // Is ".." at the end of a component?
1140 if(i==0 || s->str[i-1]=='/') {
1141 test1 = 1;
1143 if(i>=s->len-2 || s->str[i+2]=='/') {
1144 test2 = 1;
1146 if(test1 && test2) {
1147 s->str[i] = '_';
1152 // Don't allow name to end with "/."
1153 if(s->len>=2 && s->str[s->len-2]=='/' && s->str[s->len-1]=='.') {
1154 s->str[s->len-1] = '_';
1157 // Don't allow name to end with "/"
1158 if(s->len>=1 && s->str[s->len-1]=='/') {
1159 s->str[s->len-1] = '_';
1163 // Takes ownership of 's', and may modify it.
1164 // flags:
1165 // DE_SNFLAG_FULLPATH = "/" characters in the name are path separators.
1166 // DE_SNFLAG_STRIPTRAILINGSLASH
1167 static void de_finfo_set_name_internal(deark *c, de_finfo *fi, de_ucstring *s,
1168 unsigned int flags)
1170 i64 i;
1171 int allow_slashes;
1173 fi->orig_name_was_dot = 0;
1175 if(fi->file_name_internal) {
1176 ucstring_destroy(fi->file_name_internal);
1177 fi->file_name_internal = NULL;
1179 if(!s) return;
1181 fi->file_name_internal = s;
1183 if((flags&DE_SNFLAG_STRIPTRAILINGSLASH) && s->len>0 && s->str[s->len-1]=='/') {
1184 ucstring_truncate(s, s->len-1);
1187 allow_slashes = (c->allow_subdirs && (flags&DE_SNFLAG_FULLPATH));
1189 if(allow_slashes && s->len==1 && s->str[0]=='.') {
1190 // Remember that this file was named ".", which can be a valid subdir
1191 // name in some cases (but at this point we don't even know whether it
1192 // is a directory).
1193 fi->orig_name_was_dot = 1;
1196 for(i=0; i<s->len; i++) {
1197 if(s->str[i]=='/' && allow_slashes) {
1198 continue;
1200 s->str[i] = de_char_to_valid_fn_char(c, s->str[i]);
1203 ucstring_strip_trailing_spaces(s);
1205 sanitize_filename1(c, s);
1207 if(allow_slashes) {
1208 sanitize_filename2(c, s);
1211 // Don't allow empty filenames.
1212 if(s->len<1) {
1213 ucstring_append_sz(s, "_", DE_ENCODING_LATIN1);
1217 void de_finfo_set_name_from_ucstring(deark *c, de_finfo *fi, de_ucstring *s,
1218 unsigned int flags)
1220 de_ucstring *s_copy;
1222 s_copy = ucstring_clone(s);
1223 de_finfo_set_name_internal(c, fi, s_copy, flags);
1226 void de_finfo_set_name_from_sz(deark *c, de_finfo *fi, const char *name1,
1227 unsigned int flags, de_ext_encoding ee)
1229 de_ucstring *fname;
1231 if(!name1) {
1232 de_finfo_set_name_from_ucstring(c, fi, NULL, flags);
1233 return;
1235 fname = ucstring_create(c);
1236 ucstring_append_sz(fname, name1, ee);
1237 de_finfo_set_name_internal(c, fi, fname, flags);
1240 // Sets the precision field to UNKNOWN.
1241 // flags: Same as de_FILETIME_to_timestamp()
1242 void de_unix_time_to_timestamp(i64 ut, struct de_timestamp *ts, unsigned int flags)
1244 de_FILETIME_to_timestamp(
1245 (ut + ((i64)86400)*(369*365 + 89)) * 10000000,
1246 ts, flags);
1247 ts->precision = DE_TSPREC_UNKNOWN;
1250 // Sets the sub-second part of the timestamp to 'frac' seconds after
1251 // (always forward in time) the whole-number second represented by the
1252 // timestamp.
1253 // 'frac' must be >=0.0 and <1.0.
1254 // Sets the precision field to HIGH.
1255 void de_timestamp_set_subsec(struct de_timestamp *ts, double frac)
1257 i64 subsec;
1259 if(!ts->is_valid) return;
1260 if(ts->ts_FILETIME<0) ts->ts_FILETIME=0;
1262 // Subtract off any existing fractional second.
1263 ts->ts_FILETIME -= (ts->ts_FILETIME%10000000);
1265 subsec = (i64)(0.5+frac*10000000.0);
1266 if(subsec>=10000000) subsec=9999999;
1267 if(subsec<0) subsec=0;
1268 ts->ts_FILETIME += subsec;
1269 ts->precision = DE_TSPREC_HIGH;
1272 // Returns the number of ten-millionths of a second after the whole number
1273 // of seconds (i.e. after the time returned by de_timestamp_to_unix_time).
1274 // The returned value will be between 0 and 9999999, inclusive.
1275 i64 de_timestamp_get_subsec(const struct de_timestamp *ts)
1277 return (de_timestamp_to_FILETIME(ts) % 10000000);
1280 void de_mac_time_to_timestamp(i64 mt, struct de_timestamp *ts)
1282 de_unix_time_to_timestamp(mt - 2082844800, ts, 0);
1285 // Convert a Windows FILETIME to a Deark timestamp.
1286 // Always sets the precision field to HIGH.
1287 // flags: 0x1 = set the UTC flag
1288 void de_FILETIME_to_timestamp(i64 ft, struct de_timestamp *ts, unsigned int flags)
1290 de_zeromem(ts, sizeof(struct de_timestamp));
1291 if(ft<=0) return;
1292 ts->is_valid = 1;
1293 ts->ts_FILETIME = ft;
1294 ts->precision = DE_TSPREC_HIGH;
1295 if(flags&0x1) ts->tzcode = DE_TZCODE_UTC;
1298 void de_dos_datetime_to_timestamp(struct de_timestamp *ts,
1299 i64 ddate, i64 dtime)
1301 i64 yr, mo, da, hr, mi, se;
1303 if(ddate==0) {
1304 de_zeromem(ts, sizeof(struct de_timestamp));
1305 ts->is_valid = 0;
1306 return;
1308 yr = 1980+((ddate&0xfe00)>>9);
1309 mo = (ddate&0x01e0)>>5;
1310 da = (ddate&0x001f);
1311 hr = (dtime&0xf800)>>11;
1312 mi = (dtime&0x07e0)>>5;
1313 se = 2*(dtime&0x001f);
1314 de_make_timestamp(ts, yr, mo, da, hr, mi, se);
1315 ts->precision = DE_TSPREC_2SEC;
1318 // flags:
1319 // 0x1 = support VFAT long filename attribs
1320 void de_describe_dos_attribs(deark *c, UI attr, de_ucstring *s, UI flags)
1322 unsigned int bf = attr;
1324 if((flags & 0x1) && (bf & 0x3f)==0x0f) {
1325 ucstring_append_flags_item(s, "long filename");
1326 bf -= 0x0f;
1328 if(bf & 0x01) {
1329 ucstring_append_flags_item(s, "read-only");
1330 bf -= 0x01;
1332 if(bf & 0x02) {
1333 ucstring_append_flags_item(s, "hidden");
1334 bf -= 0x02;
1336 if(bf & 0x04) {
1337 ucstring_append_flags_item(s, "system");
1338 bf -= 0x04;
1340 if(bf & 0x08) {
1341 ucstring_append_flags_item(s, "volume label");
1342 bf -= 0x08;
1344 if(bf & 0x10) {
1345 ucstring_append_flags_item(s, "directory");
1346 bf -= 0x10;
1348 if(bf & 0x20) {
1349 ucstring_append_flags_item(s, "archive");
1350 bf -= 0x20;
1353 if(bf!=0) { // Report any unrecognized flags
1354 ucstring_append_flags_itemf(s, "0x%02x", bf);
1358 // Sets the DE_TZCODE_UTC flag.
1359 void de_riscos_loadexec_to_timestamp(u32 load_addr,
1360 u32 exec_addr, struct de_timestamp *ts)
1362 i64 t;
1363 unsigned int centiseconds;
1365 de_zeromem(ts, sizeof(struct de_timestamp));
1366 if((load_addr&0xfff00000U)!=0xfff00000U) return;
1368 t = (((i64)(load_addr&0xff))<<32) | (i64)exec_addr;
1369 // t now = number of centiseconds since the beginning of 1900
1371 // Remember centiseconds.
1372 centiseconds = (unsigned int)(t%100);
1373 // Convert t to seconds.
1374 t = t/100;
1376 // Convert 1900 epoch to 1970 epoch.
1377 // (There were 17 leap days between Jan 1900 and Jan 1970.)
1378 t -= (365*70 + 17)*(i64)86400;
1380 if(t<=0 || t>=8000000000LL) return; // sanity check
1382 de_unix_time_to_timestamp(t, ts, 0);
1383 de_timestamp_set_subsec(ts, ((double)centiseconds)/100.0);
1384 ts->tzcode = DE_TZCODE_UTC;
1387 // This always truncates down to a whole number of seconds.
1388 // While an option to round might be useful for *something*, it could
1389 // cause problems if you're not really careful. It invites double-rounding,
1390 // and the creation of timestamps that are slightly in the future, both of
1391 // which can be problematic.
1392 i64 de_timestamp_to_unix_time(const struct de_timestamp *ts)
1394 if(!ts->is_valid) return 0;
1396 // There are 369 years between 1601 and 1970, with 89 leap days.
1397 return (de_timestamp_to_FILETIME(ts)/10000000) - ((i64)86400)*(369*365 + 89);
1400 // Convert to Windows FILETIME.
1401 // Returns 0 on error.
1402 i64 de_timestamp_to_FILETIME(const struct de_timestamp *ts)
1404 if(!ts->is_valid) return 0;
1405 if(ts->ts_FILETIME<0) return 0;
1406 return ts->ts_FILETIME;
1409 // [Adapted from Eric Raymond's public domain my_timegm().]
1410 // Convert a time (as individual fields) to a de_timestamp.
1411 // This is basically a UTC version of mktime().
1412 // yr = full year
1413 // mo = month: 1=Jan, ... 12=Dec
1414 // da = day of month: 1=1, ... 31=31
1415 void de_make_timestamp(struct de_timestamp *ts,
1416 i64 yr, i64 mo, i64 da,
1417 i64 hr, i64 mi, i64 se)
1419 i64 result;
1420 i64 tm_mon;
1421 static const int cumulative_days[12] =
1422 { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
1424 de_zeromem(ts, sizeof(struct de_timestamp));
1425 tm_mon = mo-1;
1426 if(tm_mon<0 || tm_mon>11) tm_mon=0;
1427 result = (yr - 1970) * 365 + cumulative_days[tm_mon];
1428 result += (yr - 1968) / 4;
1429 result -= (yr - 1900) / 100;
1430 result += (yr - 1600) / 400;
1431 if ((yr%4)==0 && ((yr%100)!=0 || (yr%400)==0) && tm_mon<2) {
1432 result--;
1434 result += da-1;
1435 result *= 24;
1436 result += hr;
1437 result *= 60;
1438 result += mi;
1439 result *= 60;
1440 result += se;
1442 de_unix_time_to_timestamp(result, ts, 0);
1445 // Adjust the timestamp, presumably to convert it from local time to UTC,
1446 // and set the UTC flag.
1447 // offset_seconds is number of seconds to add to the timestamp to get UTC,
1448 // i.e. number of seconds west of UTC.
1449 void de_timestamp_cvt_to_utc(struct de_timestamp *ts, i64 offset_seconds)
1451 if(!ts->is_valid) return;
1452 ts->ts_FILETIME += offset_seconds*10000000;
1453 ts->tzcode = DE_TZCODE_UTC;
1456 // Our version of the standard gmtime() function.
1457 // We roll our own, so that we can support a wide range of dates. We want to
1458 // handle erroneous, and deliberately pathological, dates in the distant past
1459 // and future. We also want Deark to work the same on all platforms.
1461 // Converts a de_timestamp to a de_struct_tm, with separate fields
1462 // for year, month, day, ...
1463 // Uses the Gregorian calendar.
1464 // Supports dates from about year 1601 to 30828.
1465 void de_gmtime(const struct de_timestamp *ts, struct de_struct_tm *tm2)
1467 // Let's define an "eon" to be a 400-year period. Eons begin at the start
1468 // of the year 1601, 2001, 2401, etc.
1469 static const i64 secs_per_eon = 12622780800LL;
1470 i64 eon;
1471 i64 secs_since_start_of_1601;
1472 i64 secs_since_start_of_eon;
1473 i64 days_since_start_of_eon;
1474 i64 secs_since_start_of_day;
1475 i64 yr_tmp; // years, since start of eon, accounted for so far
1476 i64 days_tmp; // number of days not accounted for in yr_tmp
1477 i64 count;
1478 int is_leapyear;
1479 int k;
1481 de_zeromem(tm2, sizeof(struct de_struct_tm));
1482 if(!ts->is_valid || ts->ts_FILETIME<=0) {
1483 return;
1486 secs_since_start_of_1601 = ts->ts_FILETIME / 10000000;
1487 tm2->tm_subsec = ts->ts_FILETIME % 10000000;
1488 eon = secs_since_start_of_1601 / secs_per_eon;
1489 secs_since_start_of_eon = secs_since_start_of_1601 % secs_per_eon;
1490 days_since_start_of_eon = secs_since_start_of_eon / 86400;
1491 secs_since_start_of_day = secs_since_start_of_eon % 86400;
1492 tm2->tm_hour = (int)(secs_since_start_of_day / 3600);
1493 tm2->tm_min = (int)((secs_since_start_of_day % 3600)/60);
1494 tm2->tm_sec = (int)(secs_since_start_of_day % 60);
1496 days_tmp = days_since_start_of_eon;
1497 yr_tmp = 0;
1499 // The first 3 100-year periods in this eon have
1500 // 100*365 + 24 days each.
1501 count = days_tmp / (100*365 + 24);
1502 if(count>3) count = 3;
1503 days_tmp -= (100*365 + 24)*count;
1504 yr_tmp += 100*count;
1506 // The first 24 4-year periods in this 100-year period have
1507 // 1 leap day each.
1508 count = days_tmp / (4*365 + 1);
1509 if(count>24) count = 24;
1510 days_tmp -= (4*365 + 1)*count;
1511 yr_tmp += 4*count;
1513 // The first 3 years in this 4-year period are not leap years.
1514 count = days_tmp / 365;
1515 if(count>3) count = 3;
1516 days_tmp -= 365*count;
1517 yr_tmp += count;
1519 tm2->tm_fullyear = (int)(1601 + eon*400 + yr_tmp);
1520 is_leapyear = ((yr_tmp%4)==3 &&
1521 yr_tmp!=99 && yr_tmp!=199 && yr_tmp!=299);
1523 for(k=0; k<11; k++) {
1524 static const u8 days_in_month[11] = // (Don't need December)
1525 { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30 };
1526 i64 days_in_this_month = (i64)days_in_month[k];
1527 if(k==1 && is_leapyear) days_in_this_month++;
1528 if(days_tmp >= days_in_this_month) {
1529 days_tmp -= days_in_this_month;
1530 tm2->tm_mon++;
1532 else {
1533 break;
1537 tm2->tm_mday = (int)(1+days_tmp);
1538 tm2->is_valid = 1;
1541 // Appends " UTC" if ts->tzcode==DE_TZCODE_UTC
1542 // No flags are currently defined.
1543 // Caller supplies buf (suggest it be at least size 64).
1544 // Returns an extra pointer to buf.
1545 char *de_timestamp_to_string(const struct de_timestamp *ts,
1546 char *buf, size_t buf_len, unsigned int flags)
1548 const char *tzlabel;
1549 char subsec[16];
1550 struct de_struct_tm tm2;
1552 if(!ts->is_valid) {
1553 de_strlcpy(buf, "[invalid timestamp]", buf_len);
1554 goto done;
1557 de_gmtime(ts, &tm2);
1558 if(!tm2.is_valid) {
1559 de_snprintf(buf, buf_len, "[timestamp out of range: %"I64_FMT"]",
1560 de_timestamp_to_unix_time(ts));
1561 goto done;
1564 if(ts->precision>DE_TSPREC_1SEC) {
1565 unsigned int ms;
1566 ms = (unsigned int)(tm2.tm_subsec/10000);
1567 if(ms>=1000) ms=999;
1568 de_snprintf(subsec, sizeof(subsec), ".%03u", ms);
1570 else {
1571 subsec[0] = '\0';
1574 tzlabel = (ts->tzcode==DE_TZCODE_UTC)?" UTC":"";
1575 if(ts->precision!=DE_TSPREC_UNKNOWN && ts->precision<=DE_TSPREC_1DAY) { // date only
1576 de_snprintf(buf, buf_len, "%04d-%02d-%02d",
1577 tm2.tm_fullyear, 1+tm2.tm_mon, tm2.tm_mday);
1578 goto done;
1580 de_snprintf(buf, buf_len, "%04d-%02d-%02d %02d:%02d:%02d%s%s",
1581 tm2.tm_fullyear, 1+tm2.tm_mon, tm2.tm_mday,
1582 tm2.tm_hour, tm2.tm_min, tm2.tm_sec, subsec, tzlabel);
1583 done:
1584 return buf;
1587 // Same as de_timestamp_to_string(), except it assumes the output is only
1588 // needed if debug output is enabled.
1589 // If it is not, it just returns an empty string, to avoid the relatively
1590 // slow date processing.
1591 char *de_dbg_timestamp_to_string(deark *c, const struct de_timestamp *ts,
1592 char *buf, size_t buf_len, unsigned int flags)
1594 if(c->debug_level<1) {
1595 buf[0] = '\0';
1596 return buf;
1598 return de_timestamp_to_string(ts, buf, buf_len, flags);
1601 // Returns the same time if called multiple times.
1602 void de_cached_current_time_to_timestamp(deark *c, struct de_timestamp *ts)
1604 if(!c->current_time.is_valid) {
1605 de_current_time_to_timestamp(&c->current_time);
1607 *ts = c->current_time;
1610 void de_declare_fmt(deark *c, const char *fmtname)
1612 if(c->module_nesting_level > 1) {
1613 return; // Only allowed for the top-level module
1615 if(c->format_declared) return;
1616 de_info(c, "Format: %s", fmtname);
1617 c->format_declared = 1;
1620 void de_declare_fmtf(deark *c, const char *fmt, ...)
1622 va_list ap;
1623 char buf[128];
1625 va_start(ap, fmt);
1626 de_vsnprintf(buf, sizeof(buf), fmt, ap);
1627 de_declare_fmt(c, buf);
1628 va_end(ap);
1631 // Returns a suitable input encoding.
1632 // If mparams.in_params.input_encoding exists and is not UNKNOWN,
1633 // returns that.
1634 // Else if c->input_encoding (the -inenc option) is not UNKNOWN, returns that.
1635 // Else returns dflt.
1636 de_encoding de_get_input_encoding(deark *c, de_module_params *mparams,
1637 de_encoding dflt)
1639 if(mparams && mparams->in_params.input_encoding!=DE_ENCODING_UNKNOWN) {
1640 return mparams->in_params.input_encoding;
1642 if(c->input_encoding!=DE_ENCODING_UNKNOWN) {
1643 return c->input_encoding;
1645 return dflt;
1648 // Assumes dst starts out with only '0' bits
1649 void de_copy_bits(const u8 *src, i64 srcbitnum,
1650 u8 *dst, i64 dstbitnum, i64 bitstocopy)
1652 i64 i;
1653 u8 b;
1655 for(i=0; i<bitstocopy; i++) {
1656 b = src[(srcbitnum+i)/8];
1657 b = (b>>(7-(srcbitnum+i)%8))&0x1;
1658 if(b) {
1659 b = b<<(7-(dstbitnum+i)%8);
1660 dst[(dstbitnum+i)/8] |= b;
1665 // A very simple hash table implementation, with int64 keys.
1667 #define DE_INTHASHTABLE_NBUCKETS 71
1669 struct de_inthashtable_item {
1670 i64 key;
1671 void *value;
1672 struct de_inthashtable_item *next; // Next item in linked list
1675 struct de_inthashtable_bucket {
1676 struct de_inthashtable_item *first_item;
1679 struct de_inthashtable {
1680 struct de_inthashtable_bucket buckets[DE_INTHASHTABLE_NBUCKETS];
1683 static struct de_inthashtable_bucket *inthashtable_find_bucket(struct de_inthashtable *ht,
1684 i64 key)
1686 i64 bkt_num;
1688 if(key>=0) bkt_num = key%DE_INTHASHTABLE_NBUCKETS;
1689 else bkt_num = (-key)%DE_INTHASHTABLE_NBUCKETS;
1691 return &ht->buckets[bkt_num];
1694 struct de_inthashtable *de_inthashtable_create(deark *c)
1696 return de_mallocarray(c, DE_INTHASHTABLE_NBUCKETS, sizeof(struct de_inthashtable));
1699 static void inthashtable_destroy_item(deark *c, struct de_inthashtable_item *item)
1701 de_free(c, item);
1704 static void inthashtable_destroy_items_in_bucket(deark *c, struct de_inthashtable_bucket *bkt)
1706 struct de_inthashtable_item *next_item;
1708 while(bkt->first_item) {
1709 next_item = bkt->first_item->next;
1710 inthashtable_destroy_item(c, bkt->first_item);
1711 bkt->first_item = next_item;
1715 void de_inthashtable_destroy(deark *c, struct de_inthashtable *ht)
1717 i64 i;
1719 if(!ht) return;
1720 for(i=0; i<DE_INTHASHTABLE_NBUCKETS; i++) {
1721 if(ht->buckets[i].first_item)
1722 inthashtable_destroy_items_in_bucket(c, &ht->buckets[i]);
1724 de_free(c, ht);
1727 // Returns NULL if item does not exist in the given bucket
1728 static struct de_inthashtable_item *inthashtable_find_item_in_bucket(struct de_inthashtable *ht,
1729 struct de_inthashtable_bucket *bkt, i64 key)
1731 struct de_inthashtable_item *p;
1733 p = bkt->first_item;
1734 while(p && (p->key != key)) {
1735 p = p->next;
1737 return p;
1740 // Returns NULL if item does not exist
1741 static struct de_inthashtable_item *inthashtable_find_item(struct de_inthashtable *ht, i64 key)
1743 struct de_inthashtable_bucket *bkt;
1745 if(!ht) return NULL;
1746 bkt = inthashtable_find_bucket(ht, key);
1747 return inthashtable_find_item_in_bucket(ht, bkt, key);
1750 // If key does not exist, sets *pvalue to NULL and returns 0.
1751 int de_inthashtable_get_item(deark *c, struct de_inthashtable *ht, i64 key, void **pvalue)
1753 struct de_inthashtable_item *item;
1755 item = inthashtable_find_item(ht, key);
1756 if(item) {
1757 *pvalue = item->value;
1758 return 1;
1760 *pvalue = NULL;
1761 return 0;
1764 int de_inthashtable_item_exists(deark *c, struct de_inthashtable *ht, i64 key)
1766 return (inthashtable_find_item(ht, key) != NULL);
1769 // Unconditionally adds an item to the given bucket (does not prevent duplicates)
1770 static void inthashtable_add_item_to_bucket(struct de_inthashtable *ht,
1771 struct de_inthashtable_bucket *bkt, struct de_inthashtable_item *new_item)
1773 new_item->next = bkt->first_item;
1774 bkt->first_item = new_item;
1777 // Returns 1 if the key has been newly-added,
1778 // or 0 if the key already existed.
1779 int de_inthashtable_add_item(deark *c, struct de_inthashtable *ht, i64 key, void *value)
1781 struct de_inthashtable_bucket *bkt;
1782 struct de_inthashtable_item *new_item;
1784 bkt = inthashtable_find_bucket(ht, key);
1785 if(inthashtable_find_item_in_bucket(ht, bkt, key)) {
1786 // Item already exist. Don't add it again.
1787 // TODO: This may eventually need to be changed to modify the existing item,
1788 // or delete-then-add the new item, instead of doing nothing.
1789 return 0;
1792 new_item = de_malloc(c, sizeof(struct de_inthashtable_item));
1793 new_item->key = key;
1794 new_item->value = value;
1795 inthashtable_add_item_to_bucket(ht, bkt, new_item);
1796 return 1;
1799 int de_inthashtable_remove_item(deark *c, struct de_inthashtable *ht, i64 key, void **pvalue)
1801 // TODO
1802 return 0;
1805 // Select one item arbitrarily, return its key and value, and delete it from the
1806 // hashtable.
1807 int de_inthashtable_remove_any_item(deark *c, struct de_inthashtable *ht, i64 *pkey, void **pvalue)
1809 i64 i;
1811 for(i=0; i<DE_INTHASHTABLE_NBUCKETS; i++) {
1812 struct de_inthashtable_item *item;
1814 item = ht->buckets[i].first_item;
1815 if(!item) continue;
1817 // Found an item. Copy it, for the caller.
1818 if(pkey) *pkey = item->key;
1819 if(pvalue) *pvalue = item->value;
1821 // Delete our copy of it.
1822 ht->buckets[i].first_item = item->next;
1823 inthashtable_destroy_item(c, item);
1824 return 1;
1827 // No items in hashtable.
1828 if(pkey) *pkey = 0;
1829 if(pvalue) *pvalue = NULL;
1830 return 0;
1833 // crcobj: Functions for performing CRC calculations, and other checksum-like
1834 // functions for which the result can fit in a 32-bit int.
1836 struct de_crcobj {
1837 u32 val;
1838 unsigned int crctype;
1839 deark *c;
1840 u16 *table16;
1843 #define DE_CRC32_INIT 0
1845 // crc32_calc() is based on public domain code by Jon Mayo, downloaded
1846 // from <http://orangetide.com/code/crc.c>.
1847 // It includes minor changes for Deark. I disclaim any copyright on these
1848 // minor changes. -JS
1849 // Note: I have found several other seemingly-independent implementations
1850 // of the same algorithm, such as the one by Karl Malbrain, used in miniz.
1851 // I don't know its origin.
1852 static u32 crc32_calc(const u8 *ptr, size_t cnt, u32 crc)
1854 static const u32 crc32_tab[16] = {
1855 0x00000000U, 0x1db71064U, 0x3b6e20c8U, 0x26d930acU,
1856 0x76dc4190U, 0x6b6b51f4U, 0x4db26158U, 0x5005713cU,
1857 0xedb88320U, 0xf00f9344U, 0xd6d6a3e8U, 0xcb61b38cU,
1858 0x9b64c2b0U, 0x86d3d2d4U, 0xa00ae278U, 0xbdbdf21cU
1861 if(cnt==0) return crc;
1862 crc = ~crc;
1863 while(cnt--) {
1864 crc = (crc >> 4) ^ crc32_tab[(crc & 0xf) ^ (*ptr & 0xf)];
1865 crc = (crc >> 4) ^ crc32_tab[(crc & 0xf) ^ (*ptr++ >> 4)];
1867 return ~crc;
1870 // For a one-shot CRC calculations, or the first part of a multi-part
1871 // calculation.
1872 // buf can be NULL (in which case buf_len should be 0, but is ignored)
1873 static u32 de_crc32(const void *buf, i64 buf_len)
1875 if(!buf) return DE_CRC32_INIT;
1876 return (u32)crc32_calc((const u8*)buf, (size_t)buf_len, DE_CRC32_INIT);
1879 static u32 de_crc32_continue(u32 prev_crc, const void *buf, i64 buf_len)
1881 return (u32)crc32_calc((const u8*)buf, (size_t)buf_len, prev_crc);
1884 static void adler32_continue(struct de_crcobj *crco, const u8 *buf, i64 buf_len)
1886 u32 s1 = crco->val & 0xffff;
1887 u32 s2 = (crco->val >> 16) & 0xffff;
1888 i64 i;
1890 for(i = 0; i<buf_len; i++) {
1891 s1 = (s1 + buf[i]) % 65521;
1892 s2 = (s2 + s1) % 65521;
1894 crco->val = (s2 << 16) + s1;
1897 // This is the CRC-16 algorithm used in MacBinary.
1898 // It is in the x^16 + x^12 + x^5 + 1 family.
1899 // CRC-16-CCITT is probably the best name for it, though I'm not completely
1900 // sure, and there are several algorithms that have been called "CRC-16-CCITT".
1901 static void de_crc16ccitt_init(struct de_crcobj *crco)
1903 const unsigned int polynomial = 0x1021;
1904 unsigned int index;
1906 crco->table16 = de_mallocarray(crco->c, 256, sizeof(u16));
1907 crco->table16[0] = 0;
1908 for(index=0; index<128; index++) {
1909 unsigned int carry = crco->table16[index] & 0x8000;
1910 unsigned int temp = (crco->table16[index] << 1) & 0xffff;
1911 crco->table16[index * 2 + (carry ? 0 : 1)] = temp ^ polynomial;
1912 crco->table16[index * 2 + (carry ? 1 : 0)] = temp;
1916 static void de_crc16ccitt_continue(struct de_crcobj *crco, const u8 *buf, i64 buf_len)
1918 i64 k;
1920 if(!crco->table16) return;
1921 for(k=0; k<buf_len; k++) {
1922 crco->val = ((crco->val<<8)&0xffff) ^
1923 (u32)crco->table16[((crco->val>>8) ^ (u32)buf[k]) & 0xff];
1927 // This is the CRC-16 algorithm used in ARC, LHA, ZOO, etc.
1928 // It is in the x^16 + x^15 + x^2 + 1 family.
1929 // It's some variant of CRC-16-IBM, and sometimes simply called "CRC-16". But
1930 // both these names are more ambiguous than I'd like, so I'm calling it "ARC".
1931 static void de_crc16arc_init(struct de_crcobj *crco)
1933 u32 i, k;
1935 crco->table16 = de_mallocarray(crco->c, 256, sizeof(u16));
1936 for(i=0; i<256; i++) {
1937 crco->table16[i] = i;
1938 for(k=0; k<8; k++)
1939 crco->table16[i] = (crco->table16[i]>>1) ^ ((crco->table16[i] & 1) ? 0xa001 : 0);
1943 static void de_crc16arc_continue(struct de_crcobj *crco, const u8 *buf, i64 buf_len)
1945 i64 k;
1947 if(!crco->table16) return;
1948 for(k=0; k<buf_len; k++) {
1949 crco->val = ((crco->val>>8) ^
1950 (u32)crco->table16[(crco->val ^ buf[k]) & 0xff]);
1954 // Allocates, initializes, and resets a new object.
1955 struct de_crcobj *de_crcobj_create(deark *c, UI type_and_flags)
1957 struct de_crcobj *crco;
1959 crco = de_malloc(c, sizeof(struct de_crcobj));
1960 crco->c = c;
1961 crco->crctype = type_and_flags;
1963 switch(crco->crctype) {
1964 case DE_CRCOBJ_CRC16_CCITT:
1965 de_crc16ccitt_init(crco);
1966 break;
1967 case DE_CRCOBJ_CRC16_ARC:
1968 de_crc16arc_init(crco);
1969 break;
1972 de_crcobj_reset(crco);
1973 return crco;
1976 void de_crcobj_destroy(struct de_crcobj *crco)
1978 deark *c;
1980 if(!crco) return;
1981 c = crco->c;
1982 de_free(c, crco->table16);
1983 de_free(c, crco);
1986 void de_crcobj_reset(struct de_crcobj *crco)
1988 crco->val = 0;
1990 switch(crco->crctype) {
1991 case DE_CRCOBJ_CRC32_IEEE:
1992 crco->val = de_crc32(NULL, 0);
1993 break;
1994 case DE_CRCOBJ_ADLER32:
1995 crco->val = 1;
1996 break;
2000 u32 de_crcobj_getval(struct de_crcobj *crco)
2002 return crco->val;
2005 void de_crcobj_addbuf(struct de_crcobj *crco, const u8 *buf, i64 buf_len)
2007 if(buf_len<1) return;
2009 switch(crco->crctype) {
2010 case DE_CRCOBJ_CRC32_IEEE:
2011 crco->val = de_crc32_continue(crco->val, buf, buf_len);
2012 break;
2013 case DE_CRCOBJ_CRC16_CCITT:
2014 de_crc16ccitt_continue(crco, buf, buf_len);
2015 break;
2016 case DE_CRCOBJ_CRC16_ARC:
2017 de_crc16arc_continue(crco, buf, buf_len);
2018 break;
2019 case DE_CRCOBJ_ADLER32:
2020 adler32_continue(crco, buf, buf_len);
2021 break;
2025 void de_crcobj_addzeroes(struct de_crcobj *crco, i64 len)
2027 i64 i;
2028 const u8 z = 0;
2030 for(i=0; i<len; i++) {
2031 de_crcobj_addbuf(crco, &z, 1);
2035 static int addslice_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
2036 i64 buf_len)
2038 de_crcobj_addbuf((struct de_crcobj*)brctx->userdata, buf, buf_len);
2039 return 1;
2042 void de_crcobj_addslice(struct de_crcobj *crco, dbuf *f, i64 pos, i64 len)
2044 dbuf_buffered_read(f, pos, len, addslice_cbfn, (void*)crco);
2047 void de_get_reproducible_timestamp(deark *c, struct de_timestamp *ts)
2049 if(c->reproducible_timestamp.is_valid) {
2050 *ts = c->reproducible_timestamp;
2051 return;
2054 // An arbitrary timestamp
2055 // $ date -u --date='2010-09-08 07:06:05' '+%s'
2056 de_unix_time_to_timestamp(1283929565LL, ts, 0x1);
2059 // Call this to ensure that a zip/tar file will be created, even if it has
2060 // no member files.
2061 int de_archive_initialize(deark *c)
2063 if(c->output_style!=DE_OUTPUTSTYLE_ARCHIVE) return 0;
2064 switch(c->archive_fmt) {
2065 case DE_ARCHIVEFMT_ZIP:
2066 return de_zip_create_file(c);
2067 case DE_ARCHIVEFMT_TAR:
2068 return de_tar_create_file(c);
2070 return 0;