1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // deark-util.c: Most of the main library functions
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
10 #include "deark-version.h"
12 #define DE_MAX_SUBMODULE_NESTING_LEVEL 10
14 char *de_get_version_string(char *buf
, size_t bufsize
)
18 if((DE_VERSION_NUMBER
&0x000000ffU
) == 0)
19 de_strlcpy(extver
, "", sizeof(extver
));
21 de_snprintf(extver
, sizeof(extver
), "-%u", DE_VERSION_NUMBER
&0x000000ff);
23 de_snprintf(buf
, bufsize
, "%u.%u.%u%s%s",
24 (DE_VERSION_NUMBER
&0xff000000U
)>>24,
25 (DE_VERSION_NUMBER
&0x00ff0000U
)>>16,
26 (DE_VERSION_NUMBER
&0x0000ff00U
)>>8,
27 extver
, DE_VERSION_SUFFIX
);
32 unsigned int de_get_version_int(void)
34 return DE_VERSION_NUMBER
;
37 void de_strlcpy(char *dst
, const char *src
, size_t dstlen
)
41 if(n
>dstlen
-1) n
=dstlen
-1;
42 de_memcpy(dst
, src
, n
);
46 // Compare two ASCII strings, as if all letters were lowercase.
47 // (Library functions like strcasecmp or _stricmp usually exist, but we roll
48 // our own for portability, and consistent behavior.)
49 static int de_strcasecmp_internal(const char *a
, const char *b
,
57 if(has_n
&& (k
>=n
)) break;
58 a1
= (unsigned char)a
[k
];
59 b1
= (unsigned char)b
[k
];
60 if(a1
==0 && b1
==0) break;
61 if(a1
>='A' && a1
<='Z') a1
+= 32;
62 if(b1
>='A' && b1
<='Z') b1
+= 32;
// Compares two strings as if all ASCII letters were lowercase, with no
// limit on the number of characters examined.
// This is a thin wrapper around de_strcasecmp_internal().
int de_strcasecmp(const char *a, const char *b)
{
	const int use_length_limit = 0;

	return de_strcasecmp_internal(a, b, use_length_limit, 0);
}
// Same as de_strcasecmp(), except that the comparison stops after the
// first n characters.
// This is a thin wrapper around de_strcasecmp_internal().
int de_strncasecmp(const char *a, const char *b, size_t n)
{
	const int use_length_limit = 1;

	return de_strcasecmp_internal(a, b, use_length_limit, n);
}
80 // A wrapper for strchr().
81 char *de_strchr(const char *s
, int c
)
87 void de_snprintf(char *buf
, size_t buflen
, const char *fmt
, ...)
91 de_vsnprintf(buf
,buflen
,fmt
,ap
);
95 static void de_puts_advanced(deark
*c
, unsigned int flags
, const char *s
)
102 unsigned int special_code
;
105 s_len
= de_strlen(s
);
106 tmps
= de_malloc(c
, (i64
)s_len
+1);
108 // Search for characters that enable/disable highlighting,
109 // and split the string at them.
110 while(s_pos
< s_len
) {
111 if(s
[s_pos
]=='\x01' || s
[s_pos
]=='\x02' || s
[s_pos
]=='\x03') {
112 // Found a special code
114 if(s
[s_pos
]=='\x02' && s
[s_pos
+1]=='\x01' && hlmode
) {
115 // Optimization: UNHL followed immediately by HL is a no-op.
118 else if(s
[s_pos
]=='\x01') {
119 special_code
= DE_MSGCODE_HL
;
122 else if(s
[s_pos
]=='\x03') {
123 special_code
= DE_MSGCODE_RGBSAMPLE
;
124 if(s_pos
+ 7 <= s_len
) {
125 param1
= DE_MAKE_RGB(
126 ((s
[s_pos
+1]&0x0f)<<4) | (s
[s_pos
+2]&0x0f),
127 ((s
[s_pos
+3]&0x0f)<<4) | (s
[s_pos
+4]&0x0f),
128 ((s
[s_pos
+5]&0x0f)<<4) | (s
[s_pos
+6]&0x0f));
132 special_code
= DE_MSGCODE_UNHL
;
136 // Print what we have of the string before the special code
138 tmps
[tmps_pos
] = '\0';
139 c
->msgfn(c
, flags
, tmps
);
143 // "Print" the special code
144 if(special_code
&& c
->specialmsgfn
) {
145 c
->specialmsgfn(c
, flags
, special_code
, param1
);
148 // Advance past the special code
151 else if(special_code
==DE_MSGCODE_RGBSAMPLE
)
157 tmps
[tmps_pos
++] = s
[s_pos
++];
161 // Unset highlight, if it somehow got left on.
162 if(hlmode
&& c
->specialmsgfn
) {
163 c
->specialmsgfn(c
, flags
, DE_MSGCODE_UNHL
, 0);
166 tmps
[tmps_pos
] = '\0';
167 c
->msgfn(c
, flags
, tmps
);
171 void de_puts(deark
*c
, unsigned int flags
, const char *s
)
175 if(!c
|| !c
->msgfn
) {
180 // Scan the printable string for "magic" byte sequences that represent
181 // text color changes, etc. It's admittedly a little ugly that we have to
184 // We could invent and use any byte sequences we want for this, as long as
185 // they will not otherwise occur in "printable" output.
186 // I.e., if it's valid UTF-8, it must contain a character we classify as
187 // "nonprintable". We could even use actual ANSI escape sequences, since
188 // Esc is a nonprintable character (but that would have little benefit,
189 // and feel kinda wrong, since this part of the code isn't supposed to
190 // know about ANSI escape sequences).
191 // Short sequences are preferable, because they're simpler to detect, and
192 // because these bytes count against some of our size limits.
193 // Valid UTF-8 is probably best, because someday we might want this scheme
194 // to be compatible with something else (such as ucstrings).
195 // So, we're simply using:
196 // U+0001 : DE_CODEPOINT_HL
197 // U+0002 : DE_CODEPOINT_UNHL
198 // U+0003 : DE_CODEPOINT_RGBSAMPLE (followed by 6 bytes for the RGB color)
200 for(k
=0; s
[k
]; k
++) {
201 if(s
[k
]=='\x01' || s
[k
]=='\x02' || s
[k
]=='\x03') {
202 de_puts_advanced(c
, flags
, s
);
207 c
->msgfn(c
, flags
, s
);
210 static void de_vprintf(deark
*c
, unsigned int flags
, const char *fmt
, va_list ap
)
214 de_vsnprintf(buf
, sizeof(buf
), fmt
, ap
);
215 de_puts(c
, flags
, buf
);
218 void de_printf(deark
*c
, unsigned int flags
, const char *fmt
, ...)
223 de_vprintf(c
, flags
, fmt
, ap
);
227 static void de_vdbg_internal(deark
*c
, const char *fmt
, va_list ap
)
229 char bars_and_spaces
[128];
233 const char *dprefix
= "DEBUG: ";
236 if(c
->dprefix
) dprefix
= c
->dprefix
;
238 nbars
= c
->module_nesting_level
- 1;
239 if(nbars
>10) nbars
=10;
241 nspaces
= c
->dbg_indent_amount
;
242 if(nspaces
>50) nspaces
=50;
251 // One or more vertical lines, to indicate module nesting
252 bars_and_spaces
[bpos
++] = '\xe2'; // U+2502 Box drawings light vertical
253 bars_and_spaces
[bpos
++] = '\x94';
254 bars_and_spaces
[bpos
++] = '\x82';
258 bars_and_spaces
[bpos
++] = ' ';
261 bars_and_spaces
[bpos
] = '\0';
263 de_printf(c
, DE_MSGTYPE_DEBUG
, "%s%s", dprefix
, bars_and_spaces
);
264 de_vprintf(c
, DE_MSGTYPE_DEBUG
, fmt
, ap
);
265 de_puts(c
, DE_MSGTYPE_DEBUG
, "\n");
268 void de_dbg(deark
*c
, const char *fmt
, ...)
272 if(c
&& c
->debug_level
<1) return;
274 de_vdbg_internal(c
, fmt
, ap
);
278 void de_dbg2(deark
*c
, const char *fmt
, ...)
282 if(c
&& c
->debug_level
<2) return;
284 de_vdbg_internal(c
, fmt
, ap
);
288 void de_dbg3(deark
*c
, const char *fmt
, ...)
292 if(c
&& c
->debug_level
<3) return;
294 de_vdbg_internal(c
, fmt
, ap
);
298 void de_dbgx(deark
*c
, int lv
, const char *fmt
, ...)
302 if(c
&& c
->debug_level
<lv
) return;
304 de_vdbg_internal(c
, fmt
, ap
);
308 void de_dbg_indent(deark
*c
, int n
)
310 c
->dbg_indent_amount
+= n
;
313 void de_dbg_indent_save(deark
*c
, int *saved_indent_level
)
315 *saved_indent_level
= c
->dbg_indent_amount
;
318 void de_dbg_indent_restore(deark
*c
, int saved_indent_level
)
320 c
->dbg_indent_amount
= saved_indent_level
;
323 static int get_ndigits_for_offset(i64 n
)
329 else if(n
<1000) nd
=3;
330 else if(n
<10000) nd
=4;
336 typedef void (*hexdump_printline_fn
)(deark
*c
, struct hexdump_ctx
*hctx
);
339 // same for each row:
341 const char *prefix_sep
; // ":"
343 hexdump_printline_fn printlinefn
;
344 char offset_fmtstr
[32];
348 i64 bytesthisrow
; // num bytes used in .rowbuf
353 static void do_hexdump_row(deark
*c
, struct hexdump_ctx
*hctx
)
355 char offset_formatted
[32];
356 char linebuf
[3*16+32];
364 asciibuf
[asciibufpos
++] = '\"';
365 for(k
=0; k
<hctx
->bytesthisrow
; k
++) {
368 linebuf
[linebufpos
++] = de_get_hexchar(b
/16);
369 linebuf
[linebufpos
++] = de_get_hexchar(b
%16);
370 linebuf
[linebufpos
++] = ' ';
371 if(b
>=32 && b
<=126) {
372 asciibuf
[asciibufpos
++] = (char)b
;
375 asciibuf
[asciibufpos
++] = '\x01'; // DE_CODEPOINT_HL
376 asciibuf
[asciibufpos
++] = '.';
377 // We'll often turn off highlighting only to turn it back on
378 // again for the next character. The OFF+ON sequences will be
379 // optimized out later, though, so there's no reason to worry
381 asciibuf
[asciibufpos
++] = '\x02'; // DE_CODEPOINT_UNHL
385 // Pad and terminate the hex values
386 while(linebufpos
<48) {
387 linebuf
[linebufpos
++] = ' ';
389 linebuf
[linebufpos
] = '\0';
391 // Terminate or erase the ASCII representation
392 if(hctx
->flags
&0x1) {
393 asciibuf
[asciibufpos
++] = '\"';
394 asciibuf
[asciibufpos
++] = '\0';
400 // Careful: With a variable format string, the compiler won't be able to
402 de_snprintf(offset_formatted
, sizeof(offset_formatted
), hctx
->offset_fmtstr
,
403 (i64
)hctx
->row_offset
);
405 de_snprintf(hctx
->outbuf_sz
, sizeof(hctx
->outbuf_sz
), "%s%s%s: %s%s",
406 hctx
->prefix
, hctx
->prefix_sep
, offset_formatted
, linebuf
, asciibuf
);
407 hctx
->printlinefn(c
, hctx
);
410 // If prefix is NULL, a default will be used.
412 // 0x1 = Include an ASCII representation
413 static void de_hexdump_internal(deark
*c
, struct hexdump_ctx
*hctx
,
415 i64 nbytes_avail
, i64 max_nbytes_to_dump
)
419 int ndigits_for_offset
;
420 int was_truncated
= 0;
422 if(hctx
->flags
& 0x2) {
423 // Don't print a prefix
425 hctx
->prefix_sep
= "";
428 hctx
->prefix_sep
= ":";
431 if(nbytes_avail
> max_nbytes_to_dump
) {
432 len
= max_nbytes_to_dump
;
439 // Construct a format string to use for byte offsets.
441 // If we're truncating, the highest offset we'll print is the number
442 // of data bytes that we'll dump.
443 ndigits_for_offset
= get_ndigits_for_offset(len
);
448 // If we're not truncating, the highest offset we'll print is the
449 // highest byte offset that is a multiple of 16.
450 ndigits_for_offset
= get_ndigits_for_offset(((len
-1)/16)*16);
452 de_snprintf(hctx
->offset_fmtstr
, sizeof(hctx
->offset_fmtstr
), "%%%d"I64_FMT
, ndigits_for_offset
);
454 while(1) { // For each row...
455 if(pos
>= pos1
+len
) break;
457 hctx
->row_offset
= pos
-pos1
;
459 hctx
->bytesthisrow
= (pos1
+len
)-pos
;
460 if(hctx
->bytesthisrow
>16) hctx
->bytesthisrow
=16;
462 dbuf_read(f
, hctx
->rowbuf
, pos
, hctx
->bytesthisrow
);
464 do_hexdump_row(c
, hctx
);
466 pos
+= hctx
->bytesthisrow
;
469 de_snprintf(hctx
->outbuf_sz
, sizeof(hctx
->outbuf_sz
),
470 "%s%s%"I64_FMT
": ...", hctx
->prefix
, hctx
->prefix_sep
, len
);
471 hctx
->printlinefn(c
, hctx
);
475 static void hexdump_printline_dbg(deark
*c
, struct hexdump_ctx
*hctx
)
477 de_dbg(c
, "%s", hctx
->outbuf_sz
);
480 // If prefix is NULL (and the no_prefix flag is not set), a default will be used.
482 // 0x1 = Include an ASCII representation
484 void de_dbg_hexdump(deark
*c
, dbuf
*f
, i64 pos1
,
485 i64 nbytes_avail
, i64 max_nbytes_to_dump
,
486 const char *prefix1
, unsigned int flags
)
488 struct hexdump_ctx hctx
;
491 hctx
.prefix
= (prefix1
) ? prefix1
: "data";
492 hctx
.printlinefn
= hexdump_printline_dbg
;
494 de_hexdump_internal(c
, &hctx
, f
, pos1
, nbytes_avail
, max_nbytes_to_dump
);
497 static void hexdump_printline_ext(deark
*c
, struct hexdump_ctx
*hctx
)
499 de_printf(c
, DE_MSGTYPE_MESSAGE
, "%s\n", hctx
->outbuf_sz
);
502 // Print a hexdump in the style of the "hexdump" module.
503 void de_hexdump2(deark
*c
, dbuf
*f
, i64 pos1
, i64 nbytes_avail
,
504 i64 max_nbytes_to_dump
, unsigned int flags
)
506 struct hexdump_ctx hctx
;
508 hctx
.flags
= flags
| 0x2;
510 hctx
.printlinefn
= hexdump_printline_ext
;
511 de_hexdump_internal(c
, &hctx
, f
, pos1
, nbytes_avail
, max_nbytes_to_dump
);
514 // This is such a common thing to do, that it's worth having a function for it.
515 void de_dbg_dimensions(deark
*c
, i64 w
, i64 h
)
517 de_dbg(c
, "dimensions: %"I64_FMT DE_CHAR_TIMES
"%"I64_FMT
, w
, h
);
520 // Generates a "magic" code that, when included in the debug output, will
521 // (in some circumstances) display a small sample of the given color.
522 // Caller supplies csamp[16].
523 // Returns a pointer to csamp, for convenience.
524 char *de_get_colorsample_code(deark
*c
, de_color clr
, char *csamp
,
527 unsigned int r
, g
, b
;
534 r
= (unsigned int)DE_COLOR_R(clr
);
535 g
= (unsigned int)DE_COLOR_G(clr
);
536 b
= (unsigned int)DE_COLOR_B(clr
);
538 // Only the low 4 bits are significant. We add 16 so that the bits can't
539 // all be 0; since we can't have NUL bytes in this NUL-terminated string.
540 // Also, it's nice if the values are all <= 127, to make them UTF-8
542 csamp
[0] = '\x03'; // refer to DE_CODEPOINT_RGBSAMPLE
543 csamp
[1] = 16 + (r
>>4)%16;
544 csamp
[2] = 16 + r
%16;
545 csamp
[3] = 16 + (g
>>4)%16;
546 csamp
[4] = 16 + g
%16;
547 csamp
[5] = 16 + (b
>>4)%16;
548 csamp
[6] = 16 + b
%16;
553 // Print debugging output for an 8-bit RGB palette entry.
554 void de_dbg_pal_entry2(deark
*c
, i64 idx
, de_color clr
,
555 const char *txt_before
, const char *txt_in
, const char *txt_after
)
561 if(c
->debug_level
<2) return;
562 if(!txt_before
) txt_before
="";
563 if(!txt_in
) txt_in
="";
564 if(!txt_after
) txt_after
="";
565 r
= (int)DE_COLOR_R(clr
);
566 g
= (int)DE_COLOR_G(clr
);
567 b
= (int)DE_COLOR_B(clr
);
568 a
= (int)DE_COLOR_A(clr
);
570 de_snprintf(astr
, sizeof(astr
), ",A=%d", a
);
576 de_get_colorsample_code(c
, clr
, csamp
, sizeof(csamp
));
577 de_dbg2(c
, "pal[%3d] = %s(%3d,%3d,%3d%s%s)%s%s", (int)idx
, txt_before
,
578 r
, g
, b
, astr
, txt_in
, csamp
, txt_after
);
581 void de_dbg_pal_entry(deark
*c
, i64 idx
, de_color clr
)
583 if(c
->debug_level
<2) return;
584 de_dbg_pal_entry2(c
, idx
, clr
, NULL
, NULL
, NULL
);
587 void de_verr(deark
*c
, const char *fmt
, va_list ap
)
593 de_puts(c
, DE_MSGTYPE_ERROR
, "Error: ");
594 de_vprintf(c
, DE_MSGTYPE_ERROR
, fmt
, ap
);
595 de_puts(c
, DE_MSGTYPE_ERROR
, "\n");
599 void de_err(deark
*c
, const char *fmt
, ...)
608 void de_vwarn(deark
*c
, const char *fmt
, va_list ap
)
610 if(!c
->show_warnings
) return;
611 de_puts(c
, DE_MSGTYPE_WARNING
, "Warning: ");
612 de_vprintf(c
, DE_MSGTYPE_WARNING
, fmt
, ap
);
613 de_puts(c
, DE_MSGTYPE_WARNING
, "\n");
616 void de_warn(deark
*c
, const char *fmt
, ...)
620 if(!c
->show_warnings
) return;
622 de_vwarn(c
, fmt
, ap
);
626 // For "informational" messages: Those that will be suppressed by -noinfo.
627 void de_info(deark
*c
, const char *fmt
, ...)
631 if(!c
->show_infomessages
) return;
633 de_vprintf(c
, DE_MSGTYPE_MESSAGE
, fmt
, ap
);
635 de_puts(c
, DE_MSGTYPE_MESSAGE
, "\n");
638 // For "payload" messages, that won't be suppressed by options like -q.
639 // (Note that there is nothing wrong with using de_printf or de_puts instead of
641 void de_msg(deark
*c
, const char *fmt
, ...)
646 de_vprintf(c
, DE_MSGTYPE_MESSAGE
, fmt
, ap
);
648 de_puts(c
, DE_MSGTYPE_MESSAGE
, "\n");
652 void de_fatalerror(deark
*c
)
654 if(c
&& c
->fatalerrorfn
) {
660 void de_internal_err_fatal(deark
*c
, const char *fmt
, ...)
664 de_puts(c
, DE_MSGTYPE_ERROR
, "Internal error: ");
666 de_vprintf(c
, DE_MSGTYPE_ERROR
, fmt
, ap
);
668 de_puts(c
, DE_MSGTYPE_ERROR
, "\n");
672 void de_internal_err_nonfatal(deark
*c
, const char *fmt
, ...)
678 de_vsnprintf(buf
, sizeof(buf
), fmt
, ap
);
680 de_err(c
, "Internal: %s", buf
);
683 // TODO: Make de_malloc use de_mallocarray internally, instead of vice versa.
684 void *de_mallocarray(deark
*c
, i64 nmemb
, size_t membsize
)
686 if(nmemb
>500000000 || nmemb
<0 || membsize
>500000000) {
687 de_err(c
, "Out of memory");
692 return de_malloc(c
, nmemb
*(i64
)membsize
);
695 // Memory returned is always zeroed.
697 // Always succeeds; never returns NULL.
698 void *de_malloc(deark
*c
, i64 n
)
702 if(n
<0 || n
>500000000) {
703 de_err(c
, "Out of memory (%d bytes requested)",(int)n
);
708 m
= calloc((size_t)n
,1);
710 de_err(c
, "Memory allocation failed (%d bytes)",(int)n
);
717 // TODO: Make de_realloc use de_reallocarray internally, instead of vice versa.
718 void *de_reallocarray(deark
*c
, void *m
, i64 oldnmemb
, size_t membsize
,
722 if(newnmemb
>500000000 || newnmemb
<0 || oldnmemb
<0 || membsize
>500000000) {
723 de_err(c
, "Out of memory");
728 return de_realloc(c
, m
,
729 oldnmemb
*(i64
)membsize
,
730 newnmemb
*(i64
)membsize
);
733 // If you know oldsize, you can provide it, and newly-allocated bytes will be zeroed.
734 // Otherwise, set oldsize==newsize, and newly-allocated bytes won't be zeroed.
735 // If oldmem is NULL, this behaves the same as de_malloc, and all bytes are zeroed.
736 void *de_realloc(deark
*c
, void *oldmem
, i64 oldsize
, i64 newsize
)
741 return de_malloc(c
, newsize
);
744 newmem
= realloc(oldmem
, (size_t)newsize
);
746 de_err(c
, "Memory reallocation failed (%d bytes)",(int)newsize
);
752 if(oldsize
<newsize
) {
753 // zero out any newly-allocated bytes
754 de_zeromem(&((u8
*)newmem
)[oldsize
], (size_t)(newsize
-oldsize
));
760 void de_free(deark
*c
, void *m
)
765 // Returns the index into c->module_info[], or -1 if not found.
766 int de_get_module_idx_by_id(deark
*c
, const char *module_id
)
771 if(!module_id
) return -1;
773 for(i
=0; i
<c
->num_modules
; i
++) {
774 if(!de_strcmp(c
->module_info
[i
].id
, module_id
)) {
777 for(k
=0; k
<DE_MAX_MODULE_ALIASES
; k
++) {
778 if(!c
->module_info
[i
].id_alias
[k
]) break;
779 if(!de_strcmp(c
->module_info
[i
].id_alias
[k
], module_id
)) {
787 struct deark_module_info
*de_get_module_by_id(deark
*c
, const char *module_id
)
791 idx
= de_get_module_idx_by_id(c
, module_id
);
792 if(idx
<0) return NULL
;
793 return &c
->module_info
[idx
];
796 int de_run_module(deark
*c
, struct deark_module_info
*mi
, de_module_params
*mparams
,
797 enum de_moddisp_enum moddisp
)
799 enum de_moddisp_enum old_moddisp
;
800 struct de_detection_data_struct
*old_detection_data
;
803 if(!mi
->run_fn
) return 0;
804 // Note that c->module_nesting_level is 0 when we are not in a module,
805 // 1 when in the top-level module, 2 for a first-level submodule, etc.
806 if(c
->module_nesting_level
>= 1+DE_MAX_SUBMODULE_NESTING_LEVEL
) {
807 de_err(c
, "Max module nesting level exceeded");
811 old_moddisp
= c
->module_disposition
;
812 c
->module_disposition
= moddisp
;
814 old_detection_data
= c
->detection_data
;
815 if(c
->module_nesting_level
> 0) {
816 c
->detection_data
= NULL
;
819 if(c
->module_nesting_level
>0 && c
->debug_level
>=3) {
820 de_dbg3(c
, "[using %s module]", mi
->id
);
822 c
->module_nesting_level
++;
823 mi
->run_fn(c
, mparams
);
824 c
->module_nesting_level
--;
825 c
->module_disposition
= old_moddisp
;
826 c
->detection_data
= old_detection_data
;
830 int de_run_module_by_id(deark
*c
, const char *id
, de_module_params
*mparams
)
832 struct deark_module_info
*module_to_use
;
834 module_to_use
= de_get_module_by_id(c
, id
);
836 de_err(c
, "Unknown or unsupported format \"%s\"", id
);
840 return de_run_module(c
, module_to_use
, mparams
, DE_MODDISP_INTERNAL
);
843 int de_run_module_by_id_on_slice(deark
*c
, const char *id
, de_module_params
*mparams
,
844 dbuf
*f
, i64 pos
, i64 len
)
849 old_ifile
= c
->infile
;
851 if(pos
==0 && len
==f
->len
) {
852 // Optimization: We don't need a subfile in this case
854 ret
= de_run_module_by_id(c
, id
, mparams
);
857 c
->infile
= dbuf_open_input_subfile(f
, pos
, len
);
858 ret
= de_run_module_by_id(c
, id
, mparams
);
859 dbuf_close(c
->infile
);
862 c
->infile
= old_ifile
;
866 // Same as de_run_module_by_id_on_slice(), but takes just ->codes
867 // as a parameter, instead of a full de_module_params struct.
868 int de_run_module_by_id_on_slice2(deark
*c
, const char *id
, const char *codes
,
869 dbuf
*f
, i64 pos
, i64 len
)
871 de_module_params
*mparams
= NULL
;
874 mparams
= de_malloc(c
, sizeof(de_module_params
));
875 mparams
->in_params
.codes
= codes
;
876 ret
= de_run_module_by_id_on_slice(c
, id
, mparams
, f
, pos
, len
);
881 const char *de_get_ext_option(deark
*c
, const char *name
)
885 for(i
=0; i
<c
->num_ext_options
; i
++) {
886 if(!de_strcmp(c
->ext_option
[i
].name
, name
)) {
887 return c
->ext_option
[i
].val
;
890 return NULL
; // Option name not found.
894 // 0 if false, ("0", "n...", "f...", etc.)
895 // 1 if true (empty value, "1", "y...", "t...", etc.)
896 // defaultval (which can be any integer) if not set, or value is malformed.
897 int de_get_ext_option_bool(deark
*c
, const char *name
, int defaultval
)
901 val
= de_get_ext_option(c
, name
);
902 if(!val
) return defaultval
;
903 if(val
[0]=='\0' || val
[0]=='1' || val
[0]=='y' || val
[0]=='Y' ||
904 val
[0]=='t' || val
[0]=='T')
908 if(val
[0]=='0' || val
[0]=='n' || val
[0]=='N' || val
[0]=='f' ||
916 int de_atoi(const char *string
)
921 i64
de_atoi64(const char *string
)
923 return de_strtoll(string
, NULL
, 10);
926 i64
de_min_int(i64 n1
, i64 n2
)
928 return (n1
<n2
) ? n1
: n2
;
931 i64
de_max_int(i64 n1
, i64 n2
)
933 return (n1
>n2
) ? n1
: n2
;
936 i64
de_pad_to_2(i64 x
)
938 return (x
&0x1) ? x
+1 : x
;
941 i64
de_pad_to_4(i64 x
)
947 // Valid for x=0 to 62. If x is invalid, returns 1 (=2^0).
950 if(x
<0 || x
>62) return 1;
951 return (i64
)1 << (unsigned int)x
;
954 i64
de_pad_to_n(i64 x
, i64 n
)
965 i64
de_log2_rounded_up(i64 n
)
970 for(i
=2; i
<32; i
++) {
971 if(n
<= (((i64
)1)<<i
)) return i
;
976 char *de_print_base2_fixed(char *buf
, size_t buf_len
, u64 n
, UI bitcount
)
981 if(buf_len
<(size_t)bitcount
+1) {
985 for(x
=0; x
<bitcount
; x
++) {
986 buf
[bpos
++] = (n
& (1ULL<<(bitcount
-1-x
))) ? '1' : '0';
993 static const char g_empty_string
[] = "";
995 const char *de_get_sz_ext(const char *sz
)
1000 if(!sz
) return g_empty_string
;
1002 len
= (int)de_strlen(sz
);
1003 if(len
<2) return g_empty_string
;
1005 // Find the position of the last ".", that's after the last "/"
1012 if(sz
[pos
]=='/' || sz
[pos
]=='\\')
1016 return g_empty_string
;
1019 const char *de_get_input_file_ext(deark
*c
)
1021 if(c
->suppress_detection_by_filename
) return g_empty_string
;
1023 if(!c
->input_filename
) return g_empty_string
;
1025 // If we skipped over the first part of the file, assume we're reading
1026 // an embedded format that's not indicated by the file extension.
1027 if(c
->slice_start_req
) return g_empty_string
;
1029 return de_get_sz_ext(c
->input_filename
);
1032 int de_sz_has_ext(const char *sz
, const char *ext
)
1036 e
= de_get_sz_ext(sz
);
1037 if(!de_strcasecmp(e
, ext
))
1042 int de_input_file_has_ext(deark
*c
, const char *ext
)
1046 e
= de_get_input_file_ext(c
);
1047 if(!de_strcasecmp(e
, ext
))
1052 int de_havemodcode(deark
*c
, de_module_params
*mparams
, int code
)
1055 mparams
->in_params
.codes
&&
1056 de_strchr(mparams
->in_params
.codes
, code
))
1063 // An finfo object holds metadata to be used when writing an output file.
1064 // It is passed to dbuf_create_output_file(), and related functions.
1065 // It does not have to remain valid after that function returns.
1066 // It is allowed to be reused.
1067 de_finfo
*de_finfo_create(deark
*c
)
1070 fi
= de_malloc(c
, sizeof(de_finfo
));
1074 void de_finfo_destroy(deark
*c
, de_finfo
*fi
)
1077 if(fi
->file_name_internal
) ucstring_destroy(fi
->file_name_internal
);
1078 if(fi
->name_other
) ucstring_destroy(fi
->name_other
);
1082 static i32
de_char_to_valid_fn_char(deark
*c
, i32 ch
)
1084 if(ch
>=32 && ch
<=126 && ch
!='/' && ch
!='\\' && ch
!=':'
1085 && ch
!='*' && ch
!='?' && ch
!='\"' && ch
!='<' &&
1088 // These are the valid ASCII characters in Windows filenames.
1089 // TODO: We could behave differently on different platforms.
1092 else if(ch
>=160 && ch
<=0x10ffff) {
1093 // TODO: A lot of Unicode characters probably don't belong in filenames.
1094 // Maybe we need a whitelist or blacklist.
1095 // (is_printable_uchar() exists, but isn't quite right.)
1101 // Sanitize a filename that is either also going to be processed by
1102 // sanitize_filename2(), or is known to contain no slashes.
1103 static void sanitize_filename1(deark
*c
, de_ucstring
*s
)
1106 if(s
->len
==1 && s
->str
[0]=='.') {
1110 if(s
->len
==2 && s
->str
[0]=='.' && s
->str
[1]=='.') {
1115 // Sanitize a filename that may contain slashes.
1116 // Just some basic sanitization, not expected to be perfect.
1117 // Note that this name will be written to a ZIP file, not used directly as a
1119 static void sanitize_filename2(deark
*c
, de_ucstring
*s
)
1123 // Don't allow an initial "/"
1124 if(s
->len
>=1 && s
->str
[0]=='/') {
1128 // Don't allow consecutive slashes
1129 for(i
=0; i
<s
->len
-1; i
++) {
1130 if(s
->str
[i
]=='/' && s
->str
[i
+1]=='/') {
1135 // Don't allow a component to be ".."
1136 for(i
=0; i
<s
->len
-1; i
++) {
1137 if(s
->str
[i
]=='.' && s
->str
[i
+1]=='.') {
1138 int test1
= 0; // Is ".." at the beginning of a component?
1139 int test2
= 0; // Is ".." at the end of a component?
1140 if(i
==0 || s
->str
[i
-1]=='/') {
1143 if(i
>=s
->len
-2 || s
->str
[i
+2]=='/') {
1146 if(test1
&& test2
) {
1152 // Don't allow name to end with "/."
1153 if(s
->len
>=2 && s
->str
[s
->len
-2]=='/' && s
->str
[s
->len
-1]=='.') {
1154 s
->str
[s
->len
-1] = '_';
1157 // Don't allow name to end with "/"
1158 if(s
->len
>=1 && s
->str
[s
->len
-1]=='/') {
1159 s
->str
[s
->len
-1] = '_';
1163 // Takes ownership of 's', and may modify it.
1165 // DE_SNFLAG_FULLPATH = "/" characters in the name are path separators.
1166 // DE_SNFLAG_STRIPTRAILINGSLASH
1167 static void de_finfo_set_name_internal(deark
*c
, de_finfo
*fi
, de_ucstring
*s
,
1173 fi
->orig_name_was_dot
= 0;
1175 if(fi
->file_name_internal
) {
1176 ucstring_destroy(fi
->file_name_internal
);
1177 fi
->file_name_internal
= NULL
;
1181 fi
->file_name_internal
= s
;
1183 if((flags
&DE_SNFLAG_STRIPTRAILINGSLASH
) && s
->len
>0 && s
->str
[s
->len
-1]=='/') {
1184 ucstring_truncate(s
, s
->len
-1);
1187 allow_slashes
= (c
->allow_subdirs
&& (flags
&DE_SNFLAG_FULLPATH
));
1189 if(allow_slashes
&& s
->len
==1 && s
->str
[0]=='.') {
1190 // Remember that this file was named ".", which can be a valid subdir
1191 // name in some cases (but at this point we don't even know whether it
1193 fi
->orig_name_was_dot
= 1;
1196 for(i
=0; i
<s
->len
; i
++) {
1197 if(s
->str
[i
]=='/' && allow_slashes
) {
1200 s
->str
[i
] = de_char_to_valid_fn_char(c
, s
->str
[i
]);
1203 ucstring_strip_trailing_spaces(s
);
1205 sanitize_filename1(c
, s
);
1208 sanitize_filename2(c
, s
);
1211 // Don't allow empty filenames.
1213 ucstring_append_sz(s
, "_", DE_ENCODING_LATIN1
);
1217 void de_finfo_set_name_from_ucstring(deark
*c
, de_finfo
*fi
, de_ucstring
*s
,
1220 de_ucstring
*s_copy
;
1222 s_copy
= ucstring_clone(s
);
1223 de_finfo_set_name_internal(c
, fi
, s_copy
, flags
);
1226 void de_finfo_set_name_from_sz(deark
*c
, de_finfo
*fi
, const char *name1
,
1227 unsigned int flags
, de_ext_encoding ee
)
1232 de_finfo_set_name_from_ucstring(c
, fi
, NULL
, flags
);
1235 fname
= ucstring_create(c
);
1236 ucstring_append_sz(fname
, name1
, ee
);
1237 de_finfo_set_name_internal(c
, fi
, fname
, flags
);
1240 // Sets the precision field to UNKNOWN.
1241 // flags: Same as de_FILETIME_to_timestamp()
1242 void de_unix_time_to_timestamp(i64 ut
, struct de_timestamp
*ts
, unsigned int flags
)
1244 de_FILETIME_to_timestamp(
1245 (ut
+ ((i64
)86400)*(369*365 + 89)) * 10000000,
1247 ts
->precision
= DE_TSPREC_UNKNOWN
;
1250 // Sets the sub-second part of the timestamp to 'frac' seconds after
1251 // (always forward in time) the whole-number second represented by the
1253 // 'frac' must be >=0.0 and <1.0.
1254 // Sets the precision field to HIGH.
1255 void de_timestamp_set_subsec(struct de_timestamp
*ts
, double frac
)
1259 if(!ts
->is_valid
) return;
1260 if(ts
->ts_FILETIME
<0) ts
->ts_FILETIME
=0;
1262 // Subtract off any existing fractional second.
1263 ts
->ts_FILETIME
-= (ts
->ts_FILETIME
%10000000);
1265 subsec
= (i64
)(0.5+frac
*10000000.0);
1266 if(subsec
>=10000000) subsec
=9999999;
1267 if(subsec
<0) subsec
=0;
1268 ts
->ts_FILETIME
+= subsec
;
1269 ts
->precision
= DE_TSPREC_HIGH
;
1272 // Returns the number of ten-millionths of a second after the whole number
1273 // of seconds (i.e. after the time returned by de_timestamp_to_unix_time).
1274 // The returned value will be between 0 and 9999999, inclusive.
1275 i64
de_timestamp_get_subsec(const struct de_timestamp
*ts
)
1277 return (de_timestamp_to_FILETIME(ts
) % 10000000);
1280 void de_mac_time_to_timestamp(i64 mt
, struct de_timestamp
*ts
)
1282 de_unix_time_to_timestamp(mt
- 2082844800, ts
, 0);
1285 // Convert a Windows FILETIME to a Deark timestamp.
1286 // Always sets the precision field to HIGH.
1287 // flags: 0x1 = set the UTC flag
1288 void de_FILETIME_to_timestamp(i64 ft
, struct de_timestamp
*ts
, unsigned int flags
)
1290 de_zeromem(ts
, sizeof(struct de_timestamp
));
1293 ts
->ts_FILETIME
= ft
;
1294 ts
->precision
= DE_TSPREC_HIGH
;
1295 if(flags
&0x1) ts
->tzcode
= DE_TZCODE_UTC
;
1298 void de_dos_datetime_to_timestamp(struct de_timestamp
*ts
,
1299 i64 ddate
, i64 dtime
)
1301 i64 yr
, mo
, da
, hr
, mi
, se
;
1304 de_zeromem(ts
, sizeof(struct de_timestamp
));
1308 yr
= 1980+((ddate
&0xfe00)>>9);
1309 mo
= (ddate
&0x01e0)>>5;
1310 da
= (ddate
&0x001f);
1311 hr
= (dtime
&0xf800)>>11;
1312 mi
= (dtime
&0x07e0)>>5;
1313 se
= 2*(dtime
&0x001f);
1314 de_make_timestamp(ts
, yr
, mo
, da
, hr
, mi
, se
);
1315 ts
->precision
= DE_TSPREC_2SEC
;
1319 // 0x1 = support VFAT long filename attribs
1320 void de_describe_dos_attribs(deark
*c
, UI attr
, de_ucstring
*s
, UI flags
)
1322 unsigned int bf
= attr
;
1324 if((flags
& 0x1) && (bf
& 0x3f)==0x0f) {
1325 ucstring_append_flags_item(s
, "long filename");
1329 ucstring_append_flags_item(s
, "read-only");
1333 ucstring_append_flags_item(s
, "hidden");
1337 ucstring_append_flags_item(s
, "system");
1341 ucstring_append_flags_item(s
, "volume label");
1345 ucstring_append_flags_item(s
, "directory");
1349 ucstring_append_flags_item(s
, "archive");
1353 if(bf
!=0) { // Report any unrecognized flags
1354 ucstring_append_flags_itemf(s
, "0x%02x", bf
);
1358 // Sets the DE_TZCODE_UTC flag.
1359 void de_riscos_loadexec_to_timestamp(u32 load_addr
,
1360 u32 exec_addr
, struct de_timestamp
*ts
)
1363 unsigned int centiseconds
;
1365 de_zeromem(ts
, sizeof(struct de_timestamp
));
1366 if((load_addr
&0xfff00000U
)!=0xfff00000U
) return;
1368 t
= (((i64
)(load_addr
&0xff))<<32) | (i64
)exec_addr
;
1369 // t now = number of centiseconds since the beginning of 1900
1371 // Remember centiseconds.
1372 centiseconds
= (unsigned int)(t
%100);
1373 // Convert t to seconds.
1376 // Convert 1900 epoch to 1970 epoch.
1377 // (There were 17 leap days between Jan 1900 and Jan 1970.)
1378 t
-= (365*70 + 17)*(i64
)86400;
1380 if(t
<=0 || t
>=8000000000LL) return; // sanity check
1382 de_unix_time_to_timestamp(t
, ts
, 0);
1383 de_timestamp_set_subsec(ts
, ((double)centiseconds
)/100.0);
1384 ts
->tzcode
= DE_TZCODE_UTC
;
1387 // This always truncates down to a whole number of seconds.
1388 // While an option to round might be useful for *something*, it could
1389 // cause problems if you're not really careful. It invites double-rounding,
1390 // and the creation of timestamps that are slightly in the future, both of
1391 // which can be problematic.
1392 i64
de_timestamp_to_unix_time(const struct de_timestamp
*ts
)
1394 if(!ts
->is_valid
) return 0;
1396 // There are 369 years between 1601 and 1970, with 89 leap days.
1397 return (de_timestamp_to_FILETIME(ts
)/10000000) - ((i64
)86400)*(369*365 + 89);
1400 // Convert to Windows FILETIME.
1401 // Returns 0 on error.
1402 i64
de_timestamp_to_FILETIME(const struct de_timestamp
*ts
)
1404 if(!ts
->is_valid
) return 0;
1405 if(ts
->ts_FILETIME
<0) return 0;
1406 return ts
->ts_FILETIME
;
1409 // [Adapted from Eric Raymond's public domain my_timegm().]
1410 // Convert a time (as individual fields) to a de_timestamp.
1411 // This is basically a UTC version of mktime().
1413 // mo = month: 1=Jan, ... 12=Dec
1414 // da = day of month: 1=1, ... 31=31
1415 void de_make_timestamp(struct de_timestamp
*ts
,
1416 i64 yr
, i64 mo
, i64 da
,
1417 i64 hr
, i64 mi
, i64 se
)
1421 static const int cumulative_days
[12] =
1422 { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
1424 de_zeromem(ts
, sizeof(struct de_timestamp
));
1426 if(tm_mon
<0 || tm_mon
>11) tm_mon
=0;
1427 result
= (yr
- 1970) * 365 + cumulative_days
[tm_mon
];
1428 result
+= (yr
- 1968) / 4;
1429 result
-= (yr
- 1900) / 100;
1430 result
+= (yr
- 1600) / 400;
1431 if ((yr
%4)==0 && ((yr
%100)!=0 || (yr
%400)==0) && tm_mon
<2) {
1442 de_unix_time_to_timestamp(result
, ts
, 0);
1445 // Adjust the timestamp, presumably to convert it from local time to UTC,
1446 // and set the UTC flag.
1447 // offset_seconds is number of seconds to add to the timestamp to get UTC,
1448 // i.e. number of seconds west of UTC.
1449 void de_timestamp_cvt_to_utc(struct de_timestamp
*ts
, i64 offset_seconds
)
1451 if(!ts
->is_valid
) return;
1452 ts
->ts_FILETIME
+= offset_seconds
*10000000;
1453 ts
->tzcode
= DE_TZCODE_UTC
;
1456 // Our version of the standard gmtime() function.
1457 // We roll our own, so that we can support a wide range of dates. We want to
1458 // handle erroneous, and deliberately pathological, dates in the distant past
1459 // and future. We also want Deark to work the same on all platforms.
1461 // Converts a de_timestamp to a de_struct_tm, with separate fields
1462 // for year, month, day, ...
1463 // Uses the Gregorian calendar.
1464 // Supports dates from about year 1601 to 30828.
1465 void de_gmtime(const struct de_timestamp
*ts
, struct de_struct_tm
*tm2
)
1467 // Let's define an "eon" to be a 400-year period. Eons begin at the start
1468 // of the year 1601, 2001, 2401, etc.
1469 static const i64 secs_per_eon
= 12622780800LL;
1471 i64 secs_since_start_of_1601
;
1472 i64 secs_since_start_of_eon
;
1473 i64 days_since_start_of_eon
;
1474 i64 secs_since_start_of_day
;
1475 i64 yr_tmp
; // years, since start of eon, accounted for so far
1476 i64 days_tmp
; // number of days not accounted for in yr_tmp
1481 de_zeromem(tm2
, sizeof(struct de_struct_tm
));
1482 if(!ts
->is_valid
|| ts
->ts_FILETIME
<=0) {
1486 secs_since_start_of_1601
= ts
->ts_FILETIME
/ 10000000;
1487 tm2
->tm_subsec
= ts
->ts_FILETIME
% 10000000;
1488 eon
= secs_since_start_of_1601
/ secs_per_eon
;
1489 secs_since_start_of_eon
= secs_since_start_of_1601
% secs_per_eon
;
1490 days_since_start_of_eon
= secs_since_start_of_eon
/ 86400;
1491 secs_since_start_of_day
= secs_since_start_of_eon
% 86400;
1492 tm2
->tm_hour
= (int)(secs_since_start_of_day
/ 3600);
1493 tm2
->tm_min
= (int)((secs_since_start_of_day
% 3600)/60);
1494 tm2
->tm_sec
= (int)(secs_since_start_of_day
% 60);
1496 days_tmp
= days_since_start_of_eon
;
1499 // The first 3 100-year periods in this eon have
1500 // 100*365 + 24 days each.
1501 count
= days_tmp
/ (100*365 + 24);
1502 if(count
>3) count
= 3;
1503 days_tmp
-= (100*365 + 24)*count
;
1504 yr_tmp
+= 100*count
;
1506 // The first 24 4-year periods in this 100-year period have
1508 count
= days_tmp
/ (4*365 + 1);
1509 if(count
>24) count
= 24;
1510 days_tmp
-= (4*365 + 1)*count
;
1513 // The first 3 years in this 4-year period are not leap years.
1514 count
= days_tmp
/ 365;
1515 if(count
>3) count
= 3;
1516 days_tmp
-= 365*count
;
1519 tm2
->tm_fullyear
= (int)(1601 + eon
*400 + yr_tmp
);
1520 is_leapyear
= ((yr_tmp
%4)==3 &&
1521 yr_tmp
!=99 && yr_tmp
!=199 && yr_tmp
!=299);
1523 for(k
=0; k
<11; k
++) {
1524 static const u8 days_in_month
[11] = // (Don't need December)
1525 { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30 };
1526 i64 days_in_this_month
= (i64
)days_in_month
[k
];
1527 if(k
==1 && is_leapyear
) days_in_this_month
++;
1528 if(days_tmp
>= days_in_this_month
) {
1529 days_tmp
-= days_in_this_month
;
1537 tm2
->tm_mday
= (int)(1+days_tmp
);
1541 // Appends " UTC" if ts->tzcode==DE_TZCODE_UTC
1542 // No flags are currently defined.
1543 // Caller supplies buf (suggest it be at least size 64).
1544 // Returns an extra pointer to buf.
1545 char *de_timestamp_to_string(const struct de_timestamp
*ts
,
1546 char *buf
, size_t buf_len
, unsigned int flags
)
1548 const char *tzlabel
;
1550 struct de_struct_tm tm2
;
1553 de_strlcpy(buf
, "[invalid timestamp]", buf_len
);
1557 de_gmtime(ts
, &tm2
);
1559 de_snprintf(buf
, buf_len
, "[timestamp out of range: %"I64_FMT
"]",
1560 de_timestamp_to_unix_time(ts
));
1564 if(ts
->precision
>DE_TSPREC_1SEC
) {
1566 ms
= (unsigned int)(tm2
.tm_subsec
/10000);
1567 if(ms
>=1000) ms
=999;
1568 de_snprintf(subsec
, sizeof(subsec
), ".%03u", ms
);
1574 tzlabel
= (ts
->tzcode
==DE_TZCODE_UTC
)?" UTC":"";
1575 if(ts
->precision
!=DE_TSPREC_UNKNOWN
&& ts
->precision
<=DE_TSPREC_1DAY
) { // date only
1576 de_snprintf(buf
, buf_len
, "%04d-%02d-%02d",
1577 tm2
.tm_fullyear
, 1+tm2
.tm_mon
, tm2
.tm_mday
);
1580 de_snprintf(buf
, buf_len
, "%04d-%02d-%02d %02d:%02d:%02d%s%s",
1581 tm2
.tm_fullyear
, 1+tm2
.tm_mon
, tm2
.tm_mday
,
1582 tm2
.tm_hour
, tm2
.tm_min
, tm2
.tm_sec
, subsec
, tzlabel
);
1587 // Same as de_timestamp_to_string(), except it assumes the output is only
1588 // needed if debug output is enabled.
1589 // If it is not, it just returns an empty string, to avoid the relatively
1590 // slow date processing.
1591 char *de_dbg_timestamp_to_string(deark
*c
, const struct de_timestamp
*ts
,
1592 char *buf
, size_t buf_len
, unsigned int flags
)
1594 if(c
->debug_level
<1) {
1598 return de_timestamp_to_string(ts
, buf
, buf_len
, flags
);
1601 // Returns the same time if called multiple times.
1602 void de_cached_current_time_to_timestamp(deark
*c
, struct de_timestamp
*ts
)
1604 if(!c
->current_time
.is_valid
) {
1605 de_current_time_to_timestamp(&c
->current_time
);
1607 *ts
= c
->current_time
;
1610 void de_declare_fmt(deark
*c
, const char *fmtname
)
1612 if(c
->module_nesting_level
> 1) {
1613 return; // Only allowed for the top-level module
1615 if(c
->format_declared
) return;
1616 de_info(c
, "Format: %s", fmtname
);
1617 c
->format_declared
= 1;
1620 void de_declare_fmtf(deark
*c
, const char *fmt
, ...)
1626 de_vsnprintf(buf
, sizeof(buf
), fmt
, ap
);
1627 de_declare_fmt(c
, buf
);
1631 // Returns a suitable input encoding.
1632 // If mparams.in_params.input_encoding exists and is not UNKNOWN,
1634 // Else if c->input_encoding (the -inenc option) is not UNKNOWN, returns that.
1635 // Else returns dflt.
1636 de_encoding
de_get_input_encoding(deark
*c
, de_module_params
*mparams
,
1639 if(mparams
&& mparams
->in_params
.input_encoding
!=DE_ENCODING_UNKNOWN
) {
1640 return mparams
->in_params
.input_encoding
;
1642 if(c
->input_encoding
!=DE_ENCODING_UNKNOWN
) {
1643 return c
->input_encoding
;
1648 // Assumes dst starts out with only '0' bits
1649 void de_copy_bits(const u8
*src
, i64 srcbitnum
,
1650 u8
*dst
, i64 dstbitnum
, i64 bitstocopy
)
1655 for(i
=0; i
<bitstocopy
; i
++) {
1656 b
= src
[(srcbitnum
+i
)/8];
1657 b
= (b
>>(7-(srcbitnum
+i
)%8))&0x1;
1659 b
= b
<<(7-(dstbitnum
+i
)%8);
1660 dst
[(dstbitnum
+i
)/8] |= b
;
1665 // A very simple hash table implementation, with int64 keys.
1667 #define DE_INTHASHTABLE_NBUCKETS 71
1669 struct de_inthashtable_item
{
1672 struct de_inthashtable_item
*next
; // Next item in linked list
// One bucket of the hash table: the head of a singly linked list of the
// items stored in that bucket (NULL if there are none).
struct de_inthashtable_bucket {
	struct de_inthashtable_item *first_item;
};
1679 struct de_inthashtable
{
1680 struct de_inthashtable_bucket buckets
[DE_INTHASHTABLE_NBUCKETS
];
1683 static struct de_inthashtable_bucket
*inthashtable_find_bucket(struct de_inthashtable
*ht
,
1688 if(key
>=0) bkt_num
= key
%DE_INTHASHTABLE_NBUCKETS
;
1689 else bkt_num
= (-key
)%DE_INTHASHTABLE_NBUCKETS
;
1691 return &ht
->buckets
[bkt_num
];
1694 struct de_inthashtable
*de_inthashtable_create(deark
*c
)
1696 return de_mallocarray(c
, DE_INTHASHTABLE_NBUCKETS
, sizeof(struct de_inthashtable
));
1699 static void inthashtable_destroy_item(deark
*c
, struct de_inthashtable_item
*item
)
1704 static void inthashtable_destroy_items_in_bucket(deark
*c
, struct de_inthashtable_bucket
*bkt
)
1706 struct de_inthashtable_item
*next_item
;
1708 while(bkt
->first_item
) {
1709 next_item
= bkt
->first_item
->next
;
1710 inthashtable_destroy_item(c
, bkt
->first_item
);
1711 bkt
->first_item
= next_item
;
1715 void de_inthashtable_destroy(deark
*c
, struct de_inthashtable
*ht
)
1720 for(i
=0; i
<DE_INTHASHTABLE_NBUCKETS
; i
++) {
1721 if(ht
->buckets
[i
].first_item
)
1722 inthashtable_destroy_items_in_bucket(c
, &ht
->buckets
[i
]);
1727 // Returns NULL if item does not exist in the given bucket
1728 static struct de_inthashtable_item
*inthashtable_find_item_in_bucket(struct de_inthashtable
*ht
,
1729 struct de_inthashtable_bucket
*bkt
, i64 key
)
1731 struct de_inthashtable_item
*p
;
1733 p
= bkt
->first_item
;
1734 while(p
&& (p
->key
!= key
)) {
1740 // Returns NULL if item does not exist
1741 static struct de_inthashtable_item
*inthashtable_find_item(struct de_inthashtable
*ht
, i64 key
)
1743 struct de_inthashtable_bucket
*bkt
;
1745 if(!ht
) return NULL
;
1746 bkt
= inthashtable_find_bucket(ht
, key
);
1747 return inthashtable_find_item_in_bucket(ht
, bkt
, key
);
1750 // If key does not exist, sets *pvalue to NULL and returns 0.
1751 int de_inthashtable_get_item(deark
*c
, struct de_inthashtable
*ht
, i64 key
, void **pvalue
)
1753 struct de_inthashtable_item
*item
;
1755 item
= inthashtable_find_item(ht
, key
);
1757 *pvalue
= item
->value
;
1764 int de_inthashtable_item_exists(deark
*c
, struct de_inthashtable
*ht
, i64 key
)
1766 return (inthashtable_find_item(ht
, key
) != NULL
);
1769 // Unconditionally adds an item to the given bucket (does not prevent duplicates)
1770 static void inthashtable_add_item_to_bucket(struct de_inthashtable
*ht
,
1771 struct de_inthashtable_bucket
*bkt
, struct de_inthashtable_item
*new_item
)
1773 new_item
->next
= bkt
->first_item
;
1774 bkt
->first_item
= new_item
;
1777 // Returns 1 if the key has been newly-added,
1778 // or 0 if the key already existed.
1779 int de_inthashtable_add_item(deark
*c
, struct de_inthashtable
*ht
, i64 key
, void *value
)
1781 struct de_inthashtable_bucket
*bkt
;
1782 struct de_inthashtable_item
*new_item
;
1784 bkt
= inthashtable_find_bucket(ht
, key
);
1785 if(inthashtable_find_item_in_bucket(ht
, bkt
, key
)) {
1786 // Item already exist. Don't add it again.
1787 // TODO: This may eventually need to be changed to modify the existing item,
1788 // or delete-then-add the new item, instead of doing nothing.
1792 new_item
= de_malloc(c
, sizeof(struct de_inthashtable_item
));
1793 new_item
->key
= key
;
1794 new_item
->value
= value
;
1795 inthashtable_add_item_to_bucket(ht
, bkt
, new_item
);
1799 int de_inthashtable_remove_item(deark
*c
, struct de_inthashtable
*ht
, i64 key
, void **pvalue
)
1805 // Select one item arbitrarily, return its key and value, and delete it from the
1807 int de_inthashtable_remove_any_item(deark
*c
, struct de_inthashtable
*ht
, i64
*pkey
, void **pvalue
)
1811 for(i
=0; i
<DE_INTHASHTABLE_NBUCKETS
; i
++) {
1812 struct de_inthashtable_item
*item
;
1814 item
= ht
->buckets
[i
].first_item
;
1817 // Found an item. Copy it, for the caller.
1818 if(pkey
) *pkey
= item
->key
;
1819 if(pvalue
) *pvalue
= item
->value
;
1821 // Delete our copy of it.
1822 ht
->buckets
[i
].first_item
= item
->next
;
1823 inthashtable_destroy_item(c
, item
);
1827 // No items in hashtable.
1829 if(pvalue
) *pvalue
= NULL
;
1833 // crcobj: Functions for performing CRC calculations, and other checksum-like
1834 // functions for which the result can fit in a 32-bit int.
1838 unsigned int crctype
;
1843 #define DE_CRC32_INIT 0
1845 // crc32_calc() is based on public domain code by Jon Mayo, downloaded
1846 // from <http://orangetide.com/code/crc.c>.
1847 // It includes minor changes for Deark. I disclaim any copyright on these
1848 // minor changes. -JS
1849 // Note: I have found several other seemingly-independent implementations
1850 // of the same algorithm, such as the one by Karl Malbrain, used in miniz.
1851 // I don't know its origin.
1852 static u32
crc32_calc(const u8
*ptr
, size_t cnt
, u32 crc
)
1854 static const u32 crc32_tab
[16] = {
1855 0x00000000U
, 0x1db71064U
, 0x3b6e20c8U
, 0x26d930acU
,
1856 0x76dc4190U
, 0x6b6b51f4U
, 0x4db26158U
, 0x5005713cU
,
1857 0xedb88320U
, 0xf00f9344U
, 0xd6d6a3e8U
, 0xcb61b38cU
,
1858 0x9b64c2b0U
, 0x86d3d2d4U
, 0xa00ae278U
, 0xbdbdf21cU
1861 if(cnt
==0) return crc
;
1864 crc
= (crc
>> 4) ^ crc32_tab
[(crc
& 0xf) ^ (*ptr
& 0xf)];
1865 crc
= (crc
>> 4) ^ crc32_tab
[(crc
& 0xf) ^ (*ptr
++ >> 4)];
1870 // For a one-shot CRC calculations, or the first part of a multi-part
1872 // buf can be NULL (in which case buf_len should be 0, but is ignored)
1873 static u32
de_crc32(const void *buf
, i64 buf_len
)
1875 if(!buf
) return DE_CRC32_INIT
;
1876 return (u32
)crc32_calc((const u8
*)buf
, (size_t)buf_len
, DE_CRC32_INIT
);
1879 static u32
de_crc32_continue(u32 prev_crc
, const void *buf
, i64 buf_len
)
1881 return (u32
)crc32_calc((const u8
*)buf
, (size_t)buf_len
, prev_crc
);
1884 static void adler32_continue(struct de_crcobj
*crco
, const u8
*buf
, i64 buf_len
)
1886 u32 s1
= crco
->val
& 0xffff;
1887 u32 s2
= (crco
->val
>> 16) & 0xffff;
1890 for(i
= 0; i
<buf_len
; i
++) {
1891 s1
= (s1
+ buf
[i
]) % 65521;
1892 s2
= (s2
+ s1
) % 65521;
1894 crco
->val
= (s2
<< 16) + s1
;
1897 // This is the CRC-16 algorithm used in MacBinary.
1898 // It is in the x^16 + x^12 + x^5 + 1 family.
1899 // CRC-16-CCITT is probably the best name for it, though I'm not completely
1900 // sure, and there are several algorithms that have been called "CRC-16-CCITT".
1901 static void de_crc16ccitt_init(struct de_crcobj
*crco
)
1903 const unsigned int polynomial
= 0x1021;
1906 crco
->table16
= de_mallocarray(crco
->c
, 256, sizeof(u16
));
1907 crco
->table16
[0] = 0;
1908 for(index
=0; index
<128; index
++) {
1909 unsigned int carry
= crco
->table16
[index
] & 0x8000;
1910 unsigned int temp
= (crco
->table16
[index
] << 1) & 0xffff;
1911 crco
->table16
[index
* 2 + (carry
? 0 : 1)] = temp
^ polynomial
;
1912 crco
->table16
[index
* 2 + (carry
? 1 : 0)] = temp
;
1916 static void de_crc16ccitt_continue(struct de_crcobj
*crco
, const u8
*buf
, i64 buf_len
)
1920 if(!crco
->table16
) return;
1921 for(k
=0; k
<buf_len
; k
++) {
1922 crco
->val
= ((crco
->val
<<8)&0xffff) ^
1923 (u32
)crco
->table16
[((crco
->val
>>8) ^ (u32
)buf
[k
]) & 0xff];
1927 // This is the CRC-16 algorithm used in ARC, LHA, ZOO, etc.
1928 // It is in the x^16 + x^15 + x^2 + 1 family.
1929 // It's some variant of CRC-16-IBM, and sometimes simply called "CRC-16". But
1930 // both these names are more ambiguous than I'd like, so I'm calling it "ARC".
1931 static void de_crc16arc_init(struct de_crcobj
*crco
)
1935 crco
->table16
= de_mallocarray(crco
->c
, 256, sizeof(u16
));
1936 for(i
=0; i
<256; i
++) {
1937 crco
->table16
[i
] = i
;
1939 crco
->table16
[i
] = (crco
->table16
[i
]>>1) ^ ((crco
->table16
[i
] & 1) ? 0xa001 : 0);
1943 static void de_crc16arc_continue(struct de_crcobj
*crco
, const u8
*buf
, i64 buf_len
)
1947 if(!crco
->table16
) return;
1948 for(k
=0; k
<buf_len
; k
++) {
1949 crco
->val
= ((crco
->val
>>8) ^
1950 (u32
)crco
->table16
[(crco
->val
^ buf
[k
]) & 0xff]);
1954 // Allocates, initializes, and resets a new object.
1955 struct de_crcobj
*de_crcobj_create(deark
*c
, UI type_and_flags
)
1957 struct de_crcobj
*crco
;
1959 crco
= de_malloc(c
, sizeof(struct de_crcobj
));
1961 crco
->crctype
= type_and_flags
;
1963 switch(crco
->crctype
) {
1964 case DE_CRCOBJ_CRC16_CCITT
:
1965 de_crc16ccitt_init(crco
);
1967 case DE_CRCOBJ_CRC16_ARC
:
1968 de_crc16arc_init(crco
);
1972 de_crcobj_reset(crco
);
1976 void de_crcobj_destroy(struct de_crcobj
*crco
)
1982 de_free(c
, crco
->table16
);
1986 void de_crcobj_reset(struct de_crcobj
*crco
)
1990 switch(crco
->crctype
) {
1991 case DE_CRCOBJ_CRC32_IEEE
:
1992 crco
->val
= de_crc32(NULL
, 0);
1994 case DE_CRCOBJ_ADLER32
:
2000 u32
de_crcobj_getval(struct de_crcobj
*crco
)
2005 void de_crcobj_addbuf(struct de_crcobj
*crco
, const u8
*buf
, i64 buf_len
)
2007 if(buf_len
<1) return;
2009 switch(crco
->crctype
) {
2010 case DE_CRCOBJ_CRC32_IEEE
:
2011 crco
->val
= de_crc32_continue(crco
->val
, buf
, buf_len
);
2013 case DE_CRCOBJ_CRC16_CCITT
:
2014 de_crc16ccitt_continue(crco
, buf
, buf_len
);
2016 case DE_CRCOBJ_CRC16_ARC
:
2017 de_crc16arc_continue(crco
, buf
, buf_len
);
2019 case DE_CRCOBJ_ADLER32
:
2020 adler32_continue(crco
, buf
, buf_len
);
2025 void de_crcobj_addzeroes(struct de_crcobj
*crco
, i64 len
)
2030 for(i
=0; i
<len
; i
++) {
2031 de_crcobj_addbuf(crco
, &z
, 1);
2035 static int addslice_cbfn(struct de_bufferedreadctx
*brctx
, const u8
*buf
,
2038 de_crcobj_addbuf((struct de_crcobj
*)brctx
->userdata
, buf
, buf_len
);
2042 void de_crcobj_addslice(struct de_crcobj
*crco
, dbuf
*f
, i64 pos
, i64 len
)
2044 dbuf_buffered_read(f
, pos
, len
, addslice_cbfn
, (void*)crco
);
2047 void de_get_reproducible_timestamp(deark
*c
, struct de_timestamp
*ts
)
2049 if(c
->reproducible_timestamp
.is_valid
) {
2050 *ts
= c
->reproducible_timestamp
;
2054 // An arbitrary timestamp
2055 // $ date -u --date='2010-09-08 07:06:05' '+%s'
2056 de_unix_time_to_timestamp(1283929565LL, ts
, 0x1);
2059 // Call this to ensure that a zip/tar file will be created, even if it has
2061 int de_archive_initialize(deark
*c
)
2063 if(c
->output_style
!=DE_OUTPUTSTYLE_ARCHIVE
) return 0;
2064 switch(c
->archive_fmt
) {
2065 case DE_ARCHIVEFMT_ZIP
:
2066 return de_zip_create_file(c
);
2067 case DE_ARCHIVEFMT_TAR
:
2068 return de_tar_create_file(c
);