1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
7 // Functions related to the dbuf object.
9 #define DE_NOT_IN_MODULE
10 #include "deark-config.h"
11 #include "deark-private.h"
13 #define DE_DUMMY_MAX_FILE_SIZE (1LL<<56)
14 #define DE_MAX_MEMBUF_SIZE 2000000000
15 #define DE_CACHE_SIZE 262144
17 // Fill the cache that remembers the first part of the file.
18 // TODO: We should probably use memory-mapped files instead when possible,
19 // but this is simple and portable, and does most of what we need.
20 static void populate_cache(dbuf
*f
)
25 if(f
->btype
!=DBUF_TYPE_IFILE
) return;
27 bytes_to_read
= DE_CACHE_SIZE
;
28 if(f
->len
< bytes_to_read
) {
29 bytes_to_read
= f
->len
;
32 f
->cache
= de_malloc(f
->c
, DE_CACHE_SIZE
);
33 de_fseek(f
->fp
, 0, SEEK_SET
);
34 bytes_read
= fread(f
->cache
, 1, (size_t)bytes_to_read
, f
->fp
);
35 f
->cache_bytes_used
= bytes_read
;
36 f
->file_pos_known
= 0;
39 // Read all data from stdin (or a named pipe) into memory.
40 static void populate_cache_from_pipe(dbuf
*f
)
43 i64 cache_bytes_alloc
= 0;
45 if(f
->btype
==DBUF_TYPE_STDIN
) {
48 else if(f
->btype
==DBUF_TYPE_FIFO
) {
55 f
->cache_bytes_used
= 0;
58 i64 bytes_to_read
, bytes_read
;
60 if(f
->cache_bytes_used
>= cache_bytes_alloc
) {
61 i64 old_cache_size
, new_cache_size
;
63 // Cache is full. Increase its size.
64 old_cache_size
= cache_bytes_alloc
;
65 new_cache_size
= old_cache_size
*2;
66 if(new_cache_size
<DE_CACHE_SIZE
) new_cache_size
= DE_CACHE_SIZE
;
67 f
->cache
= de_realloc(f
->c
, f
->cache
, old_cache_size
, new_cache_size
);
68 cache_bytes_alloc
= new_cache_size
;
71 // Try to read as many bytes as it would take to fill the cache.
72 bytes_to_read
= cache_bytes_alloc
- f
->cache_bytes_used
;
73 if(bytes_to_read
<1) break; // Shouldn't happen
75 bytes_read
= fread(&f
->cache
[f
->cache_bytes_used
], 1, (size_t)bytes_to_read
, fp
);
76 if(bytes_read
<1 || bytes_read
>bytes_to_read
) break;
77 f
->cache_bytes_used
+= bytes_read
;
78 if(feof(fp
) || ferror(fp
)) break;
81 f
->len
= f
->cache_bytes_used
;
84 // Read len bytes, starting at file position pos, into buf.
85 // Unread bytes will be set to 0.
86 void dbuf_read(dbuf
*f
, u8
*buf
, i64 pos
, i64 len
)
96 // All requested bytes are before the beginning of the file
97 de_zeromem(buf
, (size_t)len
);
100 // Some requested bytes are before the beginning of the file.
101 // Zero out the ones that are:
102 de_zeromem(buf
, (size_t)(-pos
));
103 // And adjust the parameters:
113 else if(pos
+ bytes_to_read
> f
->len
) {
114 bytes_to_read
= f
->len
- pos
;
117 if(bytes_to_read
<1) {
121 // If the data we need is all cached, get it from cache.
124 pos
+ bytes_to_read
<= f
->cache_bytes_used
)
126 de_memcpy(buf
, &f
->cache
[pos
], (size_t)bytes_to_read
);
127 bytes_read
= bytes_to_read
;
132 case DBUF_TYPE_IFILE
:
134 de_internal_err_fatal(c
, "File not open");
138 // For performance reasons, don't call fseek if we're already at the
140 if(!f
->file_pos_known
|| f
->file_pos
!=pos
) {
141 de_fseek(f
->fp
, pos
, SEEK_SET
);
144 bytes_read
= fread(buf
, 1, (size_t)bytes_to_read
, f
->fp
);
146 f
->file_pos
= pos
+ bytes_read
;
147 f
->file_pos_known
= 1;
150 case DBUF_TYPE_IDBUF
:
151 // Recursive call to the parent dbuf.
152 dbuf_read(f
->parent_dbuf
, buf
, f
->offset_into_parent_dbuf
+pos
, bytes_to_read
);
154 // The parent dbuf always writes 'bytes_to_read' bytes.
155 bytes_read
= bytes_to_read
;
158 case DBUF_TYPE_MEMBUF
:
159 de_memcpy(buf
, &f
->membuf_buf
[pos
], (size_t)bytes_to_read
);
160 bytes_read
= bytes_to_read
;
164 de_internal_err_fatal(c
, "getbytes from this I/O type not implemented");
169 // Zero out any requested bytes that were not read.
170 if(bytes_read
< len
) {
171 de_zeromem(buf
+bytes_read
, (size_t)(len
- bytes_read
));
175 // A function that works a little more like a standard read/fread function than
176 // does dbuf_read. It returns the number of bytes read, won't read past end of
177 // file, and helps track the file position.
178 i64
dbuf_standard_read(dbuf
*f
, u8
*buf
, i64 n
, i64
*fpos
)
182 if(*fpos
< 0 || *fpos
>= f
->len
) return 0;
185 if(*fpos
+ amt_to_read
> f
->len
) amt_to_read
= f
->len
- *fpos
;
186 dbuf_read(f
, buf
, *fpos
, amt_to_read
);
187 *fpos
+= amt_to_read
;
191 u8
dbuf_getbyte(dbuf
*f
, i64 pos
)
193 if(pos
<0 || pos
>=f
->len
) return 0x00;
195 if(pos
<f
->cache_bytes_used
) {
196 return f
->cache
[pos
];
198 if(f
->btype
==DBUF_TYPE_MEMBUF
) {
199 // Note that it is necessary to handle read+write dbuf types specially,
200 // so that the "cache2" feature isn't used.
201 return f
->membuf_buf
[pos
];
204 // TODO: I don't like that cache2 exists, but without it some large images
205 // are decoded too slowly (especially on Windows), and I haven't figured out
206 // a solution I like better.
207 if(pos
==f
->cache2_pos
) {
211 dbuf_read(f
, &f
->cache2
, pos
, 1);
// Interpret the single byte at m as a signed (two's complement) 8-bit
// integer, and return it widened to i64 (range -128..127).
i64 de_geti8_direct(const u8 *m)
{
	u8 b = m[0];

	if(b<=127) return (i64)b;
	return ((i64)b)-256;
}
223 i64
dbuf_geti8(dbuf
*f
, i64 pos
)
227 b
= dbuf_getbyte(f
, pos
);
228 return de_geti8_direct(&b
);
231 u8
dbuf_getbyte_p(dbuf
*f
, i64
*ppos
)
234 b
= dbuf_getbyte(f
, *ppos
);
// Decode nbytes bytes at m as an unsigned big-endian integer.
// Returns 0 if another byte is about to be shifted in while the accumulated
// value already occupies the top 8 bits (i.e. the value would overflow 64 bits).
static i64 dbuf_getuint_ext_be_direct(const u8 *m, unsigned int nbytes)
{
	unsigned int k;
	u64 val = 0;

	for(k=0; k<nbytes; k++) {
		if(val>0x00ffffffffffffffULL) return 0;
		val = (val<<8) | (u64)m[k];
	}
	return (i64)val;
}
251 static i64
dbuf_getint_ext_be_direct(const u8
*m
, unsigned int nbytes
)
256 // We can handle up to 8 arbitrary bytes. Any more have to be 0xff.
258 for(k
=0; k
<nbytes
-8; k
++) {
259 if(m
[k
]!=0xff) return 0; // underflow
263 // Process bytes in order of increasing significance
268 byteval
= m
[nbytes
-1-k
];
273 val
|= ((u64
)byteval
) << (k
*8);
278 static i64
dbuf_getuint_ext_le_direct(const u8
*m
, unsigned int nbytes
)
283 for(k
=0; k
<nbytes
; k
++) {
286 val
|= ((u64
)m
[k
])<<(k
*8);
292 static i64
dbuf_getuint_ext_x(dbuf
*f
, i64 pos
, unsigned int nbytes
,
297 if(nbytes
>(unsigned int)sizeof(m
)) return 0;
298 dbuf_read(f
, m
, pos
, (i64
)nbytes
);
300 return dbuf_getuint_ext_le_direct(m
, nbytes
);
302 return dbuf_getuint_ext_be_direct(m
, nbytes
);
305 static i64
dbuf_getint_ext_x(dbuf
*f
, i64 pos
, unsigned int nbytes
, int is_le
)
309 if(nbytes
>(unsigned int)sizeof(m
)) return 0;
310 dbuf_read(f
, m
, pos
, (i64
)nbytes
);
314 return dbuf_getint_ext_be_direct(m
, nbytes
);
// Decode the 2 bytes at m as an unsigned 16-bit big-endian integer.
i64 de_getu16be_direct(const u8 *m)
{
	return (i64)(((u32)m[1]) | (((u32)m[0])<<8));
}
322 i64
dbuf_getu16be(dbuf
*f
, i64 pos
)
325 dbuf_read(f
, m
, pos
, 2);
326 return de_getu16be_direct(m
);
329 i64
dbuf_getu16be_p(dbuf
*f
, i64
*ppos
)
332 dbuf_read(f
, m
, *ppos
, 2);
334 return de_getu16be_direct(m
);
// Decode the 2 bytes at m as an unsigned 16-bit little-endian integer.
i64 de_getu16le_direct(const u8 *m)
{
	return (i64)(((u32)m[0]) | (((u32)m[1])<<8));
}
342 i64
dbuf_getu16le(dbuf
*f
, i64 pos
)
345 dbuf_read(f
, m
, pos
, 2);
346 return de_getu16le_direct(m
);
349 i64
dbuf_getu16le_p(dbuf
*f
, i64
*ppos
)
352 dbuf_read(f
, m
, *ppos
, 2);
354 return de_getu16le_direct(m
);
357 i64
dbuf_geti16be(dbuf
*f
, i64 pos
)
360 n
= dbuf_getu16be(f
, pos
);
361 if(n
>=32768) n
-= 65536;
365 i64
dbuf_geti16le(dbuf
*f
, i64 pos
)
368 n
= dbuf_getu16le(f
, pos
);
369 if(n
>=32768) n
-= 65536;
373 i64
dbuf_geti16be_p(dbuf
*f
, i64
*ppos
)
376 n
= dbuf_geti16be(f
, *ppos
);
381 i64
dbuf_geti16le_p(dbuf
*f
, i64
*ppos
)
384 n
= dbuf_geti16le(f
, *ppos
);
// Decode the 4 bytes at m as an unsigned 32-bit big-endian integer.
i64 de_getu32be_direct(const u8 *m)
{
	return (i64)(((u32)m[3]) | (((u32)m[2])<<8) |
		(((u32)m[1])<<16) | (((u32)m[0])<<24));
}
395 i64
dbuf_getu32be(dbuf
*f
, i64 pos
)
398 dbuf_read(f
, m
, pos
, 4);
399 return de_getu32be_direct(m
);
402 i64
dbuf_getu32be_p(dbuf
*f
, i64
*ppos
)
405 dbuf_read(f
, m
, *ppos
, 4);
407 return de_getu32be_direct(m
);
// Decode the 4 bytes at m as an unsigned 32-bit little-endian integer.
i64 de_getu32le_direct(const u8 *m)
{
	return (i64)(((u32)m[0]) | (((u32)m[1])<<8) |
		(((u32)m[2])<<16) | (((u32)m[3])<<24));
}
416 i64
dbuf_getu32le(dbuf
*f
, i64 pos
)
419 dbuf_read(f
, m
, pos
, 4);
420 return de_getu32le_direct(m
);
423 i64
dbuf_getu32le_p(dbuf
*f
, i64
*ppos
)
426 dbuf_read(f
, m
, *ppos
, 4);
428 return de_getu32le_direct(m
);
431 i64
dbuf_geti32be(dbuf
*f
, i64 pos
)
434 n
= dbuf_getu32be(f
, pos
);
435 return (i64
)(i32
)(u32
)n
;
438 i64
dbuf_geti32le(dbuf
*f
, i64 pos
)
441 n
= dbuf_getu32le(f
, pos
);
442 return (i64
)(i32
)(u32
)n
;
445 i64
dbuf_geti32be_p(dbuf
*f
, i64
*ppos
)
448 n
= dbuf_geti32be(f
, *ppos
);
453 i64
dbuf_geti32le_p(dbuf
*f
, i64
*ppos
)
456 n
= dbuf_geti32le(f
, *ppos
);
// Decode the 8 bytes at m as an unsigned 64-bit big-endian integer.
u64 de_getu64be_direct(const u8 *m)
{
	unsigned int i;
	u64 val = 0;

	for(i=0; i<8; i++) {
		// m[0] is the most significant byte.
		val |= ((u64)m[i])<<((7-i)*8);
	}
	return val;
}
472 i64
de_geti64be_direct(const u8
*m
)
474 return (i64
)de_getu64be_direct(m
);
477 i64
dbuf_geti64be(dbuf
*f
, i64 pos
)
480 dbuf_read(f
, m
, pos
, 8);
481 return de_geti64be_direct(m
);
// Decode the 8 bytes at m as an unsigned 64-bit little-endian integer.
u64 de_getu64le_direct(const u8 *m)
{
	unsigned int i;
	u64 val = 0;

	for(i=0; i<8; i++) {
		// m[0] is the least significant byte.
		val |= ((u64)m[i])<<(i*8);
	}
	return val;
}
495 i64
de_geti64le_direct(const u8
*m
)
497 return (i64
)de_getu64le_direct(m
);
500 i64
dbuf_geti64le(dbuf
*f
, i64 pos
)
503 dbuf_read(f
, m
, pos
, 8);
504 return de_geti64le_direct(m
);
507 i64
dbuf_getu16x(dbuf
*f
, i64 pos
, int is_le
)
509 if(is_le
) return dbuf_getu16le(f
, pos
);
510 return dbuf_getu16be(f
, pos
);
513 i64
dbuf_geti16x(dbuf
*f
, i64 pos
, int is_le
)
515 if(is_le
) return dbuf_geti16le(f
, pos
);
516 return dbuf_geti16be(f
, pos
);
519 i64
dbuf_getu32x(dbuf
*f
, i64 pos
, int is_le
)
521 if(is_le
) return dbuf_getu32le(f
, pos
);
522 return dbuf_getu32be(f
, pos
);
525 i64
dbuf_geti32x(dbuf
*f
, i64 pos
, int is_le
)
527 if(is_le
) return dbuf_geti32le(f
, pos
);
528 return dbuf_geti32be(f
, pos
);
531 i64
dbuf_geti64x(dbuf
*f
, i64 pos
, int is_le
)
533 if(is_le
) return dbuf_geti64le(f
, pos
);
534 return dbuf_geti64be(f
, pos
);
537 u64
dbuf_getu64be(dbuf
*f
, i64 pos
)
540 dbuf_read(f
, m
, pos
, 8);
541 return de_getu64be_direct(m
);
544 u64
dbuf_getu64le(dbuf
*f
, i64 pos
)
547 dbuf_read(f
, m
, pos
, 8);
548 return de_getu64le_direct(m
);
551 u64
dbuf_getu64x(dbuf
*f
, i64 pos
, int is_le
)
553 if(is_le
) return dbuf_getu64le(f
, pos
);
554 return dbuf_getu64be(f
, pos
);
557 i64
dbuf_getint_ext(dbuf
*f
, i64 pos
, unsigned int nbytes
,
558 int is_le
, int is_signed
)
561 // TODO: Extend this to any number of bytes, 1-8.
563 case 1: return (i64
)(signed char)dbuf_getbyte(f
, pos
); break;
564 case 2: return dbuf_geti16x(f
, pos
, is_le
); break;
565 case 4: return dbuf_geti32x(f
, pos
, is_le
); break;
566 case 8: return dbuf_geti64x(f
, pos
, is_le
); break;
568 return dbuf_getint_ext_x(f
, pos
, nbytes
, is_le
);
573 case 1: return (i64
)dbuf_getbyte(f
, pos
); break;
574 case 2: return dbuf_getu16x(f
, pos
, is_le
); break;
575 case 4: return dbuf_getu32x(f
, pos
, is_le
); break;
576 case 8: return dbuf_geti64x(f
, pos
, is_le
); break;
578 return dbuf_getuint_ext_x(f
, pos
, nbytes
, is_le
);
584 static void init_fltpt_decoder(deark
*c
)
589 c
->can_decode_fltpt
= 0;
590 if(sizeof(float)!=4) return;
591 if(sizeof(double)!=8) return;
592 c
->can_decode_fltpt
= 1;
594 de_memcpy(&b
, &x
, 1);
601 double de_getfloat32x_direct(deark
*c
, const u8
*m
, int is_le
)
606 if(c
->can_decode_fltpt
<0) {
607 init_fltpt_decoder(c
);
609 if(!c
->can_decode_fltpt
) return 0.0;
611 // FIXME: This assumes that the native floating point format is
612 // IEEE 754, but that does not have to be the case.
614 de_memcpy(buf
, m
, 4);
616 if(is_le
!= c
->host_is_le
) {
619 // Reverse order of bytes
621 tmpc
= buf
[i
]; buf
[i
] = buf
[3-i
]; buf
[3-i
] = tmpc
;
625 de_memcpy(&val
, buf
, 4);
629 double dbuf_getfloat32x(dbuf
*f
, i64 pos
, int is_le
)
632 dbuf_read(f
, buf
, pos
, 4);
633 return de_getfloat32x_direct(f
->c
, buf
, is_le
);
636 double de_getfloat64x_direct(deark
*c
, const u8
*m
, int is_le
)
641 if(c
->can_decode_fltpt
<0) {
642 init_fltpt_decoder(c
);
644 if(!c
->can_decode_fltpt
) return 0.0;
646 de_memcpy(buf
, m
, 8);
648 if(is_le
!= c
->host_is_le
) {
651 // Reverse order of bytes
653 tmpc
= buf
[i
]; buf
[i
] = buf
[7-i
]; buf
[7-i
] = tmpc
;
657 de_memcpy(&val
, buf
, 8);
661 double dbuf_getfloat64x(dbuf
*f
, i64 pos
, int is_le
)
664 dbuf_read(f
, buf
, pos
, 8);
665 return de_getfloat64x_direct(f
->c
, buf
, is_le
);
668 int dbuf_read_ascii_number(dbuf
*f
, i64 pos
, i64 fieldsize
,
669 int base
, i64
*value
)
674 if(fieldsize
>(i64
)(sizeof(buf
)-1)) return 0;
676 dbuf_read(f
, (u8
*)buf
, pos
, fieldsize
);
677 buf
[fieldsize
] = '\0';
679 *value
= de_strtoll(buf
, NULL
, base
);
683 de_color
dbuf_getRGB(dbuf
*f
, i64 pos
, unsigned int flags
)
686 dbuf_read(f
, buf
, pos
, 3);
687 if(flags
&DE_GETRGBFLAG_BGR
)
688 return DE_MAKE_RGB(buf
[2], buf
[1], buf
[0]);
689 return DE_MAKE_RGB(buf
[0], buf
[1], buf
[2]);
692 static int copy_cbfn(struct de_bufferedreadctx
*brctx
, const u8
*buf
,
695 dbuf
*outf
= (dbuf
*)brctx
->userdata
;
696 dbuf_write(outf
, buf
, buf_len
);
700 void dbuf_copy(dbuf
*inf
, i64 input_offset
, i64 input_len
, dbuf
*outf
)
704 // Fast paths, if the data to copy is all in memory
707 (input_offset
>=0) && (input_offset
+input_len
<=inf
->cache_bytes_used
))
709 dbuf_write(outf
, &inf
->cache
[input_offset
], input_len
);
713 if(inf
->btype
==DBUF_TYPE_MEMBUF
&&
714 (input_offset
>=0) && (input_offset
+input_len
<=inf
->len
))
716 dbuf_write(outf
, &inf
->membuf_buf
[input_offset
], input_len
);
720 if(input_len
<=(i64
)sizeof(tmpbuf
)) {
721 // Fast path for small sizes
722 dbuf_read(inf
, tmpbuf
, input_offset
, input_len
);
723 dbuf_write(outf
, tmpbuf
, input_len
);
727 dbuf_buffered_read(inf
, input_offset
, input_len
, copy_cbfn
, (void*)outf
);
735 static int copy_at_cbfn(struct de_bufferedreadctx
*brctx
, const u8
*buf
,
738 struct copy_at_ctx
*ctx
= (struct copy_at_ctx
*)brctx
->userdata
;
740 dbuf_write_at(ctx
->outf
, ctx
->outpos
, buf
, buf_len
);
741 ctx
->outpos
+= buf_len
;
745 void dbuf_copy_at(dbuf
*inf
, i64 input_offset
, i64 input_len
,
746 dbuf
*outf
, i64 output_offset
)
748 struct copy_at_ctx ctx
;
751 ctx
.outpos
= output_offset
;
752 dbuf_buffered_read(inf
, input_offset
, input_len
, copy_at_cbfn
, (void*)&ctx
);
755 // An advanced function for reading a string from a file.
756 // The issue is that some strings are both human-readable and machine-readable.
757 // In such a case, we'd like to read some data from a file into a nice printable
758 // ucstring, while also making some or all of the raw bytes available, say for
759 // byte-for-byte string comparisons.
760 // Plus (for NUL-terminated/padded strings), we may need to know the actual length
761 // of the string in the file, so that it can be skipped over, even if we don't
762 // care about the whole string.
763 // Caller is responsible for calling destroy_stringreader() on the returned value.
764 // max_bytes_to_scan: The maximum number of bytes to read from the file.
765 // max_bytes_to_keep: The maximum (or in some cases the exact) number of bytes,
766 // not counting any NUL terminator, to return in ->sz.
767 // The ->str field is a Unicode version of ->sz, so this also affects ->str.
768 // If DE_CONVFLAG_STOP_AT_NUL is not set, it is assumed we are reading a string
769 // of known length, that may have internal NUL bytes. The caller must set
770 // max_bytes_to_scan and max_bytes_to_keep to the same value. The ->sz field will
771 // always be allocated with this many bytes, plus one more for an artificial NUL
773 // If DE_CONVFLAG_WANT_UTF8 is set, then the ->sz_utf8 field will be set to a
774 // UTF-8 version of ->str. This is mainly useful if the original string was
775 // UTF-16. sz_utf8 is not "printable" -- use ucstring_get_printable_sz_n(str) for
777 // ->sz_strlen will equal strlen(->sz) if DE_CONVFLAG_STOP_AT_NUL is set, or
778 // the supplied value of max_bytes_to_(scan|keep) if not.
780 // - DE_CONVFLAG_STOP_AT_NUL
781 // - DE_CONVFLAG_WANT_UTF8
782 struct de_stringreaderdata
*dbuf_read_string(dbuf
*f
, i64 pos
,
783 i64 max_bytes_to_scan
,
784 i64 max_bytes_to_keep
,
785 unsigned int flags
, de_ext_encoding ee
)
788 struct de_stringreaderdata
*srd
;
791 i64 bytes_avail_to_read
;
795 srd
= de_malloc(c
, sizeof(struct de_stringreaderdata
));
796 srd
->str
= ucstring_create(c
);
797 if(max_bytes_to_scan
<0) max_bytes_to_scan
= 0;
798 if(max_bytes_to_keep
<0) max_bytes_to_keep
= 0;
800 bytes_avail_to_read
= max_bytes_to_scan
;
801 if(bytes_avail_to_read
> f
->len
-pos
) {
802 bytes_avail_to_read
= f
->len
-pos
;
804 if(bytes_avail_to_read
<0) bytes_avail_to_read
= 0;
806 srd
->bytes_consumed
= bytes_avail_to_read
; // default
808 // From here on, we can safely bail out ("goto done"). The
809 // de_stringreaderdata struct is sufficiently valid.
811 if(!(flags
&DE_CONVFLAG_STOP_AT_NUL
) &&
812 (max_bytes_to_scan
!= max_bytes_to_keep
))
814 // To reduce possible confusion, we require that
815 // max_bytes_to_scan==max_bytes_to_keep in this case.
816 srd
->sz
= de_malloc(c
, max_bytes_to_keep
+1);
820 if(flags
&DE_CONVFLAG_STOP_AT_NUL
) {
821 ret
= dbuf_search_byte(f
, 0x00, pos
, bytes_avail_to_read
, &foundpos
);
826 // No NUL byte found. Could be an error in some formats, but in
827 // others NUL is used as separator or as padding, not a terminator.
828 foundpos
= pos
+bytes_avail_to_read
;
831 x_strlen
= foundpos
-pos
;
832 srd
->bytes_consumed
= x_strlen
+1;
835 x_strlen
= max_bytes_to_keep
;
836 srd
->bytes_consumed
= x_strlen
;
839 bytes_to_malloc
= x_strlen
+1;
840 if(bytes_to_malloc
>(max_bytes_to_keep
+1)) {
841 bytes_to_malloc
= max_bytes_to_keep
+1;
842 srd
->was_truncated
= 1;
845 srd
->sz
= de_malloc(c
, bytes_to_malloc
);
846 dbuf_read(f
, (u8
*)srd
->sz
, pos
, bytes_to_malloc
-1); // The last byte remains NUL
848 ucstring_append_bytes(srd
->str
, (const u8
*)srd
->sz
, bytes_to_malloc
-1, 0, ee
);
850 if(flags
&DE_CONVFLAG_WANT_UTF8
) {
851 srd
->sz_utf8_strlen
= (size_t)ucstring_count_utf8_bytes(srd
->str
);
852 srd
->sz_utf8
= de_malloc(c
, (i64
)srd
->sz_utf8_strlen
+ 1);
853 ucstring_to_sz(srd
->str
, srd
->sz_utf8
, srd
->sz_utf8_strlen
+ 1, 0, DE_ENCODING_UTF8
);
858 // Always return a valid sz, even on failure.
859 srd
->sz
= de_malloc(c
, 1);
861 if((flags
&DE_CONVFLAG_WANT_UTF8
) && !srd
->sz_utf8
) {
862 // Always return a valid sz_utf8 if it was requested, even on failure.
863 srd
->sz_utf8
= de_malloc(c
, 1);
864 srd
->sz_utf8_strlen
= 0;
866 srd
->sz_strlen
= (size_t)x_strlen
;
870 void de_destroy_stringreaderdata(deark
*c
, struct de_stringreaderdata
*srd
)
874 de_free(c
, srd
->sz_utf8
);
875 ucstring_destroy(srd
->str
);
879 void dbuf_read_to_ucstring_ex(dbuf
*f
, i64 pos1
, i64 len
,
880 de_ucstring
*s
, unsigned int conv_flags
, struct de_encconv_state
*es
)
882 i64 nbytes_remaining
;
885 #define READTOUCSTRING_BUFLEN 256
886 u8 buf
[READTOUCSTRING_BUFLEN
];
888 if(conv_flags
& DE_CONVFLAG_STOP_AT_NUL
) {
890 // We handle STOP_AT_NUL ourselves, so don't pass it on.
891 conv_flags
-= DE_CONVFLAG_STOP_AT_NUL
;
894 // Note: It might be sensible to use dbuf_buffered_read() here, but I've
895 // decided against it for now.
896 nbytes_remaining
= len
;
900 unsigned int conv_flags_to_use_this_time
;
902 // Lack of DE_CONVFLAG_PARTIAL_DATA flag signals end of data, which
903 // isn't necessarily a no-op even with len=0.
904 // That's why we always do this loop at least once.
906 nbytes_to_read
= de_min_int(nbytes_remaining
, READTOUCSTRING_BUFLEN
);
907 dbuf_read(f
, buf
, pos
, nbytes_to_read
);
908 pos
+= nbytes_to_read
;
909 nbytes_in_buf
= nbytes_to_read
;
910 nbytes_remaining
-= nbytes_to_read
;
915 tmpp
= de_memchr(buf
, 0x00, (size_t)nbytes_in_buf
);
917 nbytes_in_buf
= (const u8
*)tmpp
- buf
;
918 nbytes_remaining
= 0;
922 conv_flags_to_use_this_time
= conv_flags
;
923 if(nbytes_remaining
>0) {
924 // The caller may have already set this flag, in which case we will use
926 // If not, we still use it for all but the final call to ucstring_append_bytes_ex().
927 conv_flags_to_use_this_time
|= DE_CONVFLAG_PARTIAL_DATA
;
930 ucstring_append_bytes_ex(s
, buf
, nbytes_in_buf
, conv_flags_to_use_this_time
, es
);
931 } while(nbytes_remaining
>0);
935 // Read (up to) len bytes from f, translate them to characters, and append
937 void dbuf_read_to_ucstring(dbuf
*f
, i64 pos
, i64 len
,
938 de_ucstring
*s
, unsigned int conv_flags
, de_ext_encoding ee
)
940 struct de_encconv_state es
;
942 de_encconv_init(&es
, ee
);
943 dbuf_read_to_ucstring_ex(f
, pos
, len
, s
, conv_flags
, &es
);
946 void dbuf_read_to_ucstring_n(dbuf
*f
, i64 pos
, i64 len
, i64 max_len
,
947 de_ucstring
*s
, unsigned int conv_flags
, de_ext_encoding ee
)
949 struct de_encconv_state es
;
951 if(len
>max_len
) len
= max_len
;
952 de_encconv_init(&es
, ee
);
953 dbuf_read_to_ucstring_ex(f
, pos
, len
, s
, conv_flags
, &es
);
956 static int dbufmemcmp_cbfn(struct de_bufferedreadctx
*brctx
, const u8
*buf
,
959 // Return 0 if there is a mismatch.
960 return !de_memcmp(buf
,
961 &(((const u8
*)brctx
->userdata
)[brctx
->offset
]),
965 int dbuf_memcmp(dbuf
*f
, i64 pos
, const void *s
, size_t n
)
971 pos
+ (i64
)n
<= f
->cache_bytes_used
)
973 // Fastest path: Compare directly to cache.
974 return de_memcmp(s
, &f
->cache
[pos
], n
);
977 if(n
<=sizeof(buf1
)) {
978 // Use a stack buffer if small enough.
979 dbuf_read(f
, buf1
, pos
, n
);
980 return de_memcmp(buf1
, s
, n
);
984 return !dbuf_buffered_read(f
, pos
, n
, dbufmemcmp_cbfn
, (void*)s
);
987 int dbuf_create_file_from_slice(dbuf
*inf
, i64 pos
, i64 data_size
,
988 const char *ext
, de_finfo
*fi
, unsigned int createflags
)
991 f
= dbuf_create_output_file(inf
->c
, ext
, fi
, createflags
);
993 dbuf_copy(inf
, pos
, data_size
, f
);
998 static void finfo_shallow_copy(deark
*c
, de_finfo
*src
, de_finfo
*dst
)
1002 dst
->is_directory
= src
->is_directory
;
1003 dst
->mode_flags
= src
->mode_flags
;
1004 for(k
=0; k
<DE_TIMESTAMPIDX_COUNT
; k
++) {
1005 dst
->timestamp
[k
] = src
->timestamp
[k
];
1007 dst
->internal_mod_time
= src
->internal_mod_time
;
1008 dst
->density
= src
->density
;
1009 dst
->has_hotspot
= src
->has_hotspot
;
1010 dst
->hotspot_x
= src
->hotspot_x
;
1011 dst
->hotspot_y
= src
->hotspot_y
;
1014 static dbuf
*create_dbuf_lowlevel(deark
*c
)
1018 f
= de_malloc(c
, sizeof(dbuf
));
1020 f
->cache2_pos
= -1; // Any offset outside the bounds of the file will do.
1024 // Create or open a file for writing, that is *not* one of the usual
1025 // "output.000.ext" files we extract from the input file.
1027 // overwrite_mode, flags: Same as for de_fopen_for_write().
1029 // On failure, prints an error message, and sets f->btype to DBUF_TYPE_NULL.
1030 dbuf
*dbuf_create_unmanaged_file(deark
*c
, const char *fname
, int overwrite_mode
,
1036 f
= create_dbuf_lowlevel(c
);
1038 f
->name
= de_strdup(c
, fname
);
1040 f
->btype
= DBUF_TYPE_OFILE
;
1041 f
->max_len_hard
= c
->max_output_file_size
;
1042 f
->fp
= de_fopen_for_write(c
, f
->name
, msgbuf
, sizeof(msgbuf
),
1043 c
->overwrite_mode
, flags
);
1046 de_err(c
, "Failed to write %s: %s", f
->name
, msgbuf
);
1047 f
->btype
= DBUF_TYPE_NULL
;
1048 c
->serious_error_flag
= 1;
1054 dbuf
*dbuf_create_unmanaged_file_stdout(deark
*c
, const char *name
)
1058 f
= create_dbuf_lowlevel(c
);
1060 f
->name
= de_strdup(c
, name
);
1061 f
->btype
= DBUF_TYPE_STDOUT
;
1062 f
->max_len_hard
= c
->max_output_file_size
;
1067 static void sanitize_ext(const char *ext1
, char *ext
, size_t extlen
)
1071 de_strlcpy(ext
, ext1
, extlen
);
1072 // This part of the filename should come from Deark, and should only
1073 // use a limited set of characters. Just to be sure:
1074 for(k
=0; ext
[k
]; k
++) {
1075 if((ext
[k
]>='0' && ext
[k
]<='9') ||
1076 (ext
[k
]>='A' && ext
[k
]<='Z') ||
1077 (ext
[k
]>='a' && ext
[k
]<='z') ||
1078 ext
[k
]=='.' || ext
[k
]=='_' || ext
[k
]=='-' || ext
[k
]=='+')
1088 dbuf
*dbuf_create_output_file(deark
*c
, const char *ext1
, de_finfo
*fi
,
1089 unsigned int createflags
)
1098 u8 is_directory
= 0;
1099 char *name_from_finfo
= NULL
;
1100 i64 name_from_finfo_len
= 0;
1104 sanitize_ext(ext1
, ext
, sizeof(ext
));
1111 if(have_ext
&& fi
&& fi
->original_filename_flag
) {
1112 de_dbg(c
, "[internal warning: Incorrect use of create_output_file]");
1115 f
= create_dbuf_lowlevel(c
);
1116 f
->max_len_hard
= c
->max_output_file_size
;
1119 if(fi
&& fi
->is_directory
) {
1123 if(is_directory
&& !c
->keep_dir_entries
) {
1124 de_dbg(c
, "skipping 'directory' file");
1125 f
->btype
= DBUF_TYPE_NULL
;
1129 if(c
->extract_policy
==DE_EXTRACTPOLICY_MAINONLY
) {
1130 if(createflags
&DE_CREATEFLAG_IS_AUX
) {
1131 de_dbg(c
, "skipping 'auxiliary' file");
1132 f
->btype
= DBUF_TYPE_NULL
;
1136 else if(c
->extract_policy
==DE_EXTRACTPOLICY_AUXONLY
) {
1137 if(!(createflags
&DE_CREATEFLAG_IS_AUX
)) {
1138 de_dbg(c
, "skipping 'main' file");
1139 f
->btype
= DBUF_TYPE_NULL
;
1144 file_index
= c
->file_count
;
1147 basefn
= c
->base_output_filename
? c
->base_output_filename
: "output";
1149 if(fi
&& ucstring_isnonempty(fi
->file_name_internal
)) {
1150 name_from_finfo_len
= 1 + ucstring_count_utf8_bytes(fi
->file_name_internal
);
1151 name_from_finfo
= de_malloc(c
, name_from_finfo_len
);
1152 ucstring_to_sz(fi
->file_name_internal
, name_from_finfo
, (size_t)name_from_finfo_len
, 0,
1156 if(c
->output_style
==DE_OUTPUTSTYLE_ARCHIVE
&& !c
->base_output_filename
&&
1157 fi
&& fi
->is_directory
&&
1158 (fi
->is_root_dir
|| (fi
->detect_root_dot_dir
&& fi
->orig_name_was_dot
)))
1160 de_strlcpy(nbuf
, ".", sizeof(nbuf
));
1162 else if(c
->output_style
==DE_OUTPUTSTYLE_ARCHIVE
&& !c
->base_output_filename
&&
1163 fi
&& fi
->original_filename_flag
&& name_from_finfo
)
1165 // TODO: This is a "temporary" hack to allow us to, when both reading from
1166 // and writing to an archive format, use some semblance of the correct
1167 // filename (instead of "output.xxx.yyy").
1168 // There are some things that we don't handle optimally, such as
1170 // A major redesign of the file naming logic would be good.
1171 de_strlcpy(nbuf
, name_from_finfo
, sizeof(nbuf
));
1174 char fn_suffix
[256];
1176 if(have_ext
&& name_from_finfo
) {
1177 de_snprintf(fn_suffix
, sizeof(fn_suffix
), "%s.%s", name_from_finfo
, ext
);
1180 de_strlcpy(fn_suffix
, ext
, sizeof(fn_suffix
));
1182 else if(is_directory
&& name_from_finfo
) {
1183 de_snprintf(fn_suffix
, sizeof(fn_suffix
), "%s.dir", name_from_finfo
);
1185 else if(name_from_finfo
) {
1186 de_strlcpy(fn_suffix
, name_from_finfo
, sizeof(fn_suffix
));
1188 else if(is_directory
) {
1189 de_strlcpy(fn_suffix
, "dir", sizeof(fn_suffix
));
1192 de_strlcpy(fn_suffix
, "bin", sizeof(fn_suffix
));
1195 de_snprintf(nbuf
, sizeof(nbuf
), "%s.%03d.%s", basefn
, file_index
, fn_suffix
);
1198 f
->name
= de_strdup(c
, nbuf
);
1201 // The finfo object passed to us at file creation is not required to
1202 // remain valid, so make a copy of anything in it that we might need
1204 f
->fi_copy
= de_finfo_create(c
);
1205 finfo_shallow_copy(c
, fi
, f
->fi_copy
);
1207 // Here's where we respect the -intz option, by using it to convert to
1208 // UTC in some cases.
1209 if(f
->fi_copy
->timestamp
[DE_TIMESTAMPIDX_MODIFY
].is_valid
&& f
->fi_copy
->timestamp
[DE_TIMESTAMPIDX_MODIFY
].tzcode
==DE_TZCODE_LOCAL
&&
1210 c
->input_tz_offs_seconds
!=0)
1212 de_timestamp_cvt_to_utc(&f
->fi_copy
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], -c
->input_tz_offs_seconds
);
1215 if(f
->fi_copy
->internal_mod_time
.is_valid
&& f
->fi_copy
->internal_mod_time
.tzcode
==DE_TZCODE_LOCAL
&&
1216 c
->input_tz_offs_seconds
!=0)
1218 de_timestamp_cvt_to_utc(&f
->fi_copy
->internal_mod_time
, -c
->input_tz_offs_seconds
);
1222 if(file_index
< c
->first_output_file
) {
1223 f
->btype
= DBUF_TYPE_NULL
;
1227 if(c
->max_output_files
>=0 &&
1228 file_index
>= c
->first_output_file
+ c
->max_output_files
)
1230 f
->btype
= DBUF_TYPE_NULL
;
1234 c
->num_files_extracted
++;
1236 if(c
->extrlist_dbuf
) {
1237 dbuf_printf(c
->extrlist_dbuf
, "%s\n", f
->name
);
1238 dbuf_flush(c
->extrlist_dbuf
);
1242 f
->btype
= DBUF_TYPE_NULL
;
1243 if(c
->list_mode_include_file_id
) {
1244 de_msg(c
, "%d:%s", file_index
, f
->name
);
1247 de_msg(c
, "%s", f
->name
);
1252 if(c
->output_style
==DE_OUTPUTSTYLE_ARCHIVE
&& c
->archive_fmt
==DE_ARCHIVEFMT_TAR
) {
1253 de_info(c
, "Adding %s to TAR file", f
->name
);
1254 f
->btype
= DBUF_TYPE_ODBUF
;
1255 // A dummy max_len_hard value. The parent will do the checking.
1256 f
->max_len_hard
= DE_DUMMY_MAX_FILE_SIZE
;
1257 f
->writing_to_tar_archive
= 1;
1258 de_tar_start_member_file(c
, f
);
1260 else if(c
->output_style
==DE_OUTPUTSTYLE_ARCHIVE
) { // ZIP
1262 de_info(c
, "Adding %s to ZIP file", f
->name
);
1263 f
->btype
= DBUF_TYPE_MEMBUF
;
1264 f
->max_len_hard
= DE_MAX_MEMBUF_SIZE
;
1266 // A directory entry is not expected to have any data associated
1267 // with it (besides the files it contains).
1271 initial_alloc
= 65536;
1273 f
->membuf_buf
= de_malloc(c
, initial_alloc
);
1274 f
->membuf_alloc
= initial_alloc
;
1275 f
->write_memfile_to_zip_archive
= 1;
1277 else if(c
->output_style
==DE_OUTPUTSTYLE_STDOUT
) {
1278 de_info(c
, "Writing %s to [stdout]", f
->name
);
1279 f
->btype
= DBUF_TYPE_STDOUT
;
1280 // TODO: Should we increase f->max_len_hard?
1284 de_info(c
, "Writing %s", f
->name
);
1285 f
->btype
= DBUF_TYPE_OFILE
;
1286 f
->fp
= de_fopen_for_write(c
, f
->name
, msgbuf
, sizeof(msgbuf
),
1287 c
->overwrite_mode
, 0);
1290 de_err(c
, "Failed to write %s: %s", f
->name
, msgbuf
);
1291 f
->btype
= DBUF_TYPE_NULL
;
1292 c
->serious_error_flag
= 1;
1297 de_free(c
, name_from_finfo
);
1301 static void do_on_dbuf_size_exceeded(dbuf
*f
)
1303 de_err(f
->c
, "Maximum %s size of %"I64_FMT
" bytes exceeded",
1304 (f
->btype
==DBUF_TYPE_MEMBUF
)?"membuf":"output file",
1306 de_fatalerror(f
->c
);
1309 dbuf
*dbuf_create_membuf(deark
*c
, i64 initialsize
, unsigned int flags
)
1313 f
= create_dbuf_lowlevel(c
);
1314 f
->btype
= DBUF_TYPE_MEMBUF
;
1315 f
->max_len_hard
= DE_MAX_MEMBUF_SIZE
;
1318 if(initialsize
> f
->max_len_hard
) {
1319 do_on_dbuf_size_exceeded(f
);
1321 f
->membuf_buf
= de_malloc(c
, initialsize
);
1322 f
->membuf_alloc
= initialsize
;
1326 dbuf_set_length_limit(f
, initialsize
);
1332 static void membuf_append(dbuf
*f
, const u8
*m
, i64 mlen
)
1336 if(f
->has_len_limit
) {
1337 if(f
->len
+ mlen
> f
->len_limit
) {
1338 mlen
= f
->len_limit
- f
->len
;
1344 if(mlen
> f
->membuf_alloc
- f
->len
) {
1345 // Need to allocate more space
1346 new_alloc_size
= (f
->membuf_alloc
+ mlen
)*2;
1347 if(new_alloc_size
<1024) new_alloc_size
=1024;
1348 if(new_alloc_size
> f
->max_len_hard
) new_alloc_size
= f
->max_len_hard
;
1349 de_dbg3(f
->c
, "increasing membuf size %"I64_FMT
" -> %"I64_FMT
,
1350 f
->membuf_alloc
, new_alloc_size
);
1351 if(f
->len
+ mlen
> f
->max_len_hard
) {
1352 do_on_dbuf_size_exceeded(f
);
1354 f
->membuf_buf
= de_realloc(f
->c
, f
->membuf_buf
, f
->membuf_alloc
, new_alloc_size
);
1355 f
->membuf_alloc
= new_alloc_size
;
1358 de_memcpy(&f
->membuf_buf
[f
->len
], m
, (size_t)mlen
);
1362 void dbuf_write(dbuf
*f
, const u8
*m
, i64 len
)
1365 if(f
->len
+ len
> f
->max_len_hard
) {
1366 do_on_dbuf_size_exceeded(f
);
1369 if(f
->writelistener_cb
) {
1370 // Note that the callback function can be changed at any time, so if we
1371 // ever decide to buffer these calls, precautions will be needed.
1372 f
->writelistener_cb(f
, f
->userdata_for_writelistener
, m
, len
);
1376 case DBUF_TYPE_OFILE
:
1377 case DBUF_TYPE_STDOUT
:
1379 if(f
->c
->debug_level
>=3) {
1380 de_dbg3(f
->c
, "writing %"I64_FMT
" bytes to %s", len
, f
->name
);
1382 fwrite(m
, 1, (size_t)len
, f
->fp
);
1385 case DBUF_TYPE_MEMBUF
:
1386 if(f
->c
->debug_level
>=3 && f
->name
) {
1387 de_dbg3(f
->c
, "appending %"I64_FMT
" bytes to membuf %s", len
, f
->name
);
1389 membuf_append(f
, m
, len
);
1391 case DBUF_TYPE_ODBUF
:
1392 dbuf_write(f
->parent_dbuf
, m
, len
);
1395 case DBUF_TYPE_CUSTOM
:
1396 if(f
->customwrite_fn
) {
1397 f
->customwrite_fn(f
, f
->userdata_for_customwrite
, m
, len
);
1401 case DBUF_TYPE_NULL
:
1406 de_internal_err_fatal(f
->c
, "Invalid output file type (%d)", f
->btype
);
1409 void dbuf_writebyte(dbuf
*f
, u8 n
)
1411 dbuf_write(f
, &n
, 1);
// dbuf_write_at: Write 'len' bytes from 'm' at absolute offset 'pos' of 'f'.
// Requests with len<1 or pos<0 are ignored.
// NOTE(review): some assignments (e.g. the remaining amt_* initializations
// and the f->len updates) are not visible in this listing.
1414 // Allowed only for membufs, and unmanaged output files.
1415 // For unmanaged output files, must be used with care, and should not be
1416 // mixed with dbuf_write().
1417 void dbuf_write_at(dbuf
*f
, i64 pos
, const u8
*m
, i64 len
)
1419 if(len
<1 || pos
<0) return;
// Enforce the hard size limit of this dbuf.
1421 if(pos
+ len
> f
->max_len_hard
) {
1422 do_on_dbuf_size_exceeded(f
);
// Membuf: split the request into a part that overwrites existing content,
// a zero-filled gap (when pos is beyond the current end), and a part that
// is appended.
1425 if(f
->btype
==DBUF_TYPE_MEMBUF
) {
1426 i64 amt_overwrite
, amt_newzeroes
, amt_append
;
1428 if(pos
+len
<= f
->len
) { // entirely within the current file
1429 amt_overwrite
= len
;
1433 else if(pos
>= f
->len
) { // starts after the end of the current file
1435 amt_newzeroes
= pos
- f
->len
;
1438 else { // overlaps the end of the current file
1439 amt_overwrite
= f
->len
- pos
;
1441 amt_append
= len
- amt_overwrite
;
1444 if(amt_overwrite
>0) {
1445 de_memcpy(&f
->membuf_buf
[pos
], m
, (size_t)amt_overwrite
);
1447 if(amt_newzeroes
>0) {
1448 dbuf_write_zeroes(f
, amt_newzeroes
);
// The appended part starts right after the overwritten part of 'm'.
1452 membuf_append(f
, &m
[amt_overwrite
], amt_append
);
// Unmanaged output file: seek in the underlying FILE and write there.
1455 else if(f
->btype
==DBUF_TYPE_OFILE
&& !f
->is_managed
) {
1456 i64 curpos
= de_ftell(f
->fp
);
1458 de_fseek(f
->fp
, pos
, SEEK_SET
);
1460 fwrite(m
, 1, (size_t)len
, f
->fp
);
1461 if(pos
+len
> f
->len
) {
// Null dbuf: no data is stored.
1465 else if(f
->btype
==DBUF_TYPE_NULL
) {
1466 if(pos
+len
> f
->len
) {
// Presumably the fallback for every other dbuf type.
1471 de_internal_err_fatal(f
->c
, "Attempt to seek on non-seekable stream");
1475 void dbuf_writebyte_at(dbuf
*f
, i64 pos
, u8 n
)
1477 if(f
->btype
==DBUF_TYPE_MEMBUF
&& pos
>=0 && pos
<f
->len
) {
1478 // Fast path when overwriting a byte in a membuf
1479 f
->membuf_buf
[pos
] = n
;
1483 dbuf_write_at(f
, pos
, &n
, 1);
// dbuf_write_run: Write 'len' copies of the byte 'n' to 'f'.
// The data is sent to dbuf_write() in pieces no larger than a local
// buffer 'buf'.
// NOTE(review): the declarations of buf/amt_left/amt_to_write and the
// initialization of amt_left are not visible in this listing.
1486 void dbuf_write_run(dbuf
*f
, u8 n
, i64 len
)
// Fill only as much of buf as will be needed (at most all of it).
1492 de_memset(buf
, n
, (size_t)len
<sizeof(buf
) ? (size_t)len
: sizeof(buf
));
// Each pass writes min(amt_left, sizeof(buf)) bytes.
1494 while(amt_left
> 0) {
1495 if((size_t)amt_left
<sizeof(buf
))
1496 amt_to_write
= amt_left
;
1498 amt_to_write
= sizeof(buf
);
1499 dbuf_write(f
, buf
, amt_to_write
);
1500 amt_left
-= amt_to_write
;
1504 void dbuf_write_zeroes(dbuf
*f
, i64 len
)
1506 dbuf_write_run(f
, 0, len
);
1509 // Make the membuf have exactly len bytes of content.
1510 void dbuf_truncate(dbuf
*f
, i64 desired_len
)
1512 if(desired_len
<0) desired_len
=0;
1513 if(desired_len
>f
->len
) {
1514 dbuf_write_zeroes(f
, desired_len
- f
->len
);
1516 else if(desired_len
<f
->len
) {
1517 if(f
->btype
==DBUF_TYPE_MEMBUF
) {
1518 f
->len
= desired_len
;
1523 void de_writeu16le_direct(u8
*m
, i64 n
)
1525 m
[0] = (u8
)(n
& 0x00ff);
1526 m
[1] = (u8
)((n
& 0xff00)>>8);
1529 void de_writeu16be_direct(u8
*m
, i64 n
)
1531 m
[0] = (u8
)((n
& 0xff00)>>8);
1532 m
[1] = (u8
)(n
& 0x00ff);
1535 void dbuf_writeu16le(dbuf
*f
, i64 n
)
1538 de_writeu16le_direct(buf
, n
);
1539 dbuf_write(f
, buf
, 2);
1542 void dbuf_writeu16be(dbuf
*f
, i64 n
)
1545 de_writeu16be_direct(buf
, n
);
1546 dbuf_write(f
, buf
, 2);
1549 void dbuf_writei16le(dbuf
*f
, i64 n
)
1552 dbuf_writeu16le(f
, n
+65536);
1555 dbuf_writeu16le(f
, n
);
1559 void dbuf_writei16be(dbuf
*f
, i64 n
)
1562 dbuf_writeu16be(f
, n
+65536);
1565 dbuf_writeu16be(f
, n
);
1569 void de_writeu32be_direct(u8
*m
, i64 n
)
1571 m
[0] = (u8
)((n
& 0xff000000)>>24);
1572 m
[1] = (u8
)((n
& 0x00ff0000)>>16);
1573 m
[2] = (u8
)((n
& 0x0000ff00)>>8);
1574 m
[3] = (u8
)(n
& 0x000000ff);
1577 void dbuf_writeu32be(dbuf
*f
, i64 n
)
1580 de_writeu32be_direct(buf
, n
);
1581 dbuf_write(f
, buf
, 4);
1584 void de_writeu32le_direct(u8
*m
, i64 n
)
1586 m
[0] = (u8
)(n
& 0x000000ff);
1587 m
[1] = (u8
)((n
& 0x0000ff00)>>8);
1588 m
[2] = (u8
)((n
& 0x00ff0000)>>16);
1589 m
[3] = (u8
)((n
& 0xff000000)>>24);
1592 void dbuf_writeu32le(dbuf
*f
, i64 n
)
1595 de_writeu32le_direct(buf
, n
);
1596 dbuf_write(f
, buf
, 4);
1599 void dbuf_writei32le(dbuf
*f
, i64 n
)
1602 dbuf_writeu32le(f
, n
+0x100000000LL
);
1605 dbuf_writeu32le(f
, n
);
1608 void dbuf_writei32be(dbuf
*f
, i64 n
)
1611 dbuf_writeu32be(f
, n
+0x100000000LL
);
1614 dbuf_writeu32be(f
, n
);
1618 void de_writeu64le_direct(u8
*m
, u64 n
)
1620 de_writeu32le_direct(&m
[0], (i64
)(u32
)(n
&0xffffffffULL
));
1621 de_writeu32le_direct(&m
[4], (i64
)(u32
)(n
>>32));
1624 void dbuf_writeu64le(dbuf
*f
, u64 n
)
1627 de_writeu64le_direct(buf
, n
);
1628 dbuf_write(f
, buf
, 8);
1631 void dbuf_puts(dbuf
*f
, const char *sz
)
1633 dbuf_write(f
, (const u8
*)sz
, (i64
)de_strlen(sz
));
// dbuf_printf: printf-style formatted output for a dbuf.
// The text is formatted into a fixed-size local buffer 'buf' by
// de_vsnprintf(), hence the size limitation mentioned in the TODO.
// NOTE(review): the declaration of 'buf', the va_list handling, and the
// statement that writes 'buf' to 'f' are not visible in this listing.
1636 // TODO: Remove the buffer size limitation?
1637 void dbuf_printf(dbuf
*f
, const char *fmt
, ...)
1643 de_vsnprintf(buf
, sizeof(buf
), fmt
, ap
);
// dbuf_flush: Acts only on dbufs of type DBUF_TYPE_OFILE.
// NOTE(review): the statement inside the 'if' is not visible in this
// listing; presumably it flushes f->fp -- confirm.
1649 void dbuf_flush(dbuf
*f
)
1651 if(f
->btype
==DBUF_TYPE_OFILE
) {
// dbuf_open_input_file: Create an input dbuf for the file named 'fn'.
// NOTE(review): the conditions guarding the error paths, the return
// statements, and the body of the final 'if' are not visible in this
// listing.
1656 dbuf
*dbuf_open_input_file(deark
*c
, const char *fn
)
1659 unsigned int returned_flags
= 0;
1663 c
->serious_error_flag
= 1;
// Start out as a regular file with caching enabled.
1666 f
= create_dbuf_lowlevel(c
);
1667 f
->btype
= DBUF_TYPE_IFILE
;
1668 f
->cache_policy
= DE_CACHE_POLICY_ENABLED
;
// de_fopen_for_read() also stores the length in f->len, an error message in
// msgbuf, and flags in returned_flags.
1670 f
->fp
= de_fopen_for_read(c
, fn
, &f
->len
, msgbuf
, sizeof(msgbuf
), &returned_flags
);
1673 de_err(c
, "Can't read %s: %s", fn
, msgbuf
);
1675 c
->serious_error_flag
= 1;
// Flag 0x1: read the whole pipe into the cache now, and turn off the
// automatic cache policy.
1679 if(returned_flags
& 0x1) {
1680 // This "file" is actually a pipe.
1681 f
->btype
= DBUF_TYPE_FIFO
;
1682 f
->cache_policy
= DE_CACHE_POLICY_NONE
;
1683 populate_cache_from_pipe(f
);
// No cache yet, but caching is enabled.
1686 if(!f
->cache
&& f
->cache_policy
==DE_CACHE_POLICY_ENABLED
) {
// dbuf_open_input_stdin: Create an input dbuf of type DBUF_TYPE_STDIN, and
// fill its cache by calling populate_cache_from_pipe().
// NOTE(review): the declaration of 'f' and the return statement are not
// visible in this listing.
1693 dbuf
*dbuf_open_input_stdin(deark
*c
)
1697 f
= create_dbuf_lowlevel(c
);
1698 f
->btype
= DBUF_TYPE_STDIN
;
1700 // Set to NONE, to make sure we don't try to auto-populate the cache later.
1701 f
->cache_policy
= DE_CACHE_POLICY_NONE
;
1703 populate_cache_from_pipe(f
);
// dbuf_open_input_subfile: Create an input dbuf of type DBUF_TYPE_IDBUF that
// refers to 'parent', starting at 'offset' within it.
// NOTE(review): how 'c' is obtained, how 'size' is used, and the return
// statement are not visible in this listing.
1708 dbuf
*dbuf_open_input_subfile(dbuf
*parent
, i64 offset
, i64 size
)
1714 f
= create_dbuf_lowlevel(c
);
1715 f
->btype
= DBUF_TYPE_IDBUF
;
1716 f
->parent_dbuf
= parent
;
1717 f
->offset_into_parent_dbuf
= offset
;
// dbuf_create_custom_dbuf: Create a dbuf of type DBUF_TYPE_CUSTOM whose
// length is reported as 'apparent_size', and whose hard size limit is
// DE_DUMMY_MAX_FILE_SIZE.
// NOTE(review): the use of 'flags' (if any) and the return statement are
// not visible in this listing.
1722 dbuf
*dbuf_create_custom_dbuf(deark
*c
, i64 apparent_size
, unsigned int flags
)
1726 f
= create_dbuf_lowlevel(c
);
1727 f
->btype
= DBUF_TYPE_CUSTOM
;
1728 f
->len
= apparent_size
;
1729 f
->max_len_hard
= DE_DUMMY_MAX_FILE_SIZE
;
1733 void dbuf_set_writelistener(dbuf
*f
, de_writelistener_cb_type fn
, void *userdata
)
1735 f
->userdata_for_writelistener
= userdata
;
1736 f
->writelistener_cb
= fn
;
// dbuf_close: Finish with the dbuf 'f': do the type-specific end-of-file
// work, update the running output total, and release the memory owned by
// the dbuf.
// NOTE(review): many lines (the NULL check if any, the dispatch statement
// the case labels belong to, the closing of f->fp, and the release of 'f'
// itself) are not visible in this listing.
1739 void dbuf_close(dbuf
*f
)
// Output that goes to a file or to stdout counts toward the total size.
1745 if(f
->btype
==DBUF_TYPE_OFILE
|| f
->btype
==DBUF_TYPE_STDOUT
) {
1746 c
->total_output_size
+= f
->len
;
// A membuf that stands for a ZIP member is added to the archive now.
1749 if(f
->btype
==DBUF_TYPE_MEMBUF
&& f
->write_memfile_to_zip_archive
) {
1750 de_zip_add_file_to_archive(c
, f
);
1752 de_dbg3(c
, "closing memfile %s", f
->name
);
1755 else if(f
->writing_to_tar_archive
) {
1756 de_tar_end_member_file(c
, f
);
1760 case DBUF_TYPE_IFILE
:
1761 case DBUF_TYPE_OFILE
:
1763 de_dbg3(c
, "closing file %s", f
->name
);
// Managed output files get their attributes updated.
1768 if(f
->btype
==DBUF_TYPE_OFILE
&& f
->is_managed
) {
1769 de_update_file_attribs(f
, c
->preserve_file_times
);
1772 case DBUF_TYPE_FIFO
:
1776 case DBUF_TYPE_STDOUT
:
1777 if(f
->name
&& f
->is_managed
) {
1778 de_dbg3(c
, "finished writing %s to stdout", f
->name
);
// NOTE(review): f->name is not checked for NULL on this path.
1780 else if(!f
->is_managed
) {
1781 de_dbg3(c
, "finished writing %s", f
->name
);
1785 case DBUF_TYPE_MEMBUF
:
1786 case DBUF_TYPE_IDBUF
:
1787 case DBUF_TYPE_ODBUF
:
1788 case DBUF_TYPE_STDIN
:
1789 case DBUF_TYPE_CUSTOM
:
1790 case DBUF_TYPE_NULL
:
1793 de_internal_err_nonfatal(c
, "Don't know how to close this type of file (%d)", f
->btype
);
// Release the buffers owned by the dbuf.
1796 de_free(c
, f
->membuf_buf
);
1797 de_free(c
, f
->name
);
1798 de_free(c
, f
->cache
);
1799 if(f
->fi_copy
) de_finfo_destroy(c
, f
->fi_copy
);
1802 if(c
->total_output_size
> c
->max_total_output_size
) {
1803 // FIXME: Since we only do this check when a file is closed, it can
1804 // potentially be subverted in the (rare) case that Deark has multiple
1805 // output files open simultaneously.
1806 de_err(c
, "Maximum total output size of %"I64_FMT
" bytes exceeded",
1807 c
->max_total_output_size
);
// dbuf_empty: Acts only on dbufs of type DBUF_TYPE_MEMBUF.
// NOTE(review): the statement inside the 'if' is not visible in this
// listing; presumably it resets the content length to 0 -- confirm.
1812 void dbuf_empty(dbuf
*f
)
1814 if(f
->btype
== DBUF_TYPE_MEMBUF
) {
1819 // Provides direct (presumably read-only) access to the memory in a membuf.
1820 // Use with care: The memory is still owned by the dbuf.
1821 // Note: Another, arguably safer, way to do this is to use dbuf_buffered_read().
1822 const u8
*dbuf_get_membuf_direct_ptr(dbuf
*f
)
1824 if(f
->btype
!= DBUF_TYPE_MEMBUF
) return NULL
;
1825 return f
->membuf_buf
;
1828 // Search a section of a dbuf for a given byte.
1829 // 'haystack_len' is the number of bytes to search.
1830 // Returns 0 if not found.
1831 // If found, sets *foundpos to the position in the file where it was found
1832 // (not relative to startpos).
1833 int dbuf_search_byte(dbuf
*f
, const u8 b
, i64 startpos
,
1834 i64 haystack_len
, i64
*foundpos
)
1838 for(i
=0; i
<haystack_len
; i
++) {
1839 if(b
== dbuf_getbyte(f
, startpos
+i
)) {
1840 *foundpos
= startpos
+i
;
// search_cbfn: Callback used by dbuf_search() with dbuf_buffered_read().
// Looks for the needle at each starting position of 'buf' at which the
// whole needle fits. brctx->userdata points to the struct search_ctx that
// holds the needle, and receives the result.
// NOTE(review): the last line of the parameter list and the return
// statements that follow a match / end the function are not visible in
// this listing.
1854 static int search_cbfn(struct de_bufferedreadctx
*brctx
, const u8
*buf
,
1857 struct search_ctx
*sctx
= (struct search_ctx
*)brctx
->userdata
;
1859 i64 num_starting_positions_to_check
;
// Too few bytes to contain the needle.
1861 if(buf_len
< sctx
->needle_len
) return 0;
1862 num_starting_positions_to_check
= buf_len
+ 1 - sctx
->needle_len
;
1864 for(i
=0; i
<num_starting_positions_to_check
; i
++) {
// Cheap first-byte test before the full comparison.
1865 if(sctx
->needle
[0]==buf
[i
] &&
1866 !de_memcmp(sctx
->needle
, &buf
[i
], (size_t)sctx
->needle_len
))
// Record the match position relative to the start of the searched range.
1868 sctx
->foundpos_rel
= brctx
->offset
+i
;
1869 sctx
->foundflag
= 1;
// No more data will come: stop.
1874 if(brctx
->eof_flag
) return 0;
// Consume only the positions that were checked, so that the last
// needle_len-1 bytes are presented again at the start of the next call.
1875 brctx
->bytes_consumed
= num_starting_positions_to_check
;
// NOTE(review): some lines of the comment below and of the function body
// (the conditions around the startpos adjustment, the early returns, and
// the final return) are not visible in this listing.
1879 // Search a section of a dbuf for a given byte sequence.
1881 // This function is inefficient, but it's good enough for Deark's needs.
1882 // Maximum 'needle_len' is DE_BUFFERED_READ_MIN_BLKSIZE bytes, but it's expected to
1883 // be quite short. If it gets close to the maximum, the search could get very
1886 // 'haystack_len' is the number of bytes to search in (the sequence must be completely
1887 // within that range, not just start there).
1888 // Returns 0 if not found.
1889 // If found, sets *foundpos to the position in the file where it was found
1890 // (not relative to startpos).
1891 int dbuf_search(dbuf
*f
, const u8
*needle
, i64 needle_len
,
1892 i64 startpos
, i64 haystack_len
, i64
*foundpos
)
1895 struct search_ctx sctx
;
// Presumably executed only when startpos is negative, to shorten the range
// by the part that lies before the start of the file -- confirm.
1900 haystack_len
+= startpos
;
1901 if(haystack_len
< 0) {
1906 if(startpos
> f
->len
) {
// Clip the range to the end of the file.
1909 if(haystack_len
> f
->len
- startpos
) {
1910 haystack_len
= f
->len
- startpos
;
// The needle cannot fit in the range.
1912 if(needle_len
> haystack_len
) {
// The needle is longer than search_cbfn() is designed to handle.
1915 if(needle_len
> DE_BUFFERED_READ_MIN_BLKSIZE
) {
1920 *foundpos
= startpos
;
// General case: scan the range with search_cbfn().
1924 de_zeromem(&sctx
, sizeof(struct search_ctx
));
1925 sctx
.needle
= needle
;
1926 sctx
.needle_len
= needle_len
;
1927 (void)dbuf_buffered_read(f
, startpos
, haystack_len
, search_cbfn
, (void*)&sctx
);
// Convert the relative match position to an absolute one.
1928 if(sctx
.foundflag
) {
1929 *foundpos
= startpos
+ sctx
.foundpos_rel
;
// NOTE(review): most of the body (the loop, the declarations of 'pos' and
// 'x', the test of 'x', and the returns) is not visible in this listing,
// and the comment below has lines missing.
1937 // Search for the aligned pair of 0x00 bytes that marks the end of a UTF-16 string.
1938 // Endianness doesn't matter, because we're only looking for 0x00 0x00.
1939 // The returned 'bytes_consumed' is in bytes, and includes the 2 bytes for the NUL
1941 // Returns 0 if the NUL is not found, in which case *bytes_consumed is not
1943 int dbuf_get_utf16_NULterm_len(dbuf
*f
, i64 pos1
, i64 bytes_avail
,
1944 i64
*bytes_consumed
)
1949 *bytes_consumed
= bytes_avail
;
// Fewer than 2 bytes remain between 'pos' and the end of the range.
1951 if(pos1
+bytes_avail
-pos
< 2) {
// Read the next 2-byte unit.
1954 x
= dbuf_getu16le(f
, pos
);
1957 *bytes_consumed
= pos
- pos1
;
// dbuf_find_line: Find the extent of the line of text that starts at 'pos1'.
// On return, *pcontent_len is the distance from pos1 to the end-of-line
// position, and *ptotal_len is that plus the size of the end-of-line marker.
// Returns nonzero if *ptotal_len is greater than 0.
// NOTE(review): most of the body (the scanning loop, and how eol_pos and
// eol_size are determined from the bytes b0 and b1) is not visible in this
// listing.
1964 int dbuf_find_line(dbuf
*f
, i64 pos1
, i64
*pcontent_len
, i64
*ptotal_len
)
// Starting position is outside the file.
1973 if(pos1
<0 || pos1
>=f
->len
) {
1987 b0
= dbuf_getbyte(f
, pos
);
1991 // Look ahead at the next byte.
1992 b1
= dbuf_getbyte(f
, pos
+1);
2012 *pcontent_len
= eol_pos
- pos1
;
2013 *ptotal_len
= *pcontent_len
+ eol_size
;
2015 return (*ptotal_len
> 0);
2018 // Enforce a maximum size when writing to a dbuf.
2019 // Attempting to write more than this is a silent no-op.
2020 // May be valid only for memory buffers.
2021 void dbuf_set_length_limit(dbuf
*f
, i64 max_len
)
2023 f
->has_len_limit
= 1;
2024 f
->len_limit
= max_len
;
2027 int dbuf_has_utf8_bom(dbuf
*f
, i64 pos
)
2029 return !dbuf_memcmp(f
, pos
, "\xef\xbb\xbf", 3);
// NOTE(review): the declarations, the closing of 'outf', and the return
// statement are not visible in this listing.
2032 // Write the contents of a dbuf to a file.
2033 // This function intended for use in development/debugging.
2034 int dbuf_dump_to_file(dbuf
*inf
, const char *fn
)
// The output is an unmanaged file named 'fn'.
2039 outf
= dbuf_create_unmanaged_file(c
, fn
, DE_OVERWRITEMODE_STANDARD
, 0);
// Copy all of 'inf' (offset 0, inf->len bytes) to it.
2040 dbuf_copy(inf
, 0, inf
->len
, outf
);
2045 static void reverse_fourcc(u8
*buf
, int nbytes
)
2049 for(k
=0; k
<((size_t)nbytes
)/2; k
++) {
2052 buf
[k
] = buf
[(size_t)nbytes
-1-k
];
2053 buf
[(size_t)nbytes
-1-k
] = tmpc
;
2057 // Though we call it a "fourcc", we support 'nbytes' from 1 to 4.
2058 void dbuf_read_fourcc(dbuf
*f
, i64 pos
, struct de_fourcc
*fcc
,
2059 int nbytes
, unsigned int flags
)
2061 if(nbytes
<1 || nbytes
>4) return;
2063 de_zeromem(fcc
->bytes
, 4);
2064 dbuf_read(f
, fcc
->bytes
, pos
, (i64
)nbytes
);
2065 if(flags
&DE_4CCFLAG_REVERSED
) {
2066 reverse_fourcc(fcc
->bytes
, nbytes
);
2069 fcc
->id
= (u32
)de_getu32be_direct(fcc
->bytes
);
2071 fcc
->id
>>= (4-(unsigned int)nbytes
)*8;
2074 de_bytes_to_printable_sz(fcc
->bytes
, (i64
)nbytes
,
2075 fcc
->id_sanitized_sz
, sizeof(fcc
->id_sanitized_sz
),
2076 0, DE_ENCODING_ASCII
);
2077 de_bytes_to_printable_sz(fcc
->bytes
, (i64
)nbytes
,
2078 fcc
->id_dbgstr
, sizeof(fcc
->id_dbgstr
),
2079 DE_CONVFLAG_ALLOW_HL
, DE_ENCODING_ASCII
);
// buffered_read_internal: General-purpose implementation of
// dbuf_buffered_read(). Repeatedly tops up a local buffer 'buf' (BRBUFLEN
// bytes) from 'f', passes it to 'cbfn', and keeps any bytes that cbfn did
// not consume at the front of the buffer for the next call.
// NOTE(review): the loop statement, the declaration of 'buf', the handling
// of cbfn's return value, and the return statements are not visible in
// this listing.
2082 static int buffered_read_internal(struct de_bufferedreadctx
*brctx
,
2083 dbuf
*f
, i64 pos1
, i64 len
, de_buffered_read_cbfn cbfn
)
2086 i64 pos
= pos1
; // Absolute pos of next byte to read from f
2087 i64 offs_of_first_byte_in_buf
; // Relative to pos1, where in f is buf[0]?
2088 i64 num_unconsumed_bytes_in_buf
;
2089 #define BRBUFLEN 4096 // Must be >= DE_BUFFERED_READ_MIN_BLKSIZE
2092 num_unconsumed_bytes_in_buf
= 0;
2093 offs_of_first_byte_in_buf
= 0;
2096 i64 nbytes_avail_to_read
;
// Nothing left to read, and nothing left in the buffer.
2100 nbytes_avail_to_read
= pos1
+len
-pos
;
2101 if(nbytes_avail_to_read
<1 && num_unconsumed_bytes_in_buf
<1) {
2105 // max bytes that will fit in buf:
2106 bytestoread
= BRBUFLEN
-num_unconsumed_bytes_in_buf
;
// If everything that remains fits in the buffer, this is the final segment.
2108 // max bytes available to read:
2109 if(bytestoread
>= nbytes_avail_to_read
) {
2110 bytestoread
= nbytes_avail_to_read
;
2111 brctx
->eof_flag
= 1;
2114 brctx
->eof_flag
= 0;
// Append the new bytes after the leftover bytes.
2117 dbuf_read(f
, &buf
[num_unconsumed_bytes_in_buf
], pos
, bytestoread
);
2119 num_unconsumed_bytes_in_buf
+= bytestoread
;
// By default, assume the callback will consume everything.
2121 brctx
->offset
= offs_of_first_byte_in_buf
;
2122 brctx
->bytes_consumed
= num_unconsumed_bytes_in_buf
;
2123 ret
= cbfn(brctx
, buf
, num_unconsumed_bytes_in_buf
);
// The callback reported an invalid number of consumed bytes.
2125 if(brctx
->bytes_consumed
<1 || brctx
->bytes_consumed
>num_unconsumed_bytes_in_buf
) {
2129 if(brctx
->bytes_consumed
< num_unconsumed_bytes_in_buf
) {
2130 // cbfn didn't consume all bytes
2131 // TODO: For better efficiency, we could leave the buffer as it is until
2132 // the unconsumed byte count drops below DE_BUFFERED_READ_MIN_BLKSIZE.
2133 // But that's only useful if some consumers consume only a small number of bytes.
2134 de_memmove(buf
, &buf
[brctx
->bytes_consumed
],
2135 (size_t)(num_unconsumed_bytes_in_buf
-brctx
->bytes_consumed
));
2136 num_unconsumed_bytes_in_buf
-= brctx
->bytes_consumed
;
2139 num_unconsumed_bytes_in_buf
= 0;
2141 offs_of_first_byte_in_buf
+= brctx
->bytes_consumed
;
// buffered_read_from_mem: Implementation of dbuf_buffered_read() for data
// located at mem[pos1 .. pos1+len-1]. Each call to 'cbfn' is offered all
// the bytes not yet consumed, with eof_flag set.
// NOTE(review): the loop statement, the last argument of the cbfn call,
// the handling of its return value, and the return statements are not
// visible in this listing.
2148 // Special case where all bytes are already in memory
2149 static int buffered_read_from_mem(struct de_bufferedreadctx
*brctx
,
2150 dbuf
*f
, const u8
*mem
, i64 pos1
, i64 len
, de_buffered_read_cbfn cbfn
)
2153 i64 total_nbytes_consumed
= 0;
// Everything that remains is offered to the callback.
2159 nbytes_to_send
= len
- total_nbytes_consumed
;
2160 if(nbytes_to_send
<1) break;
2161 brctx
->bytes_consumed
= nbytes_to_send
;
2162 brctx
->offset
= total_nbytes_consumed
;
2163 brctx
->eof_flag
= 1;
2165 ret
= cbfn(brctx
, &mem
[pos1
+total_nbytes_consumed
],
// The callback reported an invalid number of consumed bytes.
2168 if(brctx
->bytes_consumed
<1 || brctx
->bytes_consumed
>nbytes_to_send
) {
2171 total_nbytes_consumed
+= brctx
->bytes_consumed
;
// buffered_read_zero_len: Implementation of dbuf_buffered_read() for an
// empty range. Calls 'cbfn' with a length of 0, with eof_flag set. The
// buffer pointer passed to it is a valid (dummy) one.
// NOTE(review): the declaration of 'ret', any other brctx initialization,
// and the return statement are not visible in this listing.
2178 static int buffered_read_zero_len(struct de_bufferedreadctx
*brctx
,
2179 de_buffered_read_cbfn cbfn
)
2181 const u8 dummybuf
[1] = { 0 };
2185 brctx
->eof_flag
= 1;
2186 brctx
->bytes_consumed
= 0;
2187 ret
= cbfn(brctx
, dummybuf
, 0);
// NOTE(review): a few lines of the comment below, and possibly some
// initialization of 'brctx', are not visible in this listing.
2191 // dbuf_buffered_read:
2192 // Read a slice of a dbuf, and pass its data to a callback function, one
2193 // segment at a time.
2194 // cbfn: Caller-implemented callback function.
2195 // - It must be prepared for an arbitrarily large number of bytes to be passed
2196 // to it at once (though it does not have to consume them all).
2197 // - It must consume at least 1 byte, unless 0 bytes were passed to it.
2198 // - If it does not consume all the bytes passed to it, it must set
2199 // brctx->bytes_consumed.
2200 // - It must return nonzero normally, 0 to abort.
2201 // We guarantee that:
2202 // - brctx->eof_flag will be nonzero if and only if there is no data after this.
2203 // - If eof_flag is not set, at least DE_BUFFERED_READ_MIN_BLKSIZE bytes will
2205 // - If the caller supplies 0 bytes of input data, the callback function will be
2206 // called exactly once. This is the only case where the callback will be
2207 // called with buf_len==0.
2208 // - If the source dbuf is a MEMBUF, and the requested bytes are all in range,
2209 // then all requested bytes will be provided in the first call to the callback
2211 // Return value: 1 normally, 0 if the callback function ever returned 0.
2212 int dbuf_buffered_read(dbuf
*f
, i64 pos1
, i64 len
,
2213 de_buffered_read_cbfn cbfn
, void *userdata
)
2215 struct de_bufferedreadctx brctx
;
// The callback gets the caller's userdata by way of brctx.
2218 brctx
.userdata
= userdata
;
2220 if(len
<=0) { // Get this special case out of the way.
2221 return buffered_read_zero_len(&brctx
, cbfn
);
2224 // Use an optimized routine if all the data we need to read is already in memory.
2225 if(f
->cache
&& (pos1
>=0) && (pos1
+len
<=f
->cache_bytes_used
)) {
2226 return buffered_read_from_mem(&brctx
, f
, f
->cache
, pos1
, len
, cbfn
);
2229 // Not an "optimization", since we promise this behavior for MEMBUFs.
2230 if(f
->btype
==DBUF_TYPE_MEMBUF
&& (pos1
>=0) && (pos1
+len
<=f
->len
)) {
2231 return buffered_read_from_mem(&brctx
, f
, f
->membuf_buf
, pos1
, len
, cbfn
);
2234 // The general case:
2235 return buffered_read_internal(&brctx
, f
, pos1
, len
, cbfn
);
2238 int de_is_all_zeroes(const u8
*b
, i64 n
)
2241 for(k
=0; k
<n
; k
++) {
2242 if(b
[k
]!=0) return 0;
// is_all_zeroes_cbfn: Callback used by dbuf_is_all_zeroes() with
// dbuf_buffered_read(). Returns nonzero if this segment contains only
// zero-valued bytes; returning 0 aborts the read.
// NOTE(review): the last line of the parameter list is not visible in this
// listing.
2247 static int is_all_zeroes_cbfn(struct de_bufferedreadctx
*brctx
, const u8
*buf
,
2250 return de_is_all_zeroes(buf
, buf_len
);
2253 // Returns 1 if the given slice has only bytes with value 0.
2254 int dbuf_is_all_zeroes(dbuf
*f
, i64 pos
, i64 len
)
2256 return dbuf_buffered_read(f
, pos
, len
, is_all_zeroes_cbfn
, NULL
);
2259 void de_bitbuf_lowelevel_add_byte(struct de_bitbuf_lowlevel
*bbll
, u8 n
)
2261 if(bbll
->nbits_in_bitbuf
>56) return;
2262 if(bbll
->is_lsb
==0) {
2263 bbll
->bit_buf
= (bbll
->bit_buf
<<8) | n
;
2266 bbll
->bit_buf
|= (u64
)n
<< bbll
->nbits_in_bitbuf
;
2268 bbll
->nbits_in_bitbuf
+= 8;
2271 u64
de_bitbuf_lowelevel_get_bits(struct de_bitbuf_lowlevel
*bbll
, UI nbits
)
2276 if(nbits
> bbll
->nbits_in_bitbuf
) return 0;
2277 mask
= ((u64
)1 << nbits
)-1;
2278 if(bbll
->is_lsb
==0) {
2279 bbll
->nbits_in_bitbuf
-= nbits
;
2280 n
= (bbll
->bit_buf
>> bbll
->nbits_in_bitbuf
) & mask
;
2283 n
= bbll
->bit_buf
& mask
;
2284 bbll
->bit_buf
>>= nbits
;
2285 bbll
->nbits_in_bitbuf
-= nbits
;
// de_bitbuf_lowelevel_empty: Discard the buffered bits, by setting the bit
// count to 0.
// NOTE(review): one line of the body is not visible in this listing
// (presumably it also clears bit_buf -- confirm).
2290 void de_bitbuf_lowelevel_empty(struct de_bitbuf_lowlevel
*bbll
)
2293 bbll
->nbits_in_bitbuf
= 0;
// de_bitreader_getbits: Read the next 'nbits' bits from the bit reader.
// Bytes are fetched from bitrd->f at bitrd->curpos as needed, and
// accumulated in bitrd->bbll. Returns 0 if eof_flag is already set.
// NOTE(review): the conditions guarding the first 'eof_flag = 1', the
// declaration of 'b', and the statements that follow the end-of-data
// detection in the loop are not visible in this listing.
2296 u64
de_bitreader_getbits(struct de_bitreader
*bitrd
, UI nbits
)
2298 if(bitrd
->eof_flag
) return 0;
2300 // TODO: Decide if we always want to do this. Could risk infinite loops
2301 // with this successful no-op.
2305 bitrd
->eof_flag
= 1;
// Top up the bit buffer one byte at a time, until it holds enough bits.
2309 while(bitrd
->bbll
.nbits_in_bitbuf
< nbits
) {
// No more source bytes are available.
2312 if(bitrd
->curpos
>= bitrd
->endpos
) {
2313 bitrd
->eof_flag
= 1;
2316 b
= dbuf_getbyte_p(bitrd
->f
, &bitrd
->curpos
);
2317 de_bitbuf_lowelevel_add_byte(&bitrd
->bbll
, b
);
2320 return de_bitbuf_lowelevel_get_bits(&bitrd
->bbll
, nbits
);
2323 char *de_bitreader_describe_curpos(struct de_bitreader
*bitrd
, char *buf
, size_t buf_len
)
2329 nwholebytes
= (i64
)(bitrd
->bbll
.nbits_in_bitbuf
/ 8);
2330 nbits
= bitrd
->bbll
.nbits_in_bitbuf
% 8;
2331 curpos
= bitrd
->curpos
- (i64
)nwholebytes
;
2334 de_snprintf(buf
, buf_len
, "%"I64_FMT
, curpos
);
2337 de_snprintf(buf
, buf_len
, "%"I64_FMT
"+%ubits", curpos
-1, (UI
)(8-nbits
));