Minor refactoring
[deark.git] / src / deark-dbuf.c
blob4977c0dc7640723a3a9fcfeed14ff6c0854a5127
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // deark-dbuf.c
6 //
7 // Functions related to the dbuf object.
9 #define DE_NOT_IN_MODULE
10 #include "deark-config.h"
11 #include "deark-private.h"
13 #define DE_DUMMY_MAX_FILE_SIZE (1LL<<56)
14 #define DE_MAX_MEMBUF_SIZE 2000000000
15 #define DE_CACHE_SIZE 262144
17 // Fill the cache that remembers the first part of the file.
18 // TODO: We should probably use memory-mapped files instead when possible,
19 // but this is simple and portable, and does most of what we need.
20 static void populate_cache(dbuf *f)
22 i64 bytes_to_read;
23 i64 bytes_read;
25 if(f->btype!=DBUF_TYPE_IFILE) return;
27 bytes_to_read = DE_CACHE_SIZE;
28 if(f->len < bytes_to_read) {
29 bytes_to_read = f->len;
32 f->cache = de_malloc(f->c, DE_CACHE_SIZE);
33 de_fseek(f->fp, 0, SEEK_SET);
34 bytes_read = fread(f->cache, 1, (size_t)bytes_to_read, f->fp);
35 f->cache_bytes_used = bytes_read;
36 f->file_pos_known = 0;
39 // Read all data from stdin (or a named pipe) into memory.
40 static void populate_cache_from_pipe(dbuf *f)
42 FILE *fp;
43 i64 cache_bytes_alloc = 0;
45 if(f->btype==DBUF_TYPE_STDIN) {
46 fp = stdin;
48 else if(f->btype==DBUF_TYPE_FIFO) {
49 fp = f->fp;
51 else {
52 return;
55 f->cache_bytes_used = 0;
57 while(1) {
58 i64 bytes_to_read, bytes_read;
60 if(f->cache_bytes_used >= cache_bytes_alloc) {
61 i64 old_cache_size, new_cache_size;
63 // Cache is full. Increase its size.
64 old_cache_size = cache_bytes_alloc;
65 new_cache_size = old_cache_size*2;
66 if(new_cache_size<DE_CACHE_SIZE) new_cache_size = DE_CACHE_SIZE;
67 f->cache = de_realloc(f->c, f->cache, old_cache_size, new_cache_size);
68 cache_bytes_alloc = new_cache_size;
71 // Try to read as many bytes as it would take to fill the cache.
72 bytes_to_read = cache_bytes_alloc - f->cache_bytes_used;
73 if(bytes_to_read<1) break; // Shouldn't happen
75 bytes_read = fread(&f->cache[f->cache_bytes_used], 1, (size_t)bytes_to_read, fp);
76 if(bytes_read<1 || bytes_read>bytes_to_read) break;
77 f->cache_bytes_used += bytes_read;
78 if(feof(fp) || ferror(fp)) break;
81 f->len = f->cache_bytes_used;
84 // Read len bytes, starting at file position pos, into buf.
85 // Unread bytes will be set to 0.
86 void dbuf_read(dbuf *f, u8 *buf, i64 pos, i64 len)
88 i64 bytes_read = 0;
89 i64 bytes_to_read;
90 deark *c;
92 c = f->c;
94 if(pos < 0) {
95 if((-pos) >= len) {
96 // All requested bytes are before the beginning of the file
97 de_zeromem(buf, (size_t)len);
98 return;
100 // Some requested bytes are before the beginning of the file.
101 // Zero out the ones that are:
102 de_zeromem(buf, (size_t)(-pos));
103 // And adjust the parameters:
104 buf += (-pos);
105 len -= (-pos);
106 pos = 0;
109 bytes_to_read = len;
110 if(pos >= f->len) {
111 bytes_to_read = 0;
113 else if(pos + bytes_to_read > f->len) {
114 bytes_to_read = f->len - pos;
117 if(bytes_to_read<1) {
118 goto done_read;
121 // If the data we need is all cached, get it from cache.
122 if(f->cache &&
123 pos >= 0 &&
124 pos + bytes_to_read <= f->cache_bytes_used)
126 de_memcpy(buf, &f->cache[pos], (size_t)bytes_to_read);
127 bytes_read = bytes_to_read;
128 goto done_read;
131 switch(f->btype) {
132 case DBUF_TYPE_IFILE:
133 if(!f->fp) {
134 de_internal_err_fatal(c, "File not open");
135 goto done_read;
138 // For performance reasons, don't call fseek if we're already at the
139 // right position.
140 if(!f->file_pos_known || f->file_pos!=pos) {
141 de_fseek(f->fp, pos, SEEK_SET);
144 bytes_read = fread(buf, 1, (size_t)bytes_to_read, f->fp);
146 f->file_pos = pos + bytes_read;
147 f->file_pos_known = 1;
148 break;
150 case DBUF_TYPE_IDBUF:
151 // Recursive call to the parent dbuf.
152 dbuf_read(f->parent_dbuf, buf, f->offset_into_parent_dbuf+pos, bytes_to_read);
154 // The parent dbuf always writes 'bytes_to_read' bytes.
155 bytes_read = bytes_to_read;
156 break;
158 case DBUF_TYPE_MEMBUF:
159 de_memcpy(buf, &f->membuf_buf[pos], (size_t)bytes_to_read);
160 bytes_read = bytes_to_read;
161 break;
163 default:
164 de_internal_err_fatal(c, "getbytes from this I/O type not implemented");
165 goto done_read;
168 done_read:
169 // Zero out any requested bytes that were not read.
170 if(bytes_read < len) {
171 de_zeromem(buf+bytes_read, (size_t)(len - bytes_read));
175 // A function that works a little more like a standard read/fread function than
176 // does dbuf_read. It returns the number of bytes read, won't read past end of
177 // file, and helps track the file position.
178 i64 dbuf_standard_read(dbuf *f, u8 *buf, i64 n, i64 *fpos)
180 i64 amt_to_read;
182 if(*fpos < 0 || *fpos >= f->len) return 0;
184 amt_to_read = n;
185 if(*fpos + amt_to_read > f->len) amt_to_read = f->len - *fpos;
186 dbuf_read(f, buf, *fpos, amt_to_read);
187 *fpos += amt_to_read;
188 return amt_to_read;
191 u8 dbuf_getbyte(dbuf *f, i64 pos)
193 if(pos<0 || pos>=f->len) return 0x00;
195 if(pos<f->cache_bytes_used) {
196 return f->cache[pos];
198 if(f->btype==DBUF_TYPE_MEMBUF) {
199 // Note that it is necessary to handle read+write dbuf types specially,
200 // so that the "cache2" feature isn't used.
201 return f->membuf_buf[pos];
204 // TODO: I don't like that cache2 exists, but without it some large images
205 // are decoded too slowly (especially on Windows), and I haven't figured out
206 // a solution I like better.
207 if(pos==f->cache2_pos) {
208 return f->cache2;
210 f->cache2_pos = pos;
211 dbuf_read(f, &f->cache2, pos, 1);
212 return f->cache2;
215 i64 de_geti8_direct(const u8 *m)
217 u8 b = m[0];
219 if(b<=127) return (i64)b;
220 return ((i64)b)-256;
223 i64 dbuf_geti8(dbuf *f, i64 pos)
225 u8 b;
227 b = dbuf_getbyte(f, pos);
228 return de_geti8_direct(&b);
231 u8 dbuf_getbyte_p(dbuf *f, i64 *ppos)
233 u8 b;
234 b = dbuf_getbyte(f, *ppos);
235 (*ppos)++;
236 return b;
239 static i64 dbuf_getuint_ext_be_direct(const u8 *m, unsigned int nbytes)
241 unsigned int k;
242 u64 val = 0;
244 for(k=0; k<nbytes; k++) {
245 if(val>0x00ffffffffffffffULL) return 0;
246 val = (val<<8) | (u64)m[k];
248 return (i64)val;
251 static i64 dbuf_getint_ext_be_direct(const u8 *m, unsigned int nbytes)
253 unsigned int k;
254 u64 val = 0;
256 // We can handle up to 8 arbitrary bytes. Any more have to be 0xff.
257 if(nbytes>8) {
258 for(k=0; k<nbytes-8; k++) {
259 if(m[k]!=0xff) return 0; // underflow
263 // Process bytes in order of increasing significance
264 for(k=0; k<8; k++) {
265 u8 byteval;
267 if(k<nbytes) {
268 byteval = m[nbytes-1-k];
270 else {
271 byteval = 0xff;
273 val |= ((u64)byteval) << (k*8);
275 return (i64)val;
278 static i64 dbuf_getuint_ext_le_direct(const u8 *m, unsigned int nbytes)
280 unsigned int k;
281 u64 val = 0;
283 for(k=0; k<nbytes; k++) {
284 if(m[k]!=0) {
285 if(k>7) return 0;
286 val |= ((u64)m[k])<<(k*8);
289 return (i64)val;
292 static i64 dbuf_getuint_ext_x(dbuf *f, i64 pos, unsigned int nbytes,
293 int is_le)
295 u8 m[24];
297 if(nbytes>(unsigned int)sizeof(m)) return 0;
298 dbuf_read(f, m, pos, (i64)nbytes);
299 if(is_le) {
300 return dbuf_getuint_ext_le_direct(m, nbytes);
302 return dbuf_getuint_ext_be_direct(m, nbytes);
305 static i64 dbuf_getint_ext_x(dbuf *f, i64 pos, unsigned int nbytes, int is_le)
307 u8 m[24];
309 if(nbytes>(unsigned int)sizeof(m)) return 0;
310 dbuf_read(f, m, pos, (i64)nbytes);
311 if(is_le) {
312 return 0; // TODO
314 return dbuf_getint_ext_be_direct(m, nbytes);
317 i64 de_getu16be_direct(const u8 *m)
319 return (i64)(((u32)m[1]) | (((u32)m[0])<<8));
322 i64 dbuf_getu16be(dbuf *f, i64 pos)
324 u8 m[2];
325 dbuf_read(f, m, pos, 2);
326 return de_getu16be_direct(m);
329 i64 dbuf_getu16be_p(dbuf *f, i64 *ppos)
331 u8 m[2];
332 dbuf_read(f, m, *ppos, 2);
333 (*ppos) += 2;
334 return de_getu16be_direct(m);
337 i64 de_getu16le_direct(const u8 *m)
339 return (i64)(((u32)m[0]) | (((u32)m[1])<<8));
342 i64 dbuf_getu16le(dbuf *f, i64 pos)
344 u8 m[2];
345 dbuf_read(f, m, pos, 2);
346 return de_getu16le_direct(m);
349 i64 dbuf_getu16le_p(dbuf *f, i64 *ppos)
351 u8 m[2];
352 dbuf_read(f, m, *ppos, 2);
353 (*ppos) += 2;
354 return de_getu16le_direct(m);
357 i64 dbuf_geti16be(dbuf *f, i64 pos)
359 i64 n;
360 n = dbuf_getu16be(f, pos);
361 if(n>=32768) n -= 65536;
362 return n;
365 i64 dbuf_geti16le(dbuf *f, i64 pos)
367 i64 n;
368 n = dbuf_getu16le(f, pos);
369 if(n>=32768) n -= 65536;
370 return n;
373 i64 dbuf_geti16be_p(dbuf *f, i64 *ppos)
375 i64 n;
376 n = dbuf_geti16be(f, *ppos);
377 (*ppos) += 2;
378 return n;
381 i64 dbuf_geti16le_p(dbuf *f, i64 *ppos)
383 i64 n;
384 n = dbuf_geti16le(f, *ppos);
385 (*ppos) += 2;
386 return n;
389 i64 de_getu32be_direct(const u8 *m)
391 return (i64)(((u32)m[3]) | (((u32)m[2])<<8) |
392 (((u32)m[1])<<16) | (((u32)m[0])<<24));
395 i64 dbuf_getu32be(dbuf *f, i64 pos)
397 u8 m[4];
398 dbuf_read(f, m, pos, 4);
399 return de_getu32be_direct(m);
402 i64 dbuf_getu32be_p(dbuf *f, i64 *ppos)
404 u8 m[4];
405 dbuf_read(f, m, *ppos, 4);
406 (*ppos) += 4;
407 return de_getu32be_direct(m);
410 i64 de_getu32le_direct(const u8 *m)
412 return (i64)(((u32)m[0]) | (((u32)m[1])<<8) |
413 (((u32)m[2])<<16) | (((u32)m[3])<<24));
416 i64 dbuf_getu32le(dbuf *f, i64 pos)
418 u8 m[4];
419 dbuf_read(f, m, pos, 4);
420 return de_getu32le_direct(m);
423 i64 dbuf_getu32le_p(dbuf *f, i64 *ppos)
425 u8 m[4];
426 dbuf_read(f, m, *ppos, 4);
427 (*ppos) += 4;
428 return de_getu32le_direct(m);
431 i64 dbuf_geti32be(dbuf *f, i64 pos)
433 i64 n;
434 n = dbuf_getu32be(f, pos);
435 return (i64)(i32)(u32)n;
438 i64 dbuf_geti32le(dbuf *f, i64 pos)
440 i64 n;
441 n = dbuf_getu32le(f, pos);
442 return (i64)(i32)(u32)n;
445 i64 dbuf_geti32be_p(dbuf *f, i64 *ppos)
447 i64 n;
448 n = dbuf_geti32be(f, *ppos);
449 (*ppos) += 4;
450 return n;
453 i64 dbuf_geti32le_p(dbuf *f, i64 *ppos)
455 i64 n;
456 n = dbuf_geti32le(f, *ppos);
457 (*ppos) += 4;
458 return n;
461 u64 de_getu64be_direct(const u8 *m)
463 unsigned int i;
464 u64 val = 0;
466 for(i=0; i<8; i++) {
467 val |= ((u64)m[i])<<((7-i)*8);
469 return val;
472 i64 de_geti64be_direct(const u8 *m)
474 return (i64)de_getu64be_direct(m);
477 i64 dbuf_geti64be(dbuf *f, i64 pos)
479 u8 m[8];
480 dbuf_read(f, m, pos, 8);
481 return de_geti64be_direct(m);
484 u64 de_getu64le_direct(const u8 *m)
486 unsigned int i;
487 u64 val = 0;
489 for(i=0; i<8; i++) {
490 val |= ((u64)m[i])<<(i*8);
492 return val;
495 i64 de_geti64le_direct(const u8 *m)
497 return (i64)de_getu64le_direct(m);
500 i64 dbuf_geti64le(dbuf *f, i64 pos)
502 u8 m[8];
503 dbuf_read(f, m, pos, 8);
504 return de_geti64le_direct(m);
507 i64 dbuf_getu16x(dbuf *f, i64 pos, int is_le)
509 if(is_le) return dbuf_getu16le(f, pos);
510 return dbuf_getu16be(f, pos);
513 i64 dbuf_geti16x(dbuf *f, i64 pos, int is_le)
515 if(is_le) return dbuf_geti16le(f, pos);
516 return dbuf_geti16be(f, pos);
519 i64 dbuf_getu32x(dbuf *f, i64 pos, int is_le)
521 if(is_le) return dbuf_getu32le(f, pos);
522 return dbuf_getu32be(f, pos);
525 i64 dbuf_geti32x(dbuf *f, i64 pos, int is_le)
527 if(is_le) return dbuf_geti32le(f, pos);
528 return dbuf_geti32be(f, pos);
531 i64 dbuf_geti64x(dbuf *f, i64 pos, int is_le)
533 if(is_le) return dbuf_geti64le(f, pos);
534 return dbuf_geti64be(f, pos);
537 u64 dbuf_getu64be(dbuf *f, i64 pos)
539 u8 m[8];
540 dbuf_read(f, m, pos, 8);
541 return de_getu64be_direct(m);
544 u64 dbuf_getu64le(dbuf *f, i64 pos)
546 u8 m[8];
547 dbuf_read(f, m, pos, 8);
548 return de_getu64le_direct(m);
551 u64 dbuf_getu64x(dbuf *f, i64 pos, int is_le)
553 if(is_le) return dbuf_getu64le(f, pos);
554 return dbuf_getu64be(f, pos);
557 i64 dbuf_getint_ext(dbuf *f, i64 pos, unsigned int nbytes,
558 int is_le, int is_signed)
560 if(is_signed) {
561 // TODO: Extend this to any number of bytes, 1-8.
562 switch(nbytes) {
563 case 1: return (i64)(signed char)dbuf_getbyte(f, pos); break;
564 case 2: return dbuf_geti16x(f, pos, is_le); break;
565 case 4: return dbuf_geti32x(f, pos, is_le); break;
566 case 8: return dbuf_geti64x(f, pos, is_le); break;
567 default:
568 return dbuf_getint_ext_x(f, pos, nbytes, is_le);
571 else {
572 switch(nbytes) {
573 case 1: return (i64)dbuf_getbyte(f, pos); break;
574 case 2: return dbuf_getu16x(f, pos, is_le); break;
575 case 4: return dbuf_getu32x(f, pos, is_le); break;
576 case 8: return dbuf_geti64x(f, pos, is_le); break;
577 default:
578 return dbuf_getuint_ext_x(f, pos, nbytes, is_le);
581 return 0;
584 static void init_fltpt_decoder(deark *c)
586 unsigned int x = 1;
587 char b = 0;
589 c->can_decode_fltpt = 0;
590 if(sizeof(float)!=4) return;
591 if(sizeof(double)!=8) return;
592 c->can_decode_fltpt = 1;
594 de_memcpy(&b, &x, 1);
595 if(b==0)
596 c->host_is_le = 0;
597 else
598 c->host_is_le = 1;
601 double de_getfloat32x_direct(deark *c, const u8 *m, int is_le)
603 char buf[4];
604 float val = 0.0;
606 if(c->can_decode_fltpt<0) {
607 init_fltpt_decoder(c);
609 if(!c->can_decode_fltpt) return 0.0;
611 // FIXME: This assumes that the native floating point format is
612 // IEEE 754, but that does not have to be the case.
614 de_memcpy(buf, m, 4);
616 if(is_le != c->host_is_le) {
617 int i;
618 char tmpc;
619 // Reverse order of bytes
620 for(i=0; i<2; i++) {
621 tmpc = buf[i]; buf[i] = buf[3-i]; buf[3-i] = tmpc;
625 de_memcpy(&val, buf, 4);
626 return (double)val;
629 double dbuf_getfloat32x(dbuf *f, i64 pos, int is_le)
631 u8 buf[4];
632 dbuf_read(f, buf, pos, 4);
633 return de_getfloat32x_direct(f->c, buf, is_le);
636 double de_getfloat64x_direct(deark *c, const u8 *m, int is_le)
638 char buf[8];
639 double val = 0.0;
641 if(c->can_decode_fltpt<0) {
642 init_fltpt_decoder(c);
644 if(!c->can_decode_fltpt) return 0.0;
646 de_memcpy(buf, m, 8);
648 if(is_le != c->host_is_le) {
649 int i;
650 char tmpc;
651 // Reverse order of bytes
652 for(i=0; i<4; i++) {
653 tmpc = buf[i]; buf[i] = buf[7-i]; buf[7-i] = tmpc;
657 de_memcpy(&val, buf, 8);
658 return (double)val;
661 double dbuf_getfloat64x(dbuf *f, i64 pos, int is_le)
663 u8 buf[8];
664 dbuf_read(f, buf, pos, 8);
665 return de_getfloat64x_direct(f->c, buf, is_le);
668 int dbuf_read_ascii_number(dbuf *f, i64 pos, i64 fieldsize,
669 int base, i64 *value)
671 char buf[32];
673 *value = 0;
674 if(fieldsize>(i64)(sizeof(buf)-1)) return 0;
676 dbuf_read(f, (u8*)buf, pos, fieldsize);
677 buf[fieldsize] = '\0';
679 *value = de_strtoll(buf, NULL, base);
680 return 1;
683 de_color dbuf_getRGB(dbuf *f, i64 pos, unsigned int flags)
685 u8 buf[3];
686 dbuf_read(f, buf, pos, 3);
687 if(flags&DE_GETRGBFLAG_BGR)
688 return DE_MAKE_RGB(buf[2], buf[1], buf[0]);
689 return DE_MAKE_RGB(buf[0], buf[1], buf[2]);
692 static int copy_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
693 i64 buf_len)
695 dbuf *outf = (dbuf*)brctx->userdata;
696 dbuf_write(outf, buf, buf_len);
697 return 1;
700 void dbuf_copy(dbuf *inf, i64 input_offset, i64 input_len, dbuf *outf)
702 u8 tmpbuf[256];
704 // Fast paths, if the data to copy is all in memory
706 if(inf->cache &&
707 (input_offset>=0) && (input_offset+input_len<=inf->cache_bytes_used))
709 dbuf_write(outf, &inf->cache[input_offset], input_len);
710 return;
713 if(inf->btype==DBUF_TYPE_MEMBUF &&
714 (input_offset>=0) && (input_offset+input_len<=inf->len))
716 dbuf_write(outf, &inf->membuf_buf[input_offset], input_len);
717 return;
720 if(input_len<=(i64)sizeof(tmpbuf)) {
721 // Fast path for small sizes
722 dbuf_read(inf, tmpbuf, input_offset, input_len);
723 dbuf_write(outf, tmpbuf, input_len);
724 return;
727 dbuf_buffered_read(inf, input_offset, input_len, copy_cbfn, (void*)outf);
730 struct copy_at_ctx {
731 dbuf *outf;
732 i64 outpos;
735 static int copy_at_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
736 i64 buf_len)
738 struct copy_at_ctx *ctx = (struct copy_at_ctx*)brctx->userdata;
740 dbuf_write_at(ctx->outf, ctx->outpos, buf, buf_len);
741 ctx->outpos += buf_len;
742 return 1;
745 void dbuf_copy_at(dbuf *inf, i64 input_offset, i64 input_len,
746 dbuf *outf, i64 output_offset)
748 struct copy_at_ctx ctx;
750 ctx.outf = outf;
751 ctx.outpos = output_offset;
752 dbuf_buffered_read(inf, input_offset, input_len, copy_at_cbfn, (void*)&ctx);
755 // An advanced function for reading a string from a file.
756 // The issue is that some strings are both human-readable and machine-readable.
757 // In such a case, we'd like to read some data from a file into a nice printable
758 // ucstring, while also making some or all of the raw bytes available, say for
759 // byte-for-byte string comparisons.
760 // Plus (for NUL-terminated/padded strings), we may need to know the actual length
761 // of the string in the file, so that it can be skipped over, even if we don't
762 // care about the whole string.
763 // Caller is responsible for calling destroy_stringreader() on the returned value.
764 // max_bytes_to_scan: The maximum number of bytes to read from the file.
765 // max_bytes_to_keep: The maximum (or in some cases the exact) number of bytes,
766 // not counting any NUL terminator, to return in ->sz.
767 // The ->str field is a Unicode version of ->sz, so this also affects ->str.
768 // If DE_CONVFLAG_STOP_AT_NUL is not set, it is assumed we are reading a string
769 // of known length, that may have internal NUL bytes. The caller must set
770 // max_bytes_to_scan and max_bytes_to_keep to the same value. The ->sz field will
771 // always be allocated with this many bytes, plus one more for an artificial NUL
772 // terminator.
773 // If DE_CONVFLAG_WANT_UTF8 is set, then the ->sz_utf8 field will be set to a
774 // UTF-8 version of ->str. This is mainly useful if the original string was
775 // UTF-16. sz_utf8 is not "printable" -- use ucstring_get_printable_sz_n(str) for
776 // that.
777 // ->sz_strlen will equal strlen(->sz) if DE_CONVFLAG_STOP_AT_NUL is set, or
778 // the supplied value of max_bytes_to_(scan|keep) if not.
779 // Recognized flags:
780 // - DE_CONVFLAG_STOP_AT_NUL
781 // - DE_CONVFLAG_WANT_UTF8
782 struct de_stringreaderdata *dbuf_read_string(dbuf *f, i64 pos,
783 i64 max_bytes_to_scan,
784 i64 max_bytes_to_keep,
785 unsigned int flags, de_ext_encoding ee)
787 deark *c = f->c;
788 struct de_stringreaderdata *srd;
789 i64 foundpos = 0;
790 int ret;
791 i64 bytes_avail_to_read;
792 i64 bytes_to_malloc;
793 i64 x_strlen = 0;
795 srd = de_malloc(c, sizeof(struct de_stringreaderdata));
796 srd->str = ucstring_create(c);
797 if(max_bytes_to_scan<0) max_bytes_to_scan = 0;
798 if(max_bytes_to_keep<0) max_bytes_to_keep = 0;
800 bytes_avail_to_read = max_bytes_to_scan;
801 if(bytes_avail_to_read > f->len-pos) {
802 bytes_avail_to_read = f->len-pos;
804 if(bytes_avail_to_read<0) bytes_avail_to_read = 0;
806 srd->bytes_consumed = bytes_avail_to_read; // default
808 // From here on, we can safely bail out ("goto done"). The
809 // de_stringreaderdata struct is sufficiently valid.
811 if(!(flags&DE_CONVFLAG_STOP_AT_NUL) &&
812 (max_bytes_to_scan != max_bytes_to_keep))
814 // To reduce possible confusion, we require that
815 // max_bytes_to_scan==max_bytes_to_keep in this case.
816 srd->sz = de_malloc(c, max_bytes_to_keep+1);
817 goto done;
820 if(flags&DE_CONVFLAG_STOP_AT_NUL) {
821 ret = dbuf_search_byte(f, 0x00, pos, bytes_avail_to_read, &foundpos);
822 if(ret) {
823 srd->found_nul = 1;
825 else {
826 // No NUL byte found. Could be an error in some formats, but in
827 // others NUL is used as separator or as padding, not a terminator.
828 foundpos = pos+bytes_avail_to_read;
831 x_strlen = foundpos-pos;
832 srd->bytes_consumed = x_strlen+1;
834 else {
835 x_strlen = max_bytes_to_keep;
836 srd->bytes_consumed = x_strlen;
839 bytes_to_malloc = x_strlen+1;
840 if(bytes_to_malloc>(max_bytes_to_keep+1)) {
841 bytes_to_malloc = max_bytes_to_keep+1;
842 srd->was_truncated = 1;
845 srd->sz = de_malloc(c, bytes_to_malloc);
846 dbuf_read(f, (u8*)srd->sz, pos, bytes_to_malloc-1); // The last byte remains NUL
848 ucstring_append_bytes(srd->str, (const u8*)srd->sz, bytes_to_malloc-1, 0, ee);
850 if(flags&DE_CONVFLAG_WANT_UTF8) {
851 srd->sz_utf8_strlen = (size_t)ucstring_count_utf8_bytes(srd->str);
852 srd->sz_utf8 = de_malloc(c, (i64)srd->sz_utf8_strlen + 1);
853 ucstring_to_sz(srd->str, srd->sz_utf8, srd->sz_utf8_strlen + 1, 0, DE_ENCODING_UTF8);
856 done:
857 if(!srd->sz) {
858 // Always return a valid sz, even on failure.
859 srd->sz = de_malloc(c, 1);
861 if((flags&DE_CONVFLAG_WANT_UTF8) && !srd->sz_utf8) {
862 // Always return a valid sz_utf8 if it was requested, even on failure.
863 srd->sz_utf8 = de_malloc(c, 1);
864 srd->sz_utf8_strlen = 0;
866 srd->sz_strlen = (size_t)x_strlen;
867 return srd;
870 void de_destroy_stringreaderdata(deark *c, struct de_stringreaderdata *srd)
872 if(!srd) return;
873 de_free(c, srd->sz);
874 de_free(c, srd->sz_utf8);
875 ucstring_destroy(srd->str);
876 de_free(c, srd);
879 void dbuf_read_to_ucstring_ex(dbuf *f, i64 pos1, i64 len,
880 de_ucstring *s, unsigned int conv_flags, struct de_encconv_state *es)
882 i64 nbytes_remaining;
883 i64 pos = pos1;
884 int stop_at_nul = 0;
885 #define READTOUCSTRING_BUFLEN 256
886 u8 buf[READTOUCSTRING_BUFLEN];
888 if(conv_flags & DE_CONVFLAG_STOP_AT_NUL) {
889 stop_at_nul = 1;
890 // We handle STOP_AT_NUL ourselves, so don't pass it on.
891 conv_flags -= DE_CONVFLAG_STOP_AT_NUL;
894 // Note: It might be sensible to use dbuf_buffered_read() here, but I've
895 // decided against it for now.
896 nbytes_remaining = len;
897 do {
898 i64 nbytes_to_read;
899 i64 nbytes_in_buf;
900 unsigned int conv_flags_to_use_this_time;
902 // Lack of DE_CONVFLAG_PARTIAL_DATA flag signals end of data, which
903 // isn't necessarily a no-op even with len=0.
904 // That's why we always do this loop at least once.
906 nbytes_to_read = de_min_int(nbytes_remaining, READTOUCSTRING_BUFLEN);
907 dbuf_read(f, buf, pos, nbytes_to_read);
908 pos += nbytes_to_read;
909 nbytes_in_buf = nbytes_to_read;
910 nbytes_remaining -= nbytes_to_read;
912 if(stop_at_nul) {
913 char *tmpp;
915 tmpp = de_memchr(buf, 0x00, (size_t)nbytes_in_buf);
916 if(tmpp) {
917 nbytes_in_buf = (const u8*)tmpp - buf;
918 nbytes_remaining = 0;
922 conv_flags_to_use_this_time = conv_flags;
923 if(nbytes_remaining>0) {
924 // The caller may have aleady set this flag, in which case we will use
925 // it every time.
926 // If not, we still use it for all but the final call to ucstring_append_bytes_ex().
927 conv_flags_to_use_this_time |= DE_CONVFLAG_PARTIAL_DATA;
930 ucstring_append_bytes_ex(s, buf, nbytes_in_buf, conv_flags_to_use_this_time, es);
931 } while(nbytes_remaining>0);
935 // Read (up to) len bytes from f, translate them to characters, and append
936 // them to s.
937 void dbuf_read_to_ucstring(dbuf *f, i64 pos, i64 len,
938 de_ucstring *s, unsigned int conv_flags, de_ext_encoding ee)
940 struct de_encconv_state es;
942 de_encconv_init(&es, ee);
943 dbuf_read_to_ucstring_ex(f, pos, len, s, conv_flags, &es);
946 void dbuf_read_to_ucstring_n(dbuf *f, i64 pos, i64 len, i64 max_len,
947 de_ucstring *s, unsigned int conv_flags, de_ext_encoding ee)
949 struct de_encconv_state es;
951 if(len>max_len) len = max_len;
952 de_encconv_init(&es, ee);
953 dbuf_read_to_ucstring_ex(f, pos, len, s, conv_flags, &es);
956 static int dbufmemcmp_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
957 i64 buf_len)
959 // Return 0 if there is a mismatch.
960 return !de_memcmp(buf,
961 &(((const u8*)brctx->userdata)[brctx->offset]),
962 (size_t)buf_len);
965 int dbuf_memcmp(dbuf *f, i64 pos, const void *s, size_t n)
967 u8 buf1[128];
969 if(f->cache &&
970 pos >= 0 &&
971 pos + (i64)n <= f->cache_bytes_used)
973 // Fastest path: Compare directly to cache.
974 return de_memcmp(s, &f->cache[pos], n);
977 if(n<=sizeof(buf1)) {
978 // Use a stack buffer if small enough.
979 dbuf_read(f, buf1, pos, n);
980 return de_memcmp(buf1, s, n);
983 // Fallback method.
984 return !dbuf_buffered_read(f, pos, n, dbufmemcmp_cbfn, (void*)s);
987 int dbuf_create_file_from_slice(dbuf *inf, i64 pos, i64 data_size,
988 const char *ext, de_finfo *fi, unsigned int createflags)
990 dbuf *f;
991 f = dbuf_create_output_file(inf->c, ext, fi, createflags);
992 if(!f) return 0;
993 dbuf_copy(inf, pos, data_size, f);
994 dbuf_close(f);
995 return 1;
998 static void finfo_shallow_copy(deark *c, de_finfo *src, de_finfo *dst)
1000 UI k;
1002 dst->is_directory = src->is_directory;
1003 dst->mode_flags = src->mode_flags;
1004 for(k=0; k<DE_TIMESTAMPIDX_COUNT; k++) {
1005 dst->timestamp[k] = src->timestamp[k];
1007 dst->internal_mod_time = src->internal_mod_time;
1008 dst->density = src->density;
1009 dst->has_hotspot = src->has_hotspot;
1010 dst->hotspot_x = src->hotspot_x;
1011 dst->hotspot_y = src->hotspot_y;
1014 static dbuf *create_dbuf_lowlevel(deark *c)
1016 dbuf *f;
1018 f = de_malloc(c, sizeof(dbuf));
1019 f->c = c;
1020 f->cache2_pos = -1; // Any offset outside the bounds of the file will do.
1021 return f;
1024 // Create or open a file for writing, that is *not* one of the usual
1025 // "output.000.ext" files we extract from the input file.
1027 // overwrite_mode, flags: Same as for de_fopen_for_write().
1029 // On failure, prints an error message, and sets f->btype to DBUF_TYPE_NULL.
1030 dbuf *dbuf_create_unmanaged_file(deark *c, const char *fname, int overwrite_mode,
1031 unsigned int flags)
1033 dbuf *f;
1034 char msgbuf[200];
1036 f = create_dbuf_lowlevel(c);
1037 f->is_managed = 0;
1038 f->name = de_strdup(c, fname);
1040 f->btype = DBUF_TYPE_OFILE;
1041 f->max_len_hard = c->max_output_file_size;
1042 f->fp = de_fopen_for_write(c, f->name, msgbuf, sizeof(msgbuf),
1043 c->overwrite_mode, flags);
1045 if(!f->fp) {
1046 de_err(c, "Failed to write %s: %s", f->name, msgbuf);
1047 f->btype = DBUF_TYPE_NULL;
1048 c->serious_error_flag = 1;
1051 return f;
1054 dbuf *dbuf_create_unmanaged_file_stdout(deark *c, const char *name)
1056 dbuf *f;
1058 f = create_dbuf_lowlevel(c);
1059 f->is_managed = 0;
1060 f->name = de_strdup(c, name);
1061 f->btype = DBUF_TYPE_STDOUT;
1062 f->max_len_hard = c->max_output_file_size;
1063 f->fp = stdout;
1064 return f;
// Copy ext1 to ext (a buffer of size extlen), replacing every character
// other than ASCII letters, digits, '.', '_', '-', and '+' with '_'.
static void sanitize_ext(const char *ext1, char *ext, size_t extlen)
{
	char *p;

	de_strlcpy(ext, ext1, extlen);

	// This part of the filename should come from Deark, and should only
	// use a limited set of characters. Just to be sure:
	for(p=ext; *p; p++) {
		char ch = *p;
		int is_allowed;

		is_allowed = (ch>='0' && ch<='9') ||
			(ch>='A' && ch<='Z') ||
			(ch>='a' && ch<='z') ||
			ch=='.' || ch=='_' || ch=='-' || ch=='+';

		if(!is_allowed) {
			*p = '_';
		}
	}
}
1088 dbuf *dbuf_create_output_file(deark *c, const char *ext1, de_finfo *fi,
1089 unsigned int createflags)
1091 char nbuf[500];
1092 char msgbuf[200];
1093 char ext[128];
1094 int have_ext;
1095 dbuf *f;
1096 const char *basefn;
1097 int file_index;
1098 u8 is_directory = 0;
1099 char *name_from_finfo = NULL;
1100 i64 name_from_finfo_len = 0;
1102 if(ext1) {
1103 have_ext = 1;
1104 sanitize_ext(ext1, ext, sizeof(ext));
1106 else {
1107 have_ext = 0;
1108 ext[0] = '\0';
1111 if(have_ext && fi && fi->original_filename_flag) {
1112 de_dbg(c, "[internal warning: Incorrect use of create_output_file]");
1115 f = create_dbuf_lowlevel(c);
1116 f->max_len_hard = c->max_output_file_size;
1117 f->is_managed = 1;
1119 if(fi && fi->is_directory) {
1120 is_directory = 1;
1123 if(is_directory && !c->keep_dir_entries) {
1124 de_dbg(c, "skipping 'directory' file");
1125 f->btype = DBUF_TYPE_NULL;
1126 goto done;
1129 if(c->extract_policy==DE_EXTRACTPOLICY_MAINONLY) {
1130 if(createflags&DE_CREATEFLAG_IS_AUX) {
1131 de_dbg(c, "skipping 'auxiliary' file");
1132 f->btype = DBUF_TYPE_NULL;
1133 goto done;
1136 else if(c->extract_policy==DE_EXTRACTPOLICY_AUXONLY) {
1137 if(!(createflags&DE_CREATEFLAG_IS_AUX)) {
1138 de_dbg(c, "skipping 'main' file");
1139 f->btype = DBUF_TYPE_NULL;
1140 goto done;
1144 file_index = c->file_count;
1145 c->file_count++;
1147 basefn = c->base_output_filename ? c->base_output_filename : "output";
1149 if(fi && ucstring_isnonempty(fi->file_name_internal)) {
1150 name_from_finfo_len = 1 + ucstring_count_utf8_bytes(fi->file_name_internal);
1151 name_from_finfo = de_malloc(c, name_from_finfo_len);
1152 ucstring_to_sz(fi->file_name_internal, name_from_finfo, (size_t)name_from_finfo_len, 0,
1153 DE_ENCODING_UTF8);
1156 if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && !c->base_output_filename &&
1157 fi && fi->is_directory &&
1158 (fi->is_root_dir || (fi->detect_root_dot_dir && fi->orig_name_was_dot)))
1160 de_strlcpy(nbuf, ".", sizeof(nbuf));
1162 else if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && !c->base_output_filename &&
1163 fi && fi->original_filename_flag && name_from_finfo)
1165 // TODO: This is a "temporary" hack to allow us to, when both reading from
1166 // and writing to an archive format, use some semblance of the correct
1167 // filename (instead of "output.xxx.yyy").
1168 // There are some things that we don't handle optimally, such as
1169 // subdirectories.
1170 // A major redesign of the file naming logic would be good.
1171 de_strlcpy(nbuf, name_from_finfo, sizeof(nbuf));
1173 else {
1174 char fn_suffix[256];
1176 if(have_ext && name_from_finfo) {
1177 de_snprintf(fn_suffix, sizeof(fn_suffix), "%s.%s", name_from_finfo, ext);
1179 else if(have_ext) {
1180 de_strlcpy(fn_suffix, ext, sizeof(fn_suffix));
1182 else if(is_directory && name_from_finfo) {
1183 de_snprintf(fn_suffix, sizeof(fn_suffix), "%s.dir", name_from_finfo);
1185 else if(name_from_finfo) {
1186 de_strlcpy(fn_suffix, name_from_finfo, sizeof(fn_suffix));
1188 else if(is_directory) {
1189 de_strlcpy(fn_suffix, "dir", sizeof(fn_suffix));
1191 else {
1192 de_strlcpy(fn_suffix, "bin", sizeof(fn_suffix));
1195 de_snprintf(nbuf, sizeof(nbuf), "%s.%03d.%s", basefn, file_index, fn_suffix);
1198 f->name = de_strdup(c, nbuf);
1200 if(fi) {
1201 // The finfo object passed to us at file creation is not required to
1202 // remain valid, so make a copy of anything in it that we might need
1203 // later.
1204 f->fi_copy = de_finfo_create(c);
1205 finfo_shallow_copy(c, fi, f->fi_copy);
1207 // Here's where we respect the -intz option, by using it to convert to
1208 // UTC in some cases.
1209 if(f->fi_copy->timestamp[DE_TIMESTAMPIDX_MODIFY].is_valid && f->fi_copy->timestamp[DE_TIMESTAMPIDX_MODIFY].tzcode==DE_TZCODE_LOCAL &&
1210 c->input_tz_offs_seconds!=0)
1212 de_timestamp_cvt_to_utc(&f->fi_copy->timestamp[DE_TIMESTAMPIDX_MODIFY], -c->input_tz_offs_seconds);
1215 if(f->fi_copy->internal_mod_time.is_valid && f->fi_copy->internal_mod_time.tzcode==DE_TZCODE_LOCAL &&
1216 c->input_tz_offs_seconds!=0)
1218 de_timestamp_cvt_to_utc(&f->fi_copy->internal_mod_time, -c->input_tz_offs_seconds);
1222 if(file_index < c->first_output_file) {
1223 f->btype = DBUF_TYPE_NULL;
1224 goto done;
1227 if(c->max_output_files>=0 &&
1228 file_index >= c->first_output_file + c->max_output_files)
1230 f->btype = DBUF_TYPE_NULL;
1231 goto done;
1234 c->num_files_extracted++;
1236 if(c->extrlist_dbuf) {
1237 dbuf_printf(c->extrlist_dbuf, "%s\n", f->name);
1238 dbuf_flush(c->extrlist_dbuf);
1241 if(c->list_mode) {
1242 f->btype = DBUF_TYPE_NULL;
1243 if(c->list_mode_include_file_id) {
1244 de_msg(c, "%d:%s", file_index, f->name);
1246 else {
1247 de_msg(c, "%s", f->name);
1249 goto done;
1252 if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && c->archive_fmt==DE_ARCHIVEFMT_TAR) {
1253 de_info(c, "Adding %s to TAR file", f->name);
1254 f->btype = DBUF_TYPE_ODBUF;
1255 // A dummy max_len_hard value. The parent will do the checking.
1256 f->max_len_hard = DE_DUMMY_MAX_FILE_SIZE;
1257 f->writing_to_tar_archive = 1;
1258 de_tar_start_member_file(c, f);
1260 else if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE) { // ZIP
1261 i64 initial_alloc;
1262 de_info(c, "Adding %s to ZIP file", f->name);
1263 f->btype = DBUF_TYPE_MEMBUF;
1264 f->max_len_hard = DE_MAX_MEMBUF_SIZE;
1265 if(is_directory) {
1266 // A directory entry is not expected to have any data associated
1267 // with it (besides the files it contains).
1268 initial_alloc = 16;
1270 else {
1271 initial_alloc = 65536;
1273 f->membuf_buf = de_malloc(c, initial_alloc);
1274 f->membuf_alloc = initial_alloc;
1275 f->write_memfile_to_zip_archive = 1;
1277 else if(c->output_style==DE_OUTPUTSTYLE_STDOUT) {
1278 de_info(c, "Writing %s to [stdout]", f->name);
1279 f->btype = DBUF_TYPE_STDOUT;
1280 // TODO: Should we increase f->max_len_hard?
1281 f->fp = stdout;
1283 else {
1284 de_info(c, "Writing %s", f->name);
1285 f->btype = DBUF_TYPE_OFILE;
1286 f->fp = de_fopen_for_write(c, f->name, msgbuf, sizeof(msgbuf),
1287 c->overwrite_mode, 0);
1289 if(!f->fp) {
1290 de_err(c, "Failed to write %s: %s", f->name, msgbuf);
1291 f->btype = DBUF_TYPE_NULL;
1292 c->serious_error_flag = 1;
1296 done:
1297 de_free(c, name_from_finfo);
1298 return f;
1301 static void do_on_dbuf_size_exceeded(dbuf *f)
1303 de_err(f->c, "Maximum %s size of %"I64_FMT" bytes exceeded",
1304 (f->btype==DBUF_TYPE_MEMBUF)?"membuf":"output file",
1305 f->max_len_hard);
1306 de_fatalerror(f->c);
1309 dbuf *dbuf_create_membuf(deark *c, i64 initialsize, unsigned int flags)
1311 dbuf *f;
1313 f = create_dbuf_lowlevel(c);
1314 f->btype = DBUF_TYPE_MEMBUF;
1315 f->max_len_hard = DE_MAX_MEMBUF_SIZE;
1317 if(initialsize>0) {
1318 if(initialsize > f->max_len_hard) {
1319 do_on_dbuf_size_exceeded(f);
1321 f->membuf_buf = de_malloc(c, initialsize);
1322 f->membuf_alloc = initialsize;
1325 if(flags&0x01) {
1326 dbuf_set_length_limit(f, initialsize);
1329 return f;
1332 static void membuf_append(dbuf *f, const u8 *m, i64 mlen)
1334 i64 new_alloc_size;
1336 if(f->has_len_limit) {
1337 if(f->len + mlen > f->len_limit) {
1338 mlen = f->len_limit - f->len;
1342 if(mlen<=0) return;
1344 if(mlen > f->membuf_alloc - f->len) {
1345 // Need to allocate more space
1346 new_alloc_size = (f->membuf_alloc + mlen)*2;
1347 if(new_alloc_size<1024) new_alloc_size=1024;
1348 if(new_alloc_size > f->max_len_hard) new_alloc_size = f->max_len_hard;
1349 de_dbg3(f->c, "increasing membuf size %"I64_FMT" -> %"I64_FMT,
1350 f->membuf_alloc, new_alloc_size);
1351 if(f->len + mlen > f->max_len_hard) {
1352 do_on_dbuf_size_exceeded(f);
1354 f->membuf_buf = de_realloc(f->c, f->membuf_buf, f->membuf_alloc, new_alloc_size);
1355 f->membuf_alloc = new_alloc_size;
1358 de_memcpy(&f->membuf_buf[f->len], m, (size_t)mlen);
1359 f->len += mlen;
1362 void dbuf_write(dbuf *f, const u8 *m, i64 len)
1364 if(len<=0) return;
1365 if(f->len + len > f->max_len_hard) {
1366 do_on_dbuf_size_exceeded(f);
1369 if(f->writelistener_cb) {
1370 // Note that the callback function can be changed at any time, so if we
1371 // ever decide to buffer these calls, precautions will be needed.
1372 f->writelistener_cb(f, f->userdata_for_writelistener, m, len);
1375 switch(f->btype) {
1376 case DBUF_TYPE_OFILE:
1377 case DBUF_TYPE_STDOUT:
1378 if(!f->fp) return;
1379 if(f->c->debug_level>=3) {
1380 de_dbg3(f->c, "writing %"I64_FMT" bytes to %s", len, f->name);
1382 fwrite(m, 1, (size_t)len, f->fp);
1383 f->len += len;
1384 return;
1385 case DBUF_TYPE_MEMBUF:
1386 if(f->c->debug_level>=3 && f->name) {
1387 de_dbg3(f->c, "appending %"I64_FMT" bytes to membuf %s", len, f->name);
1389 membuf_append(f, m, len);
1390 return;
1391 case DBUF_TYPE_ODBUF:
1392 dbuf_write(f->parent_dbuf, m, len);
1393 f->len += len;
1394 return;
1395 case DBUF_TYPE_CUSTOM:
1396 if(f->customwrite_fn) {
1397 f->customwrite_fn(f, f->userdata_for_customwrite, m, len);
1399 f->len += len;
1400 return;
1401 case DBUF_TYPE_NULL:
1402 f->len += len;
1403 return;
1406 de_internal_err_fatal(f->c, "Invalid output file type (%d)", f->btype);
1409 void dbuf_writebyte(dbuf *f, u8 n)
1411 dbuf_write(f, &n, 1);
1414 // Allowed only for membufs, and unmanaged output files.
1415 // For unmanaged output files, must be used with care, and should not be
1416 // mixed with dbuf_write().
1417 void dbuf_write_at(dbuf *f, i64 pos, const u8 *m, i64 len)
1419 if(len<1 || pos<0) return;
1421 if(pos + len > f->max_len_hard) {
1422 do_on_dbuf_size_exceeded(f);
1425 if(f->btype==DBUF_TYPE_MEMBUF) {
1426 i64 amt_overwrite, amt_newzeroes, amt_append;
1428 if(pos+len <= f->len) { // entirely within the current file
1429 amt_overwrite = len;
1430 amt_newzeroes = 0;
1431 amt_append = 0;
1433 else if(pos >= f->len) { // starts after the end of the current file
1434 amt_overwrite = 0;
1435 amt_newzeroes = pos - f->len;
1436 amt_append = len;
1438 else { // overlaps the end of the current file
1439 amt_overwrite = f->len - pos;
1440 amt_newzeroes = 0;
1441 amt_append = len - amt_overwrite;
1444 if(amt_overwrite>0) {
1445 de_memcpy(&f->membuf_buf[pos], m, (size_t)amt_overwrite);
1447 if(amt_newzeroes>0) {
1448 dbuf_write_zeroes(f, amt_newzeroes);
1451 if(amt_append>0) {
1452 membuf_append(f, &m[amt_overwrite], amt_append);
1455 else if(f->btype==DBUF_TYPE_OFILE && !f->is_managed) {
1456 i64 curpos = de_ftell(f->fp);
1457 if(pos != curpos) {
1458 de_fseek(f->fp, pos, SEEK_SET);
1460 fwrite(m, 1, (size_t)len, f->fp);
1461 if(pos+len > f->len) {
1462 f->len = pos+len;
1465 else if(f->btype==DBUF_TYPE_NULL) {
1466 if(pos+len > f->len) {
1467 f->len = pos+len;
1470 else {
1471 de_internal_err_fatal(f->c, "Attempt to seek on non-seekable stream");
1475 void dbuf_writebyte_at(dbuf *f, i64 pos, u8 n)
1477 if(f->btype==DBUF_TYPE_MEMBUF && pos>=0 && pos<f->len) {
1478 // Fast path when overwriting a byte in a membuf
1479 f->membuf_buf[pos] = n;
1480 return;
1483 dbuf_write_at(f, pos, &n, 1);
1486 void dbuf_write_run(dbuf *f, u8 n, i64 len)
1488 u8 buf[1024];
1489 i64 amt_left;
1490 i64 amt_to_write;
1492 de_memset(buf, n, (size_t)len<sizeof(buf) ? (size_t)len : sizeof(buf));
1493 amt_left = len;
1494 while(amt_left > 0) {
1495 if((size_t)amt_left<sizeof(buf))
1496 amt_to_write = amt_left;
1497 else
1498 amt_to_write = sizeof(buf);
1499 dbuf_write(f, buf, amt_to_write);
1500 amt_left -= amt_to_write;
1504 void dbuf_write_zeroes(dbuf *f, i64 len)
1506 dbuf_write_run(f, 0, len);
1509 // Make the membuf have exactly len bytes of content.
1510 void dbuf_truncate(dbuf *f, i64 desired_len)
1512 if(desired_len<0) desired_len=0;
1513 if(desired_len>f->len) {
1514 dbuf_write_zeroes(f, desired_len - f->len);
1516 else if(desired_len<f->len) {
1517 if(f->btype==DBUF_TYPE_MEMBUF) {
1518 f->len = desired_len;
1523 void de_writeu16le_direct(u8 *m, i64 n)
1525 m[0] = (u8)(n & 0x00ff);
1526 m[1] = (u8)((n & 0xff00)>>8);
1529 void de_writeu16be_direct(u8 *m, i64 n)
1531 m[0] = (u8)((n & 0xff00)>>8);
1532 m[1] = (u8)(n & 0x00ff);
1535 void dbuf_writeu16le(dbuf *f, i64 n)
1537 u8 buf[2];
1538 de_writeu16le_direct(buf, n);
1539 dbuf_write(f, buf, 2);
1542 void dbuf_writeu16be(dbuf *f, i64 n)
1544 u8 buf[2];
1545 de_writeu16be_direct(buf, n);
1546 dbuf_write(f, buf, 2);
1549 void dbuf_writei16le(dbuf *f, i64 n)
1551 if(n<0) {
1552 dbuf_writeu16le(f, n+65536);
1554 else {
1555 dbuf_writeu16le(f, n);
1559 void dbuf_writei16be(dbuf *f, i64 n)
1561 if(n<0) {
1562 dbuf_writeu16be(f, n+65536);
1564 else {
1565 dbuf_writeu16be(f, n);
1569 void de_writeu32be_direct(u8 *m, i64 n)
1571 m[0] = (u8)((n & 0xff000000)>>24);
1572 m[1] = (u8)((n & 0x00ff0000)>>16);
1573 m[2] = (u8)((n & 0x0000ff00)>>8);
1574 m[3] = (u8)(n & 0x000000ff);
1577 void dbuf_writeu32be(dbuf *f, i64 n)
1579 u8 buf[4];
1580 de_writeu32be_direct(buf, n);
1581 dbuf_write(f, buf, 4);
1584 void de_writeu32le_direct(u8 *m, i64 n)
1586 m[0] = (u8)(n & 0x000000ff);
1587 m[1] = (u8)((n & 0x0000ff00)>>8);
1588 m[2] = (u8)((n & 0x00ff0000)>>16);
1589 m[3] = (u8)((n & 0xff000000)>>24);
1592 void dbuf_writeu32le(dbuf *f, i64 n)
1594 u8 buf[4];
1595 de_writeu32le_direct(buf, n);
1596 dbuf_write(f, buf, 4);
1599 void dbuf_writei32le(dbuf *f, i64 n)
1601 if(n<0) {
1602 dbuf_writeu32le(f, n+0x100000000LL);
1604 else {
1605 dbuf_writeu32le(f, n);
1608 void dbuf_writei32be(dbuf *f, i64 n)
1610 if(n<0) {
1611 dbuf_writeu32be(f, n+0x100000000LL);
1613 else {
1614 dbuf_writeu32be(f, n);
1618 void de_writeu64le_direct(u8 *m, u64 n)
1620 de_writeu32le_direct(&m[0], (i64)(u32)(n&0xffffffffULL));
1621 de_writeu32le_direct(&m[4], (i64)(u32)(n>>32));
1624 void dbuf_writeu64le(dbuf *f, u64 n)
1626 u8 buf[8];
1627 de_writeu64le_direct(buf, n);
1628 dbuf_write(f, buf, 8);
1631 void dbuf_puts(dbuf *f, const char *sz)
1633 dbuf_write(f, (const u8*)sz, (i64)de_strlen(sz));
1636 // TODO: Remove the buffer size limitation?
1637 void dbuf_printf(dbuf *f, const char *fmt, ...)
1639 char buf[1024];
1640 va_list ap;
1642 va_start(ap, fmt);
1643 de_vsnprintf(buf, sizeof(buf), fmt, ap);
1644 va_end(ap);
1646 dbuf_puts(f, buf);
1649 void dbuf_flush(dbuf *f)
1651 if(f->btype==DBUF_TYPE_OFILE) {
1652 fflush(f->fp);
1656 dbuf *dbuf_open_input_file(deark *c, const char *fn)
1658 dbuf *f;
1659 unsigned int returned_flags = 0;
1660 char msgbuf[200];
1662 if(!fn) {
1663 c->serious_error_flag = 1;
1664 return NULL;
1666 f = create_dbuf_lowlevel(c);
1667 f->btype = DBUF_TYPE_IFILE;
1668 f->cache_policy = DE_CACHE_POLICY_ENABLED;
1670 f->fp = de_fopen_for_read(c, fn, &f->len, msgbuf, sizeof(msgbuf), &returned_flags);
1672 if(!f->fp) {
1673 de_err(c, "Can't read %s: %s", fn, msgbuf);
1674 de_free(c, f);
1675 c->serious_error_flag = 1;
1676 return NULL;
1679 if(returned_flags & 0x1) {
1680 // This "file" is actually a pipe.
1681 f->btype = DBUF_TYPE_FIFO;
1682 f->cache_policy = DE_CACHE_POLICY_NONE;
1683 populate_cache_from_pipe(f);
1686 if(!f->cache && f->cache_policy==DE_CACHE_POLICY_ENABLED) {
1687 populate_cache(f);
1690 return f;
1693 dbuf *dbuf_open_input_stdin(deark *c)
1695 dbuf *f;
1697 f = create_dbuf_lowlevel(c);
1698 f->btype = DBUF_TYPE_STDIN;
1700 // Set to NONE, to make sure we don't try to auto-populate the cache later.
1701 f->cache_policy = DE_CACHE_POLICY_NONE;
1703 populate_cache_from_pipe(f);
1705 return f;
1708 dbuf *dbuf_open_input_subfile(dbuf *parent, i64 offset, i64 size)
1710 dbuf *f;
1711 deark *c;
1713 c = parent->c;
1714 f = create_dbuf_lowlevel(c);
1715 f->btype = DBUF_TYPE_IDBUF;
1716 f->parent_dbuf = parent;
1717 f->offset_into_parent_dbuf = offset;
1718 f->len = size;
1719 return f;
1722 dbuf *dbuf_create_custom_dbuf(deark *c, i64 apparent_size, unsigned int flags)
1724 dbuf *f;
1726 f = create_dbuf_lowlevel(c);
1727 f->btype = DBUF_TYPE_CUSTOM;
1728 f->len = apparent_size;
1729 f->max_len_hard = DE_DUMMY_MAX_FILE_SIZE;
1730 return f;
1733 void dbuf_set_writelistener(dbuf *f, de_writelistener_cb_type fn, void *userdata)
1735 f->userdata_for_writelistener = userdata;
1736 f->writelistener_cb = fn;
1739 void dbuf_close(dbuf *f)
1741 deark *c;
1742 if(!f) return;
1743 c = f->c;
1745 if(f->btype==DBUF_TYPE_OFILE || f->btype==DBUF_TYPE_STDOUT) {
1746 c->total_output_size += f->len;
1749 if(f->btype==DBUF_TYPE_MEMBUF && f->write_memfile_to_zip_archive) {
1750 de_zip_add_file_to_archive(c, f);
1751 if(f->name) {
1752 de_dbg3(c, "closing memfile %s", f->name);
1755 else if(f->writing_to_tar_archive) {
1756 de_tar_end_member_file(c, f);
1759 switch(f->btype) {
1760 case DBUF_TYPE_IFILE:
1761 case DBUF_TYPE_OFILE:
1762 if(f->name) {
1763 de_dbg3(c, "closing file %s", f->name);
1765 de_fclose(f->fp);
1766 f->fp = NULL;
1768 if(f->btype==DBUF_TYPE_OFILE && f->is_managed) {
1769 de_update_file_attribs(f, c->preserve_file_times);
1771 break;
1772 case DBUF_TYPE_FIFO:
1773 de_fclose(f->fp);
1774 f->fp = NULL;
1775 break;
1776 case DBUF_TYPE_STDOUT:
1777 if(f->name && f->is_managed) {
1778 de_dbg3(c, "finished writing %s to stdout", f->name);
1780 else if(!f->is_managed) {
1781 de_dbg3(c, "finished writing %s", f->name);
1783 f->fp = NULL;
1784 break;
1785 case DBUF_TYPE_MEMBUF:
1786 case DBUF_TYPE_IDBUF:
1787 case DBUF_TYPE_ODBUF:
1788 case DBUF_TYPE_STDIN:
1789 case DBUF_TYPE_CUSTOM:
1790 case DBUF_TYPE_NULL:
1791 break;
1792 default:
1793 de_internal_err_nonfatal(c, "Don't know how to close this type of file (%d)", f->btype);
1796 de_free(c, f->membuf_buf);
1797 de_free(c, f->name);
1798 de_free(c, f->cache);
1799 if(f->fi_copy) de_finfo_destroy(c, f->fi_copy);
1800 de_free(c, f);
1802 if(c->total_output_size > c->max_total_output_size) {
1803 // FIXME: Since we only do this check when a file is closed, it can
1804 // potentially be subverted in the (rare) case that Deark has multiple
1805 // output files open simultanously.
1806 de_err(c, "Maximum total output size of %"I64_FMT" bytes exceeded",
1807 c->max_total_output_size);
1808 de_fatalerror(c);
1812 void dbuf_empty(dbuf *f)
1814 if(f->btype == DBUF_TYPE_MEMBUF) {
1815 f->len = 0;
1819 // Provides direct (presumably read-only) access to the memory in a membuf.
1820 // Use with care: The memory is still owned by the dbuf.
1821 // Note: Another, arguably safer, way to do this is to use dbuf_buffered_read().
1822 const u8 *dbuf_get_membuf_direct_ptr(dbuf *f)
1824 if(f->btype != DBUF_TYPE_MEMBUF) return NULL;
1825 return f->membuf_buf;
1828 // Search a section of a dbuf for a given byte.
1829 // 'haystack_len' is the number of bytes to search.
1830 // Returns 0 if not found.
1831 // If found, sets *foundpos to the position in the file where it was found
1832 // (not relative to startpos).
1833 int dbuf_search_byte(dbuf *f, const u8 b, i64 startpos,
1834 i64 haystack_len, i64 *foundpos)
1836 i64 i;
1838 for(i=0; i<haystack_len; i++) {
1839 if(b == dbuf_getbyte(f, startpos+i)) {
1840 *foundpos = startpos+i;
1841 return 1;
1844 return 0;
1847 struct search_ctx {
1848 const u8 *needle;
1849 i64 needle_len;
1850 int foundflag;
1851 i64 foundpos_rel;
1854 static int search_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
1855 i64 buf_len)
1857 struct search_ctx *sctx = (struct search_ctx*)brctx->userdata;
1858 i64 i;
1859 i64 num_starting_positions_to_check;
1861 if(buf_len < sctx->needle_len) return 0;
1862 num_starting_positions_to_check = buf_len + 1 - sctx->needle_len;
1864 for(i=0; i<num_starting_positions_to_check; i++) {
1865 if(sctx->needle[0]==buf[i] &&
1866 !de_memcmp(sctx->needle, &buf[i], (size_t)sctx->needle_len))
1868 sctx->foundpos_rel = brctx->offset+i;
1869 sctx->foundflag = 1;
1870 return 0;
1874 if(brctx->eof_flag) return 0;
1875 brctx->bytes_consumed = num_starting_positions_to_check;
1876 return 1;
1879 // Search a section of a dbuf for a given byte sequence.
1881 // This function is inefficient, but it's good enough for Deark's needs.
1882 // Maximum 'needle_len' is DE_BUFFERED_READ_MIN_BLKSIZE bytes, but it's expected to
1883 // be quite short. If it gets close to the maximum, the search could get very
1884 // inefficient.
1886 // 'haystack_len' is the number of bytes to search in (the sequence must be completely
1887 // within that range, not just start there).
1888 // Returns 0 if not found.
1889 // If found, sets *foundpos to the position in the file where it was found
1890 // (not relative to startpos).
1891 int dbuf_search(dbuf *f, const u8 *needle, i64 needle_len,
1892 i64 startpos, i64 haystack_len, i64 *foundpos)
1894 int retval = 0;
1895 struct search_ctx sctx;
1897 *foundpos = 0;
1899 if(startpos < 0) {
1900 haystack_len += startpos;
1901 if(haystack_len < 0) {
1902 goto done;
1904 startpos = 0;
1906 if(startpos > f->len) {
1907 goto done;
1909 if(haystack_len > f->len - startpos) {
1910 haystack_len = f->len - startpos;
1912 if(needle_len > haystack_len) {
1913 goto done;
1915 if(needle_len > DE_BUFFERED_READ_MIN_BLKSIZE) {
1916 goto done;
1918 if(needle_len<1) {
1919 retval = 1;
1920 *foundpos = startpos;
1921 goto done;
1924 de_zeromem(&sctx, sizeof(struct search_ctx));
1925 sctx.needle = needle;
1926 sctx.needle_len = needle_len;
1927 (void)dbuf_buffered_read(f, startpos, haystack_len, search_cbfn, (void*)&sctx);
1928 if(sctx.foundflag) {
1929 *foundpos = startpos + sctx.foundpos_rel;
1930 retval = 1;
1933 done:
1934 return retval;
1937 // Search for the aligned pair of 0x00 bytes that marks the end of a UTF-16 string.
1938 // Endianness doesn't matter, because we're only looking for 0x00 0x00.
1939 // The returned 'bytes_consumed' is in bytes, and includes the 2 bytes for the NUL
1940 // terminator.
1941 // Returns 0 if the NUL is not found, in which case *bytes_consumed is not
1942 // meaningful.
1943 int dbuf_get_utf16_NULterm_len(dbuf *f, i64 pos1, i64 bytes_avail,
1944 i64 *bytes_consumed)
1946 i64 x;
1947 i64 pos = pos1;
1949 *bytes_consumed = bytes_avail;
1950 while(1) {
1951 if(pos1+bytes_avail-pos < 2) {
1952 break;
1954 x = dbuf_getu16le(f, pos);
1955 pos += 2;
1956 if(x==0) {
1957 *bytes_consumed = pos - pos1;
1958 return 1;
1961 return 0;
1964 int dbuf_find_line(dbuf *f, i64 pos1, i64 *pcontent_len, i64 *ptotal_len)
1966 u8 b0, b1;
1967 i64 pos;
1968 i64 eol_pos = 0;
1969 i64 eol_size = 0;
1971 *pcontent_len = 0;
1972 *ptotal_len = 0;
1973 if(pos1<0 || pos1>=f->len) {
1974 return 0;
1977 pos = pos1;
1979 while(1) {
1980 if(pos>=f->len) {
1981 // No EOL.
1982 eol_pos = pos;
1983 eol_size = 0;
1984 break;
1987 b0 = dbuf_getbyte(f, pos);
1989 if(b0==0x0d) {
1990 eol_pos = pos;
1991 // Look ahead at the next byte.
1992 b1 = dbuf_getbyte(f, pos+1);
1993 if(b1==0x0a) {
1994 // CR+LF
1995 eol_size = 2;
1996 break;
1998 // LF
1999 eol_pos = pos;
2000 eol_size = 1;
2001 break;
2003 else if(b0==0x0a) {
2004 eol_pos = pos;
2005 eol_size = 1;
2006 break;
2009 pos++;
2012 *pcontent_len = eol_pos - pos1;
2013 *ptotal_len = *pcontent_len + eol_size;
2015 return (*ptotal_len > 0);
2018 // Enforce a maximum size when writing to a dbuf.
2019 // Attempting to write more than this is a silent no-op.
2020 // May be valid only for memory buffers.
2021 void dbuf_set_length_limit(dbuf *f, i64 max_len)
2023 f->has_len_limit = 1;
2024 f->len_limit = max_len;
2027 int dbuf_has_utf8_bom(dbuf *f, i64 pos)
2029 return !dbuf_memcmp(f, pos, "\xef\xbb\xbf", 3);
2032 // Write the contents of a dbuf to a file.
2033 // This function intended for use in development/debugging.
2034 int dbuf_dump_to_file(dbuf *inf, const char *fn)
2036 dbuf *outf;
2037 deark *c = inf->c;
2039 outf = dbuf_create_unmanaged_file(c, fn, DE_OVERWRITEMODE_STANDARD, 0);
2040 dbuf_copy(inf, 0, inf->len, outf);
2041 dbuf_close(outf);
2042 return 1;
2045 static void reverse_fourcc(u8 *buf, int nbytes)
2047 size_t k;
2049 for(k=0; k<((size_t)nbytes)/2; k++) {
2050 u8 tmpc;
2051 tmpc = buf[k];
2052 buf[k] = buf[(size_t)nbytes-1-k];
2053 buf[(size_t)nbytes-1-k] = tmpc;
2057 // Though we call it a "fourcc", we support 'nbytes' from 1 to 4.
2058 void dbuf_read_fourcc(dbuf *f, i64 pos, struct de_fourcc *fcc,
2059 int nbytes, unsigned int flags)
2061 if(nbytes<1 || nbytes>4) return;
2063 de_zeromem(fcc->bytes, 4);
2064 dbuf_read(f, fcc->bytes, pos, (i64)nbytes);
2065 if(flags&DE_4CCFLAG_REVERSED) {
2066 reverse_fourcc(fcc->bytes, nbytes);
2069 fcc->id = (u32)de_getu32be_direct(fcc->bytes);
2070 if(nbytes<4) {
2071 fcc->id >>= (4-(unsigned int)nbytes)*8;
2074 de_bytes_to_printable_sz(fcc->bytes, (i64)nbytes,
2075 fcc->id_sanitized_sz, sizeof(fcc->id_sanitized_sz),
2076 0, DE_ENCODING_ASCII);
2077 de_bytes_to_printable_sz(fcc->bytes, (i64)nbytes,
2078 fcc->id_dbgstr, sizeof(fcc->id_dbgstr),
2079 DE_CONVFLAG_ALLOW_HL, DE_ENCODING_ASCII);
2082 static int buffered_read_internal(struct de_bufferedreadctx *brctx,
2083 dbuf *f, i64 pos1, i64 len, de_buffered_read_cbfn cbfn)
2085 int retval = 0;
2086 i64 pos = pos1; // Absolute pos of next byte to read from f
2087 i64 offs_of_first_byte_in_buf; // Relative to pos1, where in f is buf[0]?
2088 i64 num_unconsumed_bytes_in_buf;
2089 #define BRBUFLEN 4096 // Must be >= DE_BUFFERED_READ_MIN_BLKSIZE
2090 u8 buf[BRBUFLEN];
2092 num_unconsumed_bytes_in_buf = 0;
2093 offs_of_first_byte_in_buf = 0;
2095 while(1) {
2096 i64 nbytes_avail_to_read;
2097 i64 bytestoread;
2098 int ret;
2100 nbytes_avail_to_read = pos1+len-pos;
2101 if(nbytes_avail_to_read<1 && num_unconsumed_bytes_in_buf<1) {
2102 break;
2105 // max bytes that will fit in buf:
2106 bytestoread = BRBUFLEN-num_unconsumed_bytes_in_buf;
2108 // max bytes available to read:
2109 if(bytestoread >= nbytes_avail_to_read) {
2110 bytestoread = nbytes_avail_to_read;
2111 brctx->eof_flag = 1;
2113 else {
2114 brctx->eof_flag = 0;
2117 dbuf_read(f, &buf[num_unconsumed_bytes_in_buf], pos, bytestoread);
2118 pos += bytestoread;
2119 num_unconsumed_bytes_in_buf += bytestoread;
2121 brctx->offset = offs_of_first_byte_in_buf;
2122 brctx->bytes_consumed = num_unconsumed_bytes_in_buf;
2123 ret = cbfn(brctx, buf, num_unconsumed_bytes_in_buf);
2124 if(!ret) goto done;
2125 if(brctx->bytes_consumed<1 || brctx->bytes_consumed>num_unconsumed_bytes_in_buf) {
2126 goto done;
2129 if(brctx->bytes_consumed < num_unconsumed_bytes_in_buf) {
2130 // cbfn didn't consume all bytes
2131 // TODO: For better efficiency, we could leave the buffer as it is until
2132 // the unconsumed byte count drops below DE_BUFFERED_READ_MIN_BLKSIZE.
2133 // But that's only useful if some consumers consume only a small number of bytes.
2134 de_memmove(buf, &buf[brctx->bytes_consumed],
2135 (size_t)(num_unconsumed_bytes_in_buf-brctx->bytes_consumed));
2136 num_unconsumed_bytes_in_buf -= brctx->bytes_consumed;
2138 else {
2139 num_unconsumed_bytes_in_buf = 0;
2141 offs_of_first_byte_in_buf += brctx->bytes_consumed;
2143 retval = 1;
2144 done:
2145 return retval;
2148 // Special case where all bytes are already in memory
2149 static int buffered_read_from_mem(struct de_bufferedreadctx *brctx,
2150 dbuf *f, const u8 *mem, i64 pos1, i64 len, de_buffered_read_cbfn cbfn)
2152 int retval = 0;
2153 i64 total_nbytes_consumed = 0;
2155 while(1) {
2156 int ret;
2157 i64 nbytes_to_send;
2159 nbytes_to_send = len - total_nbytes_consumed;
2160 if(nbytes_to_send<1) break;
2161 brctx->bytes_consumed = nbytes_to_send;
2162 brctx->offset = total_nbytes_consumed;
2163 brctx->eof_flag = 1;
2165 ret = cbfn(brctx, &mem[pos1+total_nbytes_consumed],
2166 nbytes_to_send);
2167 if(!ret) goto done;
2168 if(brctx->bytes_consumed<1 || brctx->bytes_consumed>nbytes_to_send) {
2169 goto done;
2171 total_nbytes_consumed += brctx->bytes_consumed;
2173 retval = 1;
2174 done:
2175 return retval;
2178 static int buffered_read_zero_len(struct de_bufferedreadctx *brctx,
2179 de_buffered_read_cbfn cbfn)
2181 const u8 dummybuf[1] = { 0 };
2182 int ret;
2184 brctx->offset = 0;
2185 brctx->eof_flag = 1;
2186 brctx->bytes_consumed = 0;
2187 ret = cbfn(brctx, dummybuf, 0);
2188 return ret?1:0;
2191 // dbuf_buffered_read:
2192 // Read a slice of a dbuf, and pass its data to a callback function, one
2193 // segment at a time.
2194 // cbfn: Caller-implemented callback function.
2195 // - It must be prepared for an arbitrarily large number of bytes to be passed
2196 // to it at once (though it does not have to consume them all).
2197 // - It must consume at least 1 byte, unless 0 bytes were passed to it.
2198 // - If it does not consume all the bytes passed to it, it must set
2199 // brctx->bytes_consumed.
2200 // - It must return nonzero normally, 0 to abort.
2201 // We guarantee that:
2202 // - brctx->eof_flag will be nonzero if and only if there is no data after this.
2203 // - If eof_flag is not set, at least DE_BUFFERED_READ_MIN_BLKSIZE bytes will
2204 // be provided.
2205 // - If the caller supplies 0 bytes of input data, the callback function will be
2206 // called exactly once. This is the only case where the callback will be
2207 // called with buf_len==0.
2208 // - If the source dbuf is a MEMBUF, and the requested bytes are all in range,
2209 // then all requested bytes will be provided in the first call to the callback
2210 // function.
2211 // Return value: 1 normally, 0 if the callback function ever returned 0.
2212 int dbuf_buffered_read(dbuf *f, i64 pos1, i64 len,
2213 de_buffered_read_cbfn cbfn, void *userdata)
2215 struct de_bufferedreadctx brctx;
2217 brctx.c = f->c;
2218 brctx.userdata = userdata;
2220 if(len<=0) { // Get this special case out of the way.
2221 return buffered_read_zero_len(&brctx, cbfn);
2224 // Use an optimized routine if all the data we need to read is already in memory.
2225 if(f->cache && (pos1>=0) && (pos1+len<=f->cache_bytes_used)) {
2226 return buffered_read_from_mem(&brctx, f, f->cache, pos1, len, cbfn);
2229 // Not an "optimization", since we promise this behavior for MEMBUFs.
2230 if(f->btype==DBUF_TYPE_MEMBUF && (pos1>=0) && (pos1+len<=f->len)) {
2231 return buffered_read_from_mem(&brctx, f, f->membuf_buf, pos1, len, cbfn);
2234 // The general case:
2235 return buffered_read_internal(&brctx, f, pos1, len, cbfn);
2238 int de_is_all_zeroes(const u8 *b, i64 n)
2240 i64 k;
2241 for(k=0; k<n; k++) {
2242 if(b[k]!=0) return 0;
2244 return 1;
2247 static int is_all_zeroes_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
2248 i64 buf_len)
2250 return de_is_all_zeroes(buf, buf_len);
2253 // Returns 1 if the given slice has only bytes with value 0.
2254 int dbuf_is_all_zeroes(dbuf *f, i64 pos, i64 len)
2256 return dbuf_buffered_read(f, pos, len, is_all_zeroes_cbfn, NULL);
2259 void de_bitbuf_lowelevel_add_byte(struct de_bitbuf_lowlevel *bbll, u8 n)
2261 if(bbll->nbits_in_bitbuf>56) return;
2262 if(bbll->is_lsb==0) {
2263 bbll->bit_buf = (bbll->bit_buf<<8) | n;
2265 else {
2266 bbll->bit_buf |= (u64)n << bbll->nbits_in_bitbuf;
2268 bbll->nbits_in_bitbuf += 8;
2271 u64 de_bitbuf_lowelevel_get_bits(struct de_bitbuf_lowlevel *bbll, UI nbits)
2273 u64 n;
2274 u64 mask;
2276 if(nbits > bbll->nbits_in_bitbuf) return 0;
2277 mask = ((u64)1 << nbits)-1;
2278 if(bbll->is_lsb==0) {
2279 bbll->nbits_in_bitbuf -= nbits;
2280 n = (bbll->bit_buf >> bbll->nbits_in_bitbuf) & mask;
2282 else {
2283 n = bbll->bit_buf & mask;
2284 bbll->bit_buf >>= nbits;
2285 bbll->nbits_in_bitbuf -= nbits;
2287 return n;
2290 void de_bitbuf_lowelevel_empty(struct de_bitbuf_lowlevel *bbll)
2292 bbll->bit_buf = 0;
2293 bbll->nbits_in_bitbuf = 0;
2296 u64 de_bitreader_getbits(struct de_bitreader *bitrd, UI nbits)
2298 if(bitrd->eof_flag) return 0;
2299 if(nbits==0) {
2300 // TODO: Decide if we always want to do this. Could risk infinite loops
2301 // with this successful no-op.
2302 return 0;
2304 if(nbits > 57) {
2305 bitrd->eof_flag = 1;
2306 return 0;
2309 while(bitrd->bbll.nbits_in_bitbuf < nbits) {
2310 u8 b;
2312 if(bitrd->curpos >= bitrd->endpos) {
2313 bitrd->eof_flag = 1;
2314 return 0;
2316 b = dbuf_getbyte_p(bitrd->f, &bitrd->curpos);
2317 de_bitbuf_lowelevel_add_byte(&bitrd->bbll, b);
2320 return de_bitbuf_lowelevel_get_bits(&bitrd->bbll, nbits);
2323 char *de_bitreader_describe_curpos(struct de_bitreader *bitrd, char *buf, size_t buf_len)
2325 i64 curpos;
2326 UI nwholebytes;
2327 UI nbits;
2329 nwholebytes = (i64)(bitrd->bbll.nbits_in_bitbuf / 8);
2330 nbits = bitrd->bbll.nbits_in_bitbuf % 8;
2331 curpos = bitrd->curpos - (i64)nwholebytes;
2333 if(nbits==0) {
2334 de_snprintf(buf, buf_len, "%"I64_FMT, curpos);
2336 else {
2337 de_snprintf(buf, buf_len, "%"I64_FMT"+%ubits", curpos-1, (UI)(8-nbits));
2339 return buf;