Split misc.c into two files
[deark.git] / src / deark-dbuf.c
blob73986fa716f91f1692e2da3b7eb8e13d814b6c7c
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // deark-dbuf.c
6 //
7 // Functions related to the dbuf object.
9 #define DE_NOT_IN_MODULE
10 #include "deark-config.h"
11 #include "deark-private.h"
// Placeholder size limit for dbufs whose real limit is checked elsewhere.
#define DE_DUMMY_MAX_FILE_SIZE (1LL<<56)
// Hard size limit, in bytes, for in-memory dbufs.
#define DE_MAX_MEMBUF_SIZE 2000000000
// Bytes allocated for the start-of-file cache (256 KiB).
#define DE_CACHE_SIZE (256*1024)
17 // Fill the cache that remembers the first part of the file.
18 // TODO: We should probably use memory-mapped files instead when possible,
19 // but this is simple and portable, and does most of what we need.
20 static void populate_cache(dbuf *f)
22 i64 bytes_to_read;
23 i64 bytes_read;
25 if(f->btype!=DBUF_TYPE_IFILE) return;
27 bytes_to_read = DE_CACHE_SIZE;
28 if(f->len < bytes_to_read) {
29 bytes_to_read = f->len;
32 f->cache = de_malloc(f->c, DE_CACHE_SIZE);
33 de_fseek(f->fp, 0, SEEK_SET);
34 bytes_read = fread(f->cache, 1, (size_t)bytes_to_read, f->fp);
35 f->cache_bytes_used = bytes_read;
36 f->file_pos_known = 0;
39 // Read all data from stdin (or a named pipe) into memory.
40 static void populate_cache_from_pipe(dbuf *f)
42 FILE *fp;
43 i64 cache_bytes_alloc = 0;
45 if(f->btype==DBUF_TYPE_STDIN) {
46 fp = stdin;
48 else if(f->btype==DBUF_TYPE_FIFO) {
49 fp = f->fp;
51 else {
52 return;
55 f->cache_bytes_used = 0;
57 while(1) {
58 i64 bytes_to_read, bytes_read;
60 if(f->cache_bytes_used >= cache_bytes_alloc) {
61 i64 old_cache_size, new_cache_size;
63 // Cache is full. Increase its size.
64 old_cache_size = cache_bytes_alloc;
65 new_cache_size = old_cache_size*2;
66 if(new_cache_size<DE_CACHE_SIZE) new_cache_size = DE_CACHE_SIZE;
67 f->cache = de_realloc(f->c, f->cache, old_cache_size, new_cache_size);
68 cache_bytes_alloc = new_cache_size;
71 // Try to read as many bytes as it would take to fill the cache.
72 bytes_to_read = cache_bytes_alloc - f->cache_bytes_used;
73 if(bytes_to_read<1) break; // Shouldn't happen
75 bytes_read = fread(&f->cache[f->cache_bytes_used], 1, (size_t)bytes_to_read, fp);
76 if(bytes_read<1 || bytes_read>bytes_to_read) break;
77 f->cache_bytes_used += bytes_read;
78 if(feof(fp) || ferror(fp)) break;
81 f->len = f->cache_bytes_used;
84 // Read len bytes, starting at file position pos, into buf.
85 // Unread bytes will be set to 0.
86 void dbuf_read(dbuf *f, u8 *buf, i64 pos, i64 len)
88 i64 bytes_read = 0;
89 i64 bytes_to_read;
90 deark *c;
92 c = f->c;
94 if(pos < 0) {
95 if((-pos) >= len) {
96 // All requested bytes are before the beginning of the file
97 de_zeromem(buf, (size_t)len);
98 return;
100 // Some requested bytes are before the beginning of the file.
101 // Zero out the ones that are:
102 de_zeromem(buf, (size_t)(-pos));
103 // And adjust the parameters:
104 buf += (-pos);
105 len -= (-pos);
106 pos = 0;
109 bytes_to_read = len;
110 if(pos >= f->len) {
111 bytes_to_read = 0;
113 else if(pos + bytes_to_read > f->len) {
114 bytes_to_read = f->len - pos;
117 if(bytes_to_read<1) {
118 goto done_read;
121 // If the data we need is all cached, get it from cache.
122 if(f->cache &&
123 pos >= 0 &&
124 pos + bytes_to_read <= f->cache_bytes_used)
126 de_memcpy(buf, &f->cache[pos], (size_t)bytes_to_read);
127 bytes_read = bytes_to_read;
128 goto done_read;
131 switch(f->btype) {
132 case DBUF_TYPE_IFILE:
133 if(!f->fp) {
134 de_internal_err_fatal(c, "File not open");
135 goto done_read;
138 // For performance reasons, don't call fseek if we're already at the
139 // right position.
140 if(!f->file_pos_known || f->file_pos!=pos) {
141 de_fseek(f->fp, pos, SEEK_SET);
144 bytes_read = fread(buf, 1, (size_t)bytes_to_read, f->fp);
146 f->file_pos = pos + bytes_read;
147 f->file_pos_known = 1;
148 break;
150 case DBUF_TYPE_IDBUF:
151 // Recursive call to the parent dbuf.
152 dbuf_read(f->parent_dbuf, buf, f->offset_into_parent_dbuf+pos, bytes_to_read);
154 // The parent dbuf always writes 'bytes_to_read' bytes.
155 bytes_read = bytes_to_read;
156 break;
158 case DBUF_TYPE_MEMBUF:
159 de_memcpy(buf, &f->membuf_buf[pos], (size_t)bytes_to_read);
160 bytes_read = bytes_to_read;
161 break;
163 default:
164 de_internal_err_fatal(c, "getbytes from this I/O type not implemented");
165 goto done_read;
168 done_read:
169 // Zero out any requested bytes that were not read.
170 if(bytes_read < len) {
171 de_zeromem(buf+bytes_read, (size_t)(len - bytes_read));
175 // A function that works a little more like a standard read/fread function than
176 // does dbuf_read. It returns the number of bytes read, won't read past end of
177 // file, and helps track the file position.
178 i64 dbuf_standard_read(dbuf *f, u8 *buf, i64 n, i64 *fpos)
180 i64 amt_to_read;
182 if(*fpos < 0 || *fpos >= f->len) return 0;
184 amt_to_read = n;
185 if(*fpos + amt_to_read > f->len) amt_to_read = f->len - *fpos;
186 dbuf_read(f, buf, *fpos, amt_to_read);
187 *fpos += amt_to_read;
188 return amt_to_read;
191 u8 dbuf_getbyte(dbuf *f, i64 pos)
193 if(pos<0 || pos>=f->len) return 0x00;
195 if(pos<f->cache_bytes_used) {
196 return f->cache[pos];
198 if(f->btype==DBUF_TYPE_MEMBUF) {
199 // Note that it is necessary to handle read+write dbuf types specially,
200 // so that the "cache2" feature isn't used.
201 return f->membuf_buf[pos];
204 // TODO: I don't like that cache2 exists, but without it some large images
205 // are decoded too slowly (especially on Windows), and I haven't figured out
206 // a solution I like better.
207 if(pos==f->cache2_pos) {
208 return f->cache2;
210 f->cache2_pos = pos;
211 dbuf_read(f, &f->cache2, pos, 1);
212 return f->cache2;
215 i64 de_geti8_direct(const u8 *m)
217 u8 b = m[0];
219 if(b<=127) return (i64)b;
220 return ((i64)b)-256;
223 i64 dbuf_geti8(dbuf *f, i64 pos)
225 u8 b;
227 b = dbuf_getbyte(f, pos);
228 return de_geti8_direct(&b);
231 u8 dbuf_getbyte_p(dbuf *f, i64 *ppos)
233 u8 b;
234 b = dbuf_getbyte(f, *ppos);
235 (*ppos)++;
236 return b;
239 static i64 dbuf_getuint_ext_be_direct(const u8 *m, unsigned int nbytes)
241 unsigned int k;
242 u64 val = 0;
244 for(k=0; k<nbytes; k++) {
245 if(val>0x00ffffffffffffffULL) return 0;
246 val = (val<<8) | (u64)m[k];
248 return (i64)val;
251 static i64 dbuf_getint_ext_be_direct(const u8 *m, unsigned int nbytes)
253 unsigned int k;
254 u64 val = 0;
256 // We can handle up to 8 arbitrary bytes. Any more have to be 0xff.
257 if(nbytes>8) {
258 for(k=0; k<nbytes-8; k++) {
259 if(m[k]!=0xff) return 0; // underflow
263 // Process bytes in order of increasing significance
264 for(k=0; k<8; k++) {
265 u8 byteval;
267 if(k<nbytes) {
268 byteval = m[nbytes-1-k];
270 else {
271 byteval = 0xff;
273 val |= ((u64)byteval) << (k*8);
275 return (i64)val;
278 static i64 dbuf_getuint_ext_le_direct(const u8 *m, unsigned int nbytes)
280 unsigned int k;
281 u64 val = 0;
283 for(k=0; k<nbytes; k++) {
284 if(m[k]!=0) {
285 if(k>7) return 0;
286 val |= ((u64)m[k])<<(k*8);
289 return (i64)val;
292 static i64 dbuf_getuint_ext_x(dbuf *f, i64 pos, unsigned int nbytes,
293 int is_le)
295 u8 m[24];
297 if(nbytes>(unsigned int)sizeof(m)) return 0;
298 dbuf_read(f, m, pos, (i64)nbytes);
299 if(is_le) {
300 return dbuf_getuint_ext_le_direct(m, nbytes);
302 return dbuf_getuint_ext_be_direct(m, nbytes);
305 static i64 dbuf_getint_ext_x(dbuf *f, i64 pos, unsigned int nbytes, int is_le)
307 u8 m[24];
309 if(nbytes>(unsigned int)sizeof(m)) return 0;
310 dbuf_read(f, m, pos, (i64)nbytes);
311 if(is_le) {
312 return 0; // TODO
314 return dbuf_getint_ext_be_direct(m, nbytes);
317 i64 de_getu16be_direct(const u8 *m)
319 return (i64)(((u32)m[1]) | (((u32)m[0])<<8));
322 i64 dbuf_getu16be(dbuf *f, i64 pos)
324 u8 m[2];
325 dbuf_read(f, m, pos, 2);
326 return de_getu16be_direct(m);
329 i64 dbuf_getu16be_p(dbuf *f, i64 *ppos)
331 u8 m[2];
332 dbuf_read(f, m, *ppos, 2);
333 (*ppos) += 2;
334 return de_getu16be_direct(m);
337 i64 de_getu16le_direct(const u8 *m)
339 return (i64)(((u32)m[0]) | (((u32)m[1])<<8));
342 i64 dbuf_getu16le(dbuf *f, i64 pos)
344 u8 m[2];
345 dbuf_read(f, m, pos, 2);
346 return de_getu16le_direct(m);
349 i64 dbuf_getu16le_p(dbuf *f, i64 *ppos)
351 u8 m[2];
352 dbuf_read(f, m, *ppos, 2);
353 (*ppos) += 2;
354 return de_getu16le_direct(m);
357 i64 dbuf_geti16be(dbuf *f, i64 pos)
359 i64 n;
360 n = dbuf_getu16be(f, pos);
361 if(n>=32768) n -= 65536;
362 return n;
365 i64 dbuf_geti16le(dbuf *f, i64 pos)
367 i64 n;
368 n = dbuf_getu16le(f, pos);
369 if(n>=32768) n -= 65536;
370 return n;
373 i64 dbuf_geti16be_p(dbuf *f, i64 *ppos)
375 i64 n;
376 n = dbuf_geti16be(f, *ppos);
377 (*ppos) += 2;
378 return n;
381 i64 dbuf_geti16le_p(dbuf *f, i64 *ppos)
383 i64 n;
384 n = dbuf_geti16le(f, *ppos);
385 (*ppos) += 2;
386 return n;
389 i64 de_getu32be_direct(const u8 *m)
391 return (i64)(((u32)m[3]) | (((u32)m[2])<<8) |
392 (((u32)m[1])<<16) | (((u32)m[0])<<24));
395 i64 dbuf_getu32be(dbuf *f, i64 pos)
397 u8 m[4];
398 dbuf_read(f, m, pos, 4);
399 return de_getu32be_direct(m);
402 i64 dbuf_getu32be_p(dbuf *f, i64 *ppos)
404 u8 m[4];
405 dbuf_read(f, m, *ppos, 4);
406 (*ppos) += 4;
407 return de_getu32be_direct(m);
410 i64 de_getu32le_direct(const u8 *m)
412 return (i64)(((u32)m[0]) | (((u32)m[1])<<8) |
413 (((u32)m[2])<<16) | (((u32)m[3])<<24));
416 i64 dbuf_getu32le(dbuf *f, i64 pos)
418 u8 m[4];
419 dbuf_read(f, m, pos, 4);
420 return de_getu32le_direct(m);
423 i64 dbuf_getu32le_p(dbuf *f, i64 *ppos)
425 u8 m[4];
426 dbuf_read(f, m, *ppos, 4);
427 (*ppos) += 4;
428 return de_getu32le_direct(m);
431 i64 dbuf_geti32be(dbuf *f, i64 pos)
433 i64 n;
434 n = dbuf_getu32be(f, pos);
435 return (i64)(i32)(u32)n;
438 i64 dbuf_geti32le(dbuf *f, i64 pos)
440 i64 n;
441 n = dbuf_getu32le(f, pos);
442 return (i64)(i32)(u32)n;
445 i64 dbuf_geti32be_p(dbuf *f, i64 *ppos)
447 i64 n;
448 n = dbuf_geti32be(f, *ppos);
449 (*ppos) += 4;
450 return n;
453 i64 dbuf_geti32le_p(dbuf *f, i64 *ppos)
455 i64 n;
456 n = dbuf_geti32le(f, *ppos);
457 (*ppos) += 4;
458 return n;
461 u64 de_getu64be_direct(const u8 *m)
463 unsigned int i;
464 u64 val = 0;
466 for(i=0; i<8; i++) {
467 val |= ((u64)m[i])<<((7-i)*8);
469 return val;
472 i64 de_geti64be_direct(const u8 *m)
474 return (i64)de_getu64be_direct(m);
477 i64 dbuf_geti64be(dbuf *f, i64 pos)
479 u8 m[8];
480 dbuf_read(f, m, pos, 8);
481 return de_geti64be_direct(m);
484 u64 de_getu64le_direct(const u8 *m)
486 unsigned int i;
487 u64 val = 0;
489 for(i=0; i<8; i++) {
490 val |= ((u64)m[i])<<(i*8);
492 return val;
495 i64 de_geti64le_direct(const u8 *m)
497 return (i64)de_getu64le_direct(m);
500 i64 dbuf_geti64le(dbuf *f, i64 pos)
502 u8 m[8];
503 dbuf_read(f, m, pos, 8);
504 return de_geti64le_direct(m);
507 i64 dbuf_getu16x(dbuf *f, i64 pos, int is_le)
509 if(is_le) return dbuf_getu16le(f, pos);
510 return dbuf_getu16be(f, pos);
513 i64 dbuf_geti16x(dbuf *f, i64 pos, int is_le)
515 if(is_le) return dbuf_geti16le(f, pos);
516 return dbuf_geti16be(f, pos);
519 i64 dbuf_getu32x(dbuf *f, i64 pos, int is_le)
521 if(is_le) return dbuf_getu32le(f, pos);
522 return dbuf_getu32be(f, pos);
525 i64 dbuf_geti32x(dbuf *f, i64 pos, int is_le)
527 if(is_le) return dbuf_geti32le(f, pos);
528 return dbuf_geti32be(f, pos);
531 i64 dbuf_geti64x(dbuf *f, i64 pos, int is_le)
533 if(is_le) return dbuf_geti64le(f, pos);
534 return dbuf_geti64be(f, pos);
537 u64 dbuf_getu64be(dbuf *f, i64 pos)
539 u8 m[8];
540 dbuf_read(f, m, pos, 8);
541 return de_getu64be_direct(m);
544 u64 dbuf_getu64le(dbuf *f, i64 pos)
546 u8 m[8];
547 dbuf_read(f, m, pos, 8);
548 return de_getu64le_direct(m);
551 u64 dbuf_getu64x(dbuf *f, i64 pos, int is_le)
553 if(is_le) return dbuf_getu64le(f, pos);
554 return dbuf_getu64be(f, pos);
557 i64 dbuf_getint_ext(dbuf *f, i64 pos, unsigned int nbytes,
558 int is_le, int is_signed)
560 if(is_signed) {
561 // TODO: Extend this to any number of bytes, 1-8.
562 switch(nbytes) {
563 case 1: return (i64)(signed char)dbuf_getbyte(f, pos); break;
564 case 2: return dbuf_geti16x(f, pos, is_le); break;
565 case 4: return dbuf_geti32x(f, pos, is_le); break;
566 case 8: return dbuf_geti64x(f, pos, is_le); break;
567 default:
568 return dbuf_getint_ext_x(f, pos, nbytes, is_le);
571 else {
572 switch(nbytes) {
573 case 1: return (i64)dbuf_getbyte(f, pos); break;
574 case 2: return dbuf_getu16x(f, pos, is_le); break;
575 case 4: return dbuf_getu32x(f, pos, is_le); break;
576 case 8: return dbuf_geti64x(f, pos, is_le); break;
577 default:
578 return dbuf_getuint_ext_x(f, pos, nbytes, is_le);
581 return 0;
584 static void init_fltpt_decoder(deark *c)
586 unsigned int x = 1;
587 char b = 0;
589 c->can_decode_fltpt = 0;
590 if(sizeof(float)!=4) return;
591 if(sizeof(double)!=8) return;
592 c->can_decode_fltpt = 1;
594 de_memcpy(&b, &x, 1);
595 if(b==0)
596 c->host_is_le = 0;
597 else
598 c->host_is_le = 1;
601 double de_getfloat32x_direct(deark *c, const u8 *m, int is_le)
603 char buf[4];
604 float val = 0.0;
606 if(c->can_decode_fltpt<0) {
607 init_fltpt_decoder(c);
609 if(!c->can_decode_fltpt) return 0.0;
611 // FIXME: This assumes that the native floating point format is
612 // IEEE 754, but that does not have to be the case.
614 de_memcpy(buf, m, 4);
616 if(is_le != c->host_is_le) {
617 int i;
618 char tmpc;
619 // Reverse order of bytes
620 for(i=0; i<2; i++) {
621 tmpc = buf[i]; buf[i] = buf[3-i]; buf[3-i] = tmpc;
625 de_memcpy(&val, buf, 4);
626 return (double)val;
629 double dbuf_getfloat32x(dbuf *f, i64 pos, int is_le)
631 u8 buf[4];
632 dbuf_read(f, buf, pos, 4);
633 return de_getfloat32x_direct(f->c, buf, is_le);
636 double de_getfloat64x_direct(deark *c, const u8 *m, int is_le)
638 char buf[8];
639 double val = 0.0;
641 if(c->can_decode_fltpt<0) {
642 init_fltpt_decoder(c);
644 if(!c->can_decode_fltpt) return 0.0;
646 de_memcpy(buf, m, 8);
648 if(is_le != c->host_is_le) {
649 int i;
650 char tmpc;
651 // Reverse order of bytes
652 for(i=0; i<4; i++) {
653 tmpc = buf[i]; buf[i] = buf[7-i]; buf[7-i] = tmpc;
657 de_memcpy(&val, buf, 8);
658 return (double)val;
661 double dbuf_getfloat64x(dbuf *f, i64 pos, int is_le)
663 u8 buf[8];
664 dbuf_read(f, buf, pos, 8);
665 return de_getfloat64x_direct(f->c, buf, is_le);
668 int dbuf_read_ascii_number(dbuf *f, i64 pos, i64 fieldsize,
669 int base, i64 *value)
671 char buf[32];
673 *value = 0;
674 if(fieldsize>(i64)(sizeof(buf)-1)) return 0;
676 dbuf_read(f, (u8*)buf, pos, fieldsize);
677 buf[fieldsize] = '\0';
679 *value = de_strtoll(buf, NULL, base);
680 return 1;
683 de_color dbuf_getRGB(dbuf *f, i64 pos, unsigned int flags)
685 u8 buf[3];
686 dbuf_read(f, buf, pos, 3);
687 if(flags&DE_GETRGBFLAG_BGR)
688 return DE_MAKE_RGB(buf[2], buf[1], buf[0]);
689 return DE_MAKE_RGB(buf[0], buf[1], buf[2]);
692 static int copy_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
693 i64 buf_len)
695 dbuf *outf = (dbuf*)brctx->userdata;
696 dbuf_write(outf, buf, buf_len);
697 return 1;
700 void dbuf_copy(dbuf *inf, i64 input_offset, i64 input_len, dbuf *outf)
702 u8 tmpbuf[256];
704 // Fast paths, if the data to copy is all in memory
706 if(inf->cache &&
707 (input_offset>=0) && (input_offset+input_len<=inf->cache_bytes_used))
709 dbuf_write(outf, &inf->cache[input_offset], input_len);
710 return;
713 if(inf->btype==DBUF_TYPE_MEMBUF &&
714 (input_offset>=0) && (input_offset+input_len<=inf->len))
716 dbuf_write(outf, &inf->membuf_buf[input_offset], input_len);
717 return;
720 if(input_len<=(i64)sizeof(tmpbuf)) {
721 // Fast path for small sizes
722 dbuf_read(inf, tmpbuf, input_offset, input_len);
723 dbuf_write(outf, tmpbuf, input_len);
724 return;
727 dbuf_buffered_read(inf, input_offset, input_len, copy_cbfn, (void*)outf);
730 struct copy_at_ctx {
731 dbuf *outf;
732 i64 outpos;
735 static int copy_at_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
736 i64 buf_len)
738 struct copy_at_ctx *ctx = (struct copy_at_ctx*)brctx->userdata;
740 dbuf_write_at(ctx->outf, ctx->outpos, buf, buf_len);
741 ctx->outpos += buf_len;
742 return 1;
745 void dbuf_copy_at(dbuf *inf, i64 input_offset, i64 input_len,
746 dbuf *outf, i64 output_offset)
748 struct copy_at_ctx ctx;
750 ctx.outf = outf;
751 ctx.outpos = output_offset;
752 dbuf_buffered_read(inf, input_offset, input_len, copy_at_cbfn, (void*)&ctx);
755 // An advanced function for reading a string from a file.
756 // The issue is that some strings are both human-readable and machine-readable.
757 // In such a case, we'd like to read some data from a file into a nice printable
758 // ucstring, while also making some or all of the raw bytes available, say for
759 // byte-for-byte string comparisons.
760 // Plus (for NUL-terminated/padded strings), we may need to know the actual length
761 // of the string in the file, so that it can be skipped over, even if we don't
762 // care about the whole string.
763 // Caller is responsible for calling destroy_stringreader() on the returned value.
764 // max_bytes_to_scan: The maximum number of bytes to read from the file.
765 // max_bytes_to_keep: The maximum (or in some cases the exact) number of bytes,
766 // not counting any NUL terminator, to return in ->sz.
767 // The ->str field is a Unicode version of ->sz, so this also affects ->str.
768 // If DE_CONVFLAG_STOP_AT_NUL is not set, it is assumed we are reading a string
769 // of known length, that may have internal NUL bytes. The caller must set
770 // max_bytes_to_scan and max_bytes_to_keep to the same value. The ->sz field will
771 // always be allocated with this many bytes, plus one more for an artificial NUL
772 // terminator.
773 // If DE_CONVFLAG_WANT_UTF8 is set, then the ->sz_utf8 field will be set to a
774 // UTF-8 version of ->str. This is mainly useful if the original string was
775 // UTF-16. sz_utf8 is not "printable" -- use ucstring_get_printable_sz_n(str) for
776 // that.
777 // ->sz_strlen will equal strlen(->sz) if DE_CONVFLAG_STOP_AT_NUL is set, or
778 // the supplied value of max_bytes_to_(scan|keep) if not.
779 // Recognized flags:
780 // - DE_CONVFLAG_STOP_AT_NUL
781 // - DE_CONVFLAG_WANT_UTF8
782 struct de_stringreaderdata *dbuf_read_string(dbuf *f, i64 pos,
783 i64 max_bytes_to_scan,
784 i64 max_bytes_to_keep,
785 unsigned int flags, de_ext_encoding ee)
787 deark *c = f->c;
788 struct de_stringreaderdata *srd;
789 i64 foundpos = 0;
790 int ret;
791 i64 bytes_avail_to_read;
792 i64 bytes_to_malloc;
793 i64 x_strlen = 0;
795 srd = de_malloc(c, sizeof(struct de_stringreaderdata));
796 srd->str = ucstring_create(c);
797 if(max_bytes_to_scan<0) max_bytes_to_scan = 0;
798 if(max_bytes_to_keep<0) max_bytes_to_keep = 0;
800 bytes_avail_to_read = max_bytes_to_scan;
801 if(bytes_avail_to_read > f->len-pos) {
802 bytes_avail_to_read = f->len-pos;
804 if(bytes_avail_to_read<0) bytes_avail_to_read = 0;
806 srd->bytes_consumed = bytes_avail_to_read; // default
808 // From here on, we can safely bail out ("goto done"). The
809 // de_stringreaderdata struct is sufficiently valid.
811 if(!(flags&DE_CONVFLAG_STOP_AT_NUL) &&
812 (max_bytes_to_scan != max_bytes_to_keep))
814 // To reduce possible confusion, we require that
815 // max_bytes_to_scan==max_bytes_to_keep in this case.
816 srd->sz = de_malloc(c, max_bytes_to_keep+1);
817 goto done;
820 if(flags&DE_CONVFLAG_STOP_AT_NUL) {
821 ret = dbuf_search_byte(f, 0x00, pos, bytes_avail_to_read, &foundpos);
822 if(ret) {
823 srd->found_nul = 1;
825 else {
826 // No NUL byte found. Could be an error in some formats, but in
827 // others NUL is used as separator or as padding, not a terminator.
828 foundpos = pos+bytes_avail_to_read;
831 x_strlen = foundpos-pos;
832 srd->bytes_consumed = x_strlen+1;
834 else {
835 x_strlen = max_bytes_to_keep;
836 srd->bytes_consumed = x_strlen;
839 bytes_to_malloc = x_strlen+1;
840 if(bytes_to_malloc>(max_bytes_to_keep+1)) {
841 bytes_to_malloc = max_bytes_to_keep+1;
842 srd->was_truncated = 1;
845 srd->sz = de_malloc(c, bytes_to_malloc);
846 dbuf_read(f, (u8*)srd->sz, pos, bytes_to_malloc-1); // The last byte remains NUL
848 ucstring_append_bytes(srd->str, (const u8*)srd->sz, bytes_to_malloc-1, 0, ee);
850 if(flags&DE_CONVFLAG_WANT_UTF8) {
851 srd->sz_utf8_strlen = (size_t)ucstring_count_utf8_bytes(srd->str);
852 srd->sz_utf8 = de_malloc(c, (i64)srd->sz_utf8_strlen + 1);
853 ucstring_to_sz(srd->str, srd->sz_utf8, srd->sz_utf8_strlen + 1, 0, DE_ENCODING_UTF8);
856 done:
857 if(!srd->sz) {
858 // Always return a valid sz, even on failure.
859 srd->sz = de_malloc(c, 1);
861 if((flags&DE_CONVFLAG_WANT_UTF8) && !srd->sz_utf8) {
862 // Always return a valid sz_utf8 if it was requested, even on failure.
863 srd->sz_utf8 = de_malloc(c, 1);
864 srd->sz_utf8_strlen = 0;
866 srd->sz_strlen = (size_t)x_strlen;
867 return srd;
870 void de_destroy_stringreaderdata(deark *c, struct de_stringreaderdata *srd)
872 if(!srd) return;
873 de_free(c, srd->sz);
874 de_free(c, srd->sz_utf8);
875 ucstring_destroy(srd->str);
876 de_free(c, srd);
879 void dbuf_read_to_ucstring_ex(dbuf *f, i64 pos1, i64 len,
880 de_ucstring *s, unsigned int conv_flags, struct de_encconv_state *es)
882 i64 nbytes_remaining;
883 i64 pos = pos1;
884 int stop_at_nul = 0;
885 #define READTOUCSTRING_BUFLEN 256
886 u8 buf[READTOUCSTRING_BUFLEN];
888 if(conv_flags & DE_CONVFLAG_STOP_AT_NUL) {
889 stop_at_nul = 1;
890 // We handle STOP_AT_NUL ourselves, so don't pass it on.
891 conv_flags -= DE_CONVFLAG_STOP_AT_NUL;
894 // Note: It might be sensible to use dbuf_buffered_read() here, but I've
895 // decided against it for now.
896 nbytes_remaining = len;
897 do {
898 i64 nbytes_to_read;
899 i64 nbytes_in_buf;
900 unsigned int conv_flags_to_use_this_time;
902 // Lack of DE_CONVFLAG_PARTIAL_DATA flag signals end of data, which
903 // isn't necessarily a no-op even with len=0.
904 // That's why we always do this loop at least once.
906 nbytes_to_read = de_min_int(nbytes_remaining, READTOUCSTRING_BUFLEN);
907 dbuf_read(f, buf, pos, nbytes_to_read);
908 pos += nbytes_to_read;
909 nbytes_in_buf = nbytes_to_read;
910 nbytes_remaining -= nbytes_to_read;
912 if(stop_at_nul) {
913 char *tmpp;
915 tmpp = de_memchr(buf, 0x00, (size_t)nbytes_in_buf);
916 if(tmpp) {
917 nbytes_in_buf = (const u8*)tmpp - buf;
918 nbytes_remaining = 0;
922 conv_flags_to_use_this_time = conv_flags;
923 if(nbytes_remaining>0) {
924 // The caller may have aleady set this flag, in which case we will use
925 // it every time.
926 // If not, we still use it for all but the final call to ucstring_append_bytes_ex().
927 conv_flags_to_use_this_time |= DE_CONVFLAG_PARTIAL_DATA;
930 ucstring_append_bytes_ex(s, buf, nbytes_in_buf, conv_flags_to_use_this_time, es);
931 } while(nbytes_remaining>0);
935 // Read (up to) len bytes from f, translate them to characters, and append
936 // them to s.
937 void dbuf_read_to_ucstring(dbuf *f, i64 pos, i64 len,
938 de_ucstring *s, unsigned int conv_flags, de_ext_encoding ee)
940 struct de_encconv_state es;
942 de_encconv_init(&es, ee);
943 dbuf_read_to_ucstring_ex(f, pos, len, s, conv_flags, &es);
946 void dbuf_read_to_ucstring_n(dbuf *f, i64 pos, i64 len, i64 max_len,
947 de_ucstring *s, unsigned int conv_flags, de_ext_encoding ee)
949 struct de_encconv_state es;
951 if(len>max_len) len = max_len;
952 de_encconv_init(&es, ee);
953 dbuf_read_to_ucstring_ex(f, pos, len, s, conv_flags, &es);
956 static int dbufmemcmp_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
957 i64 buf_len)
959 // Return 0 if there is a mismatch.
960 return !de_memcmp(buf,
961 &(((const u8*)brctx->userdata)[brctx->offset]),
962 (size_t)buf_len);
965 int dbuf_memcmp(dbuf *f, i64 pos, const void *s, size_t n)
967 u8 buf1[128];
969 if(f->cache &&
970 pos >= 0 &&
971 pos + (i64)n <= f->cache_bytes_used)
973 // Fastest path: Compare directly to cache.
974 return de_memcmp(s, &f->cache[pos], n);
977 if(n<=sizeof(buf1)) {
978 // Use a stack buffer if small enough.
979 dbuf_read(f, buf1, pos, n);
980 return de_memcmp(buf1, s, n);
983 // Fallback method.
984 return !dbuf_buffered_read(f, pos, n, dbufmemcmp_cbfn, (void*)s);
987 int dbuf_create_file_from_slice(dbuf *inf, i64 pos, i64 data_size,
988 const char *ext, de_finfo *fi, unsigned int createflags)
990 dbuf *f;
991 f = dbuf_create_output_file(inf->c, ext, fi, createflags);
992 if(!f) return 0;
993 dbuf_copy(inf, pos, data_size, f);
994 dbuf_close(f);
995 return 1;
998 static void finfo_shallow_copy(deark *c, de_finfo *src, de_finfo *dst)
1000 UI k;
1002 dst->is_directory = src->is_directory;
1003 dst->has_riscos_data = src->has_riscos_data;
1004 dst->riscos_appended_type = src->riscos_appended_type;
1005 dst->riscos_attribs = src->riscos_attribs;
1006 dst->mode_flags = src->mode_flags;
1007 for(k=0; k<DE_TIMESTAMPIDX_COUNT; k++) {
1008 dst->timestamp[k] = src->timestamp[k];
1010 dst->internal_mod_time = src->internal_mod_time;
1011 dst->density = src->density;
1012 dst->has_hotspot = src->has_hotspot;
1013 dst->hotspot_x = src->hotspot_x;
1014 dst->hotspot_y = src->hotspot_y;
1015 dst->load_addr = src->load_addr;
1016 dst->exec_addr = src->exec_addr;
1019 static dbuf *create_dbuf_lowlevel(deark *c)
1021 dbuf *f;
1023 f = de_malloc(c, sizeof(dbuf));
1024 f->c = c;
1025 f->cache2_pos = -1; // Any offset outside the bounds of the file will do.
1026 return f;
1029 // Create or open a file for writing, that is *not* one of the usual
1030 // "output.000.ext" files we extract from the input file.
1032 // overwrite_mode, flags: Same as for de_fopen_for_write().
1034 // On failure, prints an error message, and sets f->btype to DBUF_TYPE_NULL.
1035 dbuf *dbuf_create_unmanaged_file(deark *c, const char *fname, int overwrite_mode,
1036 unsigned int flags)
1038 dbuf *f;
1039 char msgbuf[200];
1041 f = create_dbuf_lowlevel(c);
1042 f->is_managed = 0;
1043 f->name = de_strdup(c, fname);
1045 f->btype = DBUF_TYPE_OFILE;
1046 f->max_len_hard = c->max_output_file_size;
1047 f->fp = de_fopen_for_write(c, f->name, msgbuf, sizeof(msgbuf),
1048 c->overwrite_mode, flags);
1050 if(!f->fp) {
1051 de_err(c, "Failed to write %s: %s", f->name, msgbuf);
1052 f->btype = DBUF_TYPE_NULL;
1053 c->serious_error_flag = 1;
1056 return f;
1059 dbuf *dbuf_create_unmanaged_file_stdout(deark *c, const char *name)
1061 dbuf *f;
1063 f = create_dbuf_lowlevel(c);
1064 f->is_managed = 0;
1065 f->name = de_strdup(c, name);
1066 f->btype = DBUF_TYPE_STDOUT;
1067 f->max_len_hard = c->max_output_file_size;
1068 f->fp = stdout;
1069 return f;
// Copy ext1 into ext (a buffer of extlen bytes), replacing every character
// other than ASCII letters, digits, '.', '_', '-' and '+' with '_'.
static void sanitize_ext(const char *ext1, char *ext, size_t extlen)
{
	size_t i;

	de_strlcpy(ext, ext1, extlen);

	// This part of the filename should come from Deark, and should only
	// use a limited set of characters. Just to be sure:
	for(i=0; ext[i]; i++) {
		char ch = ext[i];
		int is_allowed;

		is_allowed = (ch>='0' && ch<='9') ||
			(ch>='A' && ch<='Z') ||
			(ch>='a' && ch<='z') ||
			ch=='.' || ch=='_' || ch=='-' || ch=='+';

		if(!is_allowed) {
			ext[i] = '_';
		}
	}
}
1093 dbuf *dbuf_create_output_file(deark *c, const char *ext1, de_finfo *fi,
1094 unsigned int createflags)
1096 char nbuf[500];
1097 char msgbuf[200];
1098 char ext[128];
1099 int have_ext;
1100 dbuf *f;
1101 const char *basefn;
1102 int file_index;
1103 u8 is_directory = 0;
1104 char *name_from_finfo = NULL;
1105 i64 name_from_finfo_len = 0;
1107 if(ext1) {
1108 have_ext = 1;
1109 sanitize_ext(ext1, ext, sizeof(ext));
1111 else {
1112 have_ext = 0;
1113 ext[0] = '\0';
1116 if(have_ext && fi && fi->original_filename_flag) {
1117 de_dbg(c, "[internal warning: Incorrect use of create_output_file]");
1120 f = create_dbuf_lowlevel(c);
1121 f->max_len_hard = c->max_output_file_size;
1122 f->is_managed = 1;
1124 if(fi && fi->is_directory) {
1125 is_directory = 1;
1128 if(is_directory && !c->keep_dir_entries) {
1129 de_dbg(c, "skipping 'directory' file");
1130 f->btype = DBUF_TYPE_NULL;
1131 goto done;
1134 if(c->extract_policy==DE_EXTRACTPOLICY_MAINONLY) {
1135 if(createflags&DE_CREATEFLAG_IS_AUX) {
1136 de_dbg(c, "skipping 'auxiliary' file");
1137 f->btype = DBUF_TYPE_NULL;
1138 goto done;
1141 else if(c->extract_policy==DE_EXTRACTPOLICY_AUXONLY) {
1142 if(!(createflags&DE_CREATEFLAG_IS_AUX)) {
1143 de_dbg(c, "skipping 'main' file");
1144 f->btype = DBUF_TYPE_NULL;
1145 goto done;
1149 file_index = c->file_count;
1150 c->file_count++;
1152 basefn = c->base_output_filename ? c->base_output_filename : "output";
1154 if(fi && ucstring_isnonempty(fi->file_name_internal)) {
1155 name_from_finfo_len = 1 + ucstring_count_utf8_bytes(fi->file_name_internal);
1156 name_from_finfo = de_malloc(c, name_from_finfo_len);
1157 ucstring_to_sz(fi->file_name_internal, name_from_finfo, (size_t)name_from_finfo_len, 0,
1158 DE_ENCODING_UTF8);
1161 if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && !c->base_output_filename &&
1162 fi && fi->is_directory &&
1163 (fi->is_root_dir || (fi->detect_root_dot_dir && fi->orig_name_was_dot)))
1165 de_strlcpy(nbuf, ".", sizeof(nbuf));
1167 else if(c->special_1st_filename && (file_index==c->first_output_file) &&
1168 !is_directory)
1170 de_strlcpy(nbuf, c->special_1st_filename, sizeof(nbuf));
1172 else if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && !c->base_output_filename &&
1173 fi && fi->original_filename_flag && name_from_finfo)
1175 // TODO: This is a "temporary" hack to allow us to, when both reading from
1176 // and writing to an archive format, use some semblance of the correct
1177 // filename (instead of "output.xxx.yyy").
1178 // There are some things that we don't handle optimally, such as
1179 // subdirectories.
1180 // A major redesign of the file naming logic would be good.
1181 de_strlcpy(nbuf, name_from_finfo, sizeof(nbuf));
1183 else {
1184 char fn_suffix[256];
1186 if(have_ext && name_from_finfo) {
1187 de_snprintf(fn_suffix, sizeof(fn_suffix), "%s.%s", name_from_finfo, ext);
1189 else if(have_ext) {
1190 de_strlcpy(fn_suffix, ext, sizeof(fn_suffix));
1192 else if(is_directory && name_from_finfo) {
1193 de_snprintf(fn_suffix, sizeof(fn_suffix), "%s.dir", name_from_finfo);
1195 else if(name_from_finfo) {
1196 de_strlcpy(fn_suffix, name_from_finfo, sizeof(fn_suffix));
1198 else if(is_directory) {
1199 de_strlcpy(fn_suffix, "dir", sizeof(fn_suffix));
1201 else {
1202 de_strlcpy(fn_suffix, "bin", sizeof(fn_suffix));
1205 de_snprintf(nbuf, sizeof(nbuf), "%s.%03d.%s", basefn, file_index, fn_suffix);
1208 f->name = de_strdup(c, nbuf);
1210 if(fi) {
1211 // The finfo object passed to us at file creation is not required to
1212 // remain valid, so make a copy of anything in it that we might need
1213 // later.
1214 f->fi_copy = de_finfo_create(c);
1215 finfo_shallow_copy(c, fi, f->fi_copy);
1216 fi->riscos_appended_type = 0;
1218 // Here's where we respect the -intz option, by using it to convert to
1219 // UTC in some cases.
1220 if(f->fi_copy->timestamp[DE_TIMESTAMPIDX_MODIFY].is_valid && f->fi_copy->timestamp[DE_TIMESTAMPIDX_MODIFY].tzcode==DE_TZCODE_LOCAL &&
1221 c->input_tz_offs_seconds!=0)
1223 de_timestamp_cvt_to_utc(&f->fi_copy->timestamp[DE_TIMESTAMPIDX_MODIFY], -c->input_tz_offs_seconds);
1226 if(f->fi_copy->internal_mod_time.is_valid && f->fi_copy->internal_mod_time.tzcode==DE_TZCODE_LOCAL &&
1227 c->input_tz_offs_seconds!=0)
1229 de_timestamp_cvt_to_utc(&f->fi_copy->internal_mod_time, -c->input_tz_offs_seconds);
1233 if(file_index < c->first_output_file) {
1234 f->btype = DBUF_TYPE_NULL;
1235 goto done;
1238 if(file_index >= c->first_output_file + c->max_output_files)
1240 f->btype = DBUF_TYPE_NULL;
1241 if(file_index == c->first_output_file + c->max_output_files) {
1242 if(!c->user_set_max_output_files) {
1243 de_err(c, "Limit of %d output files exceeded", c->max_output_files);
1246 goto done;
1249 c->num_files_extracted++;
1251 if(c->extrlist_dbuf) {
1252 dbuf_printf(c->extrlist_dbuf, "%s\n", f->name);
1253 dbuf_flush(c->extrlist_dbuf);
1256 if(c->list_mode) {
1257 f->btype = DBUF_TYPE_NULL;
1258 if(c->list_mode_include_file_id) {
1259 de_msg(c, "%d:%s", file_index, f->name);
1261 else {
1262 de_msg(c, "%s", f->name);
1264 goto done;
1267 if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && c->archive_fmt==DE_ARCHIVEFMT_TAR) {
1268 de_info(c, "Adding %s to TAR file", f->name);
1269 f->btype = DBUF_TYPE_ODBUF;
1270 // A dummy max_len_hard value. The parent will do the checking.
1271 f->max_len_hard = DE_DUMMY_MAX_FILE_SIZE;
1272 f->writing_to_tar_archive = 1;
1273 de_tar_start_member_file(c, f);
1275 else if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE) { // ZIP
1276 i64 initial_alloc;
1277 de_info(c, "Adding %s to ZIP file", f->name);
1278 f->btype = DBUF_TYPE_MEMBUF;
1279 f->max_len_hard = DE_MAX_MEMBUF_SIZE;
1280 if(is_directory) {
1281 // A directory entry is not expected to have any data associated
1282 // with it (besides the files it contains).
1283 initial_alloc = 16;
1285 else {
1286 initial_alloc = 65536;
1288 f->membuf_buf = de_malloc(c, initial_alloc);
1289 f->membuf_alloc = initial_alloc;
1290 f->write_memfile_to_zip_archive = 1;
1292 else if(c->output_style==DE_OUTPUTSTYLE_STDOUT) {
1293 de_info(c, "Writing %s to [stdout]", f->name);
1294 f->btype = DBUF_TYPE_STDOUT;
1295 // TODO: Should we increase f->max_len_hard?
1296 f->fp = stdout;
1298 else {
1299 de_info(c, "Writing %s", f->name);
1300 f->btype = DBUF_TYPE_OFILE;
1301 f->fp = de_fopen_for_write(c, f->name, msgbuf, sizeof(msgbuf),
1302 c->overwrite_mode, 0);
1304 if(!f->fp) {
1305 de_err(c, "Failed to write %s: %s", f->name, msgbuf);
1306 f->btype = DBUF_TYPE_NULL;
1307 c->serious_error_flag = 1;
1311 done:
1312 de_free(c, name_from_finfo);
1313 return f;
1316 static void do_on_dbuf_size_exceeded(dbuf *f)
1318 de_err(f->c, "Maximum %s size of %"I64_FMT" bytes exceeded",
1319 (f->btype==DBUF_TYPE_MEMBUF)?"membuf":"output file",
1320 f->max_len_hard);
1321 de_fatalerror(f->c);
1324 dbuf *dbuf_create_membuf(deark *c, i64 initialsize, unsigned int flags)
1326 dbuf *f;
1328 f = create_dbuf_lowlevel(c);
1329 f->btype = DBUF_TYPE_MEMBUF;
1330 f->max_len_hard = DE_MAX_MEMBUF_SIZE;
1332 if(initialsize>0) {
1333 if(initialsize > f->max_len_hard) {
1334 do_on_dbuf_size_exceeded(f);
1336 f->membuf_buf = de_malloc(c, initialsize);
1337 f->membuf_alloc = initialsize;
1340 if(flags&0x01) {
1341 dbuf_set_length_limit(f, initialsize);
1344 return f;
1347 static void membuf_append(dbuf *f, const u8 *m, i64 mlen)
1349 i64 new_alloc_size;
1351 if(f->has_len_limit) {
1352 if(f->len + mlen > f->len_limit) {
1353 mlen = f->len_limit - f->len;
1357 if(mlen<=0) return;
1359 if(mlen > f->membuf_alloc - f->len) {
1360 // Need to allocate more space
1361 new_alloc_size = (f->membuf_alloc + mlen)*2;
1362 if(new_alloc_size<1024) new_alloc_size=1024;
1363 if(new_alloc_size > f->max_len_hard) new_alloc_size = f->max_len_hard;
1364 if(f->c->debug_level>=4) {
1365 de_dbgx(f->c, 4, "increasing membuf size %"I64_FMT" -> %"I64_FMT,
1366 f->membuf_alloc, new_alloc_size);
1368 if(f->len + mlen > f->max_len_hard) {
1369 do_on_dbuf_size_exceeded(f);
1371 f->membuf_buf = de_realloc(f->c, f->membuf_buf, f->membuf_alloc, new_alloc_size);
1372 f->membuf_alloc = new_alloc_size;
1375 de_memcpy(&f->membuf_buf[f->len], m, (size_t)mlen);
1376 f->len += mlen;
1379 void dbuf_write(dbuf *f, const u8 *m, i64 len)
1381 if(len<=0) return;
1382 if(f->len + len > f->max_len_hard) {
1383 do_on_dbuf_size_exceeded(f);
1386 if(f->writelistener_cb) {
1387 // Note that the callback function can be changed at any time, so if we
1388 // ever decide to buffer these calls, precautions will be needed.
1389 f->writelistener_cb(f, f->userdata_for_writelistener, m, len);
1392 switch(f->btype) {
1393 case DBUF_TYPE_OFILE:
1394 case DBUF_TYPE_STDOUT:
1395 if(!f->fp) return;
1396 if(f->c->debug_level>=4) {
1397 de_dbgx(f->c, 4, "writing %"I64_FMT" bytes to %s", len, f->name);
1399 fwrite(m, 1, (size_t)len, f->fp);
1400 f->len += len;
1401 return;
1402 case DBUF_TYPE_MEMBUF:
1403 if(f->c->debug_level>=4 && f->name) {
1404 de_dbgx(f->c, 4, "appending %"I64_FMT" bytes to membuf %s", len, f->name);
1406 membuf_append(f, m, len);
1407 return;
1408 case DBUF_TYPE_ODBUF:
1409 dbuf_write(f->parent_dbuf, m, len);
1410 f->len += len;
1411 return;
1412 case DBUF_TYPE_CUSTOM:
1413 if(f->customwrite_fn) {
1414 f->customwrite_fn(f, f->userdata_for_customwrite, m, len);
1416 f->len += len;
1417 return;
1418 case DBUF_TYPE_NULL:
1419 f->len += len;
1420 return;
1423 de_internal_err_fatal(f->c, "Invalid output file type (%d)", f->btype);
1426 void dbuf_writebyte(dbuf *f, u8 n)
1428 dbuf_write(f, &n, 1);
1431 // Allowed only for membufs, and unmanaged output files.
1432 // For unmanaged output files, must be used with care, and should not be
1433 // mixed with dbuf_write().
1434 void dbuf_write_at(dbuf *f, i64 pos, const u8 *m, i64 len)
1436 if(len<1 || pos<0) return;
1438 if(pos + len > f->max_len_hard) {
1439 do_on_dbuf_size_exceeded(f);
1442 if(f->btype==DBUF_TYPE_MEMBUF) {
1443 i64 amt_overwrite, amt_newzeroes, amt_append;
1445 if(pos+len <= f->len) { // entirely within the current file
1446 amt_overwrite = len;
1447 amt_newzeroes = 0;
1448 amt_append = 0;
1450 else if(pos >= f->len) { // starts after the end of the current file
1451 amt_overwrite = 0;
1452 amt_newzeroes = pos - f->len;
1453 amt_append = len;
1455 else { // overlaps the end of the current file
1456 amt_overwrite = f->len - pos;
1457 amt_newzeroes = 0;
1458 amt_append = len - amt_overwrite;
1461 if(amt_overwrite>0) {
1462 de_memcpy(&f->membuf_buf[pos], m, (size_t)amt_overwrite);
1464 if(amt_newzeroes>0) {
1465 dbuf_write_zeroes(f, amt_newzeroes);
1468 if(amt_append>0) {
1469 membuf_append(f, &m[amt_overwrite], amt_append);
1472 else if(f->btype==DBUF_TYPE_OFILE && !f->is_managed) {
1473 i64 curpos = de_ftell(f->fp);
1474 if(pos != curpos) {
1475 de_fseek(f->fp, pos, SEEK_SET);
1477 fwrite(m, 1, (size_t)len, f->fp);
1478 if(pos+len > f->len) {
1479 f->len = pos+len;
1482 else if(f->btype==DBUF_TYPE_NULL) {
1483 if(pos+len > f->len) {
1484 f->len = pos+len;
1487 else {
1488 de_internal_err_fatal(f->c, "Attempt to seek on non-seekable stream");
1492 void dbuf_writebyte_at(dbuf *f, i64 pos, u8 n)
1494 if(f->btype==DBUF_TYPE_MEMBUF && pos>=0 && pos<f->len) {
1495 // Fast path when overwriting a byte in a membuf
1496 f->membuf_buf[pos] = n;
1497 return;
1500 dbuf_write_at(f, pos, &n, 1);
1503 void dbuf_write_run(dbuf *f, u8 n, i64 len)
1505 u8 buf[1024];
1506 i64 amt_left;
1507 i64 amt_to_write;
1509 de_memset(buf, n, (size_t)len<sizeof(buf) ? (size_t)len : sizeof(buf));
1510 amt_left = len;
1511 while(amt_left > 0) {
1512 if((size_t)amt_left<sizeof(buf))
1513 amt_to_write = amt_left;
1514 else
1515 amt_to_write = sizeof(buf);
1516 dbuf_write(f, buf, amt_to_write);
1517 amt_left -= amt_to_write;
1521 void dbuf_write_zeroes(dbuf *f, i64 len)
1523 dbuf_write_run(f, 0, len);
1526 // Make the membuf have exactly len bytes of content.
1527 void dbuf_truncate(dbuf *f, i64 desired_len)
1529 if(desired_len<0) desired_len=0;
1530 if(desired_len>f->len) {
1531 dbuf_write_zeroes(f, desired_len - f->len);
1533 else if(desired_len<f->len) {
1534 if(f->btype==DBUF_TYPE_MEMBUF || f->btype==DBUF_TYPE_CUSTOM) {
1535 f->len = desired_len;
1540 void de_writeu16le_direct(u8 *m, i64 n)
1542 m[0] = (u8)(n & 0x00ff);
1543 m[1] = (u8)((n & 0xff00)>>8);
1546 void de_writeu16be_direct(u8 *m, i64 n)
1548 m[0] = (u8)((n & 0xff00)>>8);
1549 m[1] = (u8)(n & 0x00ff);
1552 void dbuf_writeu16le(dbuf *f, i64 n)
1554 u8 buf[2];
1555 de_writeu16le_direct(buf, n);
1556 dbuf_write(f, buf, 2);
1559 void dbuf_writeu16be(dbuf *f, i64 n)
1561 u8 buf[2];
1562 de_writeu16be_direct(buf, n);
1563 dbuf_write(f, buf, 2);
1566 void dbuf_writei16le(dbuf *f, i64 n)
1568 if(n<0) {
1569 dbuf_writeu16le(f, n+65536);
1571 else {
1572 dbuf_writeu16le(f, n);
1576 void dbuf_writei16be(dbuf *f, i64 n)
1578 if(n<0) {
1579 dbuf_writeu16be(f, n+65536);
1581 else {
1582 dbuf_writeu16be(f, n);
1586 void de_writeu32be_direct(u8 *m, i64 n)
1588 m[0] = (u8)((n & 0xff000000)>>24);
1589 m[1] = (u8)((n & 0x00ff0000)>>16);
1590 m[2] = (u8)((n & 0x0000ff00)>>8);
1591 m[3] = (u8)(n & 0x000000ff);
1594 void dbuf_writeu32be(dbuf *f, i64 n)
1596 u8 buf[4];
1597 de_writeu32be_direct(buf, n);
1598 dbuf_write(f, buf, 4);
1601 void de_writeu32le_direct(u8 *m, i64 n)
1603 m[0] = (u8)(n & 0x000000ff);
1604 m[1] = (u8)((n & 0x0000ff00)>>8);
1605 m[2] = (u8)((n & 0x00ff0000)>>16);
1606 m[3] = (u8)((n & 0xff000000)>>24);
1609 void dbuf_writeu32le(dbuf *f, i64 n)
1611 u8 buf[4];
1612 de_writeu32le_direct(buf, n);
1613 dbuf_write(f, buf, 4);
1616 void dbuf_writei32le(dbuf *f, i64 n)
1618 if(n<0) {
1619 dbuf_writeu32le(f, n+0x100000000LL);
1621 else {
1622 dbuf_writeu32le(f, n);
1625 void dbuf_writei32be(dbuf *f, i64 n)
1627 if(n<0) {
1628 dbuf_writeu32be(f, n+0x100000000LL);
1630 else {
1631 dbuf_writeu32be(f, n);
1635 void de_writeu64le_direct(u8 *m, u64 n)
1637 de_writeu32le_direct(&m[0], (i64)(u32)(n&0xffffffffULL));
1638 de_writeu32le_direct(&m[4], (i64)(u32)(n>>32));
1641 void dbuf_writeu64le(dbuf *f, u64 n)
1643 u8 buf[8];
1644 de_writeu64le_direct(buf, n);
1645 dbuf_write(f, buf, 8);
1648 void dbuf_puts(dbuf *f, const char *sz)
1650 dbuf_write(f, (const u8*)sz, (i64)de_strlen(sz));
1653 // TODO: Remove the buffer size limitation?
1654 void dbuf_printf(dbuf *f, const char *fmt, ...)
1656 char buf[1024];
1657 va_list ap;
1659 va_start(ap, fmt);
1660 de_vsnprintf(buf, sizeof(buf), fmt, ap);
1661 va_end(ap);
1663 dbuf_puts(f, buf);
1666 void dbuf_flush(dbuf *f)
1668 if(f->btype==DBUF_TYPE_OFILE) {
1669 fflush(f->fp);
1673 dbuf *dbuf_open_input_file(deark *c, const char *fn)
1675 dbuf *f;
1676 unsigned int returned_flags = 0;
1677 char msgbuf[200];
1679 if(!fn) {
1680 c->serious_error_flag = 1;
1681 return NULL;
1683 f = create_dbuf_lowlevel(c);
1684 f->btype = DBUF_TYPE_IFILE;
1685 f->cache_policy = DE_CACHE_POLICY_ENABLED;
1687 f->fp = de_fopen_for_read(c, fn, &f->len, msgbuf, sizeof(msgbuf), &returned_flags);
1689 if(!f->fp) {
1690 de_err(c, "Can't read %s: %s", fn, msgbuf);
1691 de_free(c, f);
1692 c->serious_error_flag = 1;
1693 return NULL;
1696 if(returned_flags & 0x1) {
1697 // This "file" is actually a pipe.
1698 f->btype = DBUF_TYPE_FIFO;
1699 f->cache_policy = DE_CACHE_POLICY_NONE;
1700 populate_cache_from_pipe(f);
1703 if(!f->cache && f->cache_policy==DE_CACHE_POLICY_ENABLED) {
1704 populate_cache(f);
1707 return f;
1710 dbuf *dbuf_open_input_stdin(deark *c)
1712 dbuf *f;
1714 f = create_dbuf_lowlevel(c);
1715 f->btype = DBUF_TYPE_STDIN;
1717 // Set to NONE, to make sure we don't try to auto-populate the cache later.
1718 f->cache_policy = DE_CACHE_POLICY_NONE;
1720 populate_cache_from_pipe(f);
1722 return f;
1725 dbuf *dbuf_open_input_subfile(dbuf *parent, i64 offset, i64 size)
1727 dbuf *f;
1728 deark *c;
1730 c = parent->c;
1731 f = create_dbuf_lowlevel(c);
1732 f->btype = DBUF_TYPE_IDBUF;
1733 f->parent_dbuf = parent;
1734 f->offset_into_parent_dbuf = offset;
1735 f->len = size;
1736 return f;
1739 dbuf *dbuf_create_custom_dbuf(deark *c, i64 apparent_size, unsigned int flags)
1741 dbuf *f;
1743 f = create_dbuf_lowlevel(c);
1744 f->btype = DBUF_TYPE_CUSTOM;
1745 f->len = apparent_size;
1746 f->max_len_hard = DE_DUMMY_MAX_FILE_SIZE;
1747 return f;
1750 void dbuf_set_writelistener(dbuf *f, de_writelistener_cb_type fn, void *userdata)
1752 f->userdata_for_writelistener = userdata;
1753 f->writelistener_cb = fn;
1756 void dbuf_close(dbuf *f)
1758 deark *c;
1759 if(!f) return;
1760 c = f->c;
1762 if(f->btype==DBUF_TYPE_OFILE || f->btype==DBUF_TYPE_STDOUT) {
1763 c->total_output_size += f->len;
1766 if(f->btype==DBUF_TYPE_MEMBUF && f->write_memfile_to_zip_archive) {
1767 de_zip_add_file_to_archive(c, f);
1768 if(f->name) {
1769 de_dbg3(c, "closing memfile %s", f->name);
1772 else if(f->writing_to_tar_archive) {
1773 de_tar_end_member_file(c, f);
1776 switch(f->btype) {
1777 case DBUF_TYPE_IFILE:
1778 case DBUF_TYPE_OFILE:
1779 if(f->name) {
1780 de_dbg3(c, "closing file %s", f->name);
1782 de_fclose(f->fp);
1783 f->fp = NULL;
1785 if(f->btype==DBUF_TYPE_OFILE && f->is_managed) {
1786 de_update_file_attribs(f, c->preserve_file_times);
1788 break;
1789 case DBUF_TYPE_FIFO:
1790 de_fclose(f->fp);
1791 f->fp = NULL;
1792 break;
1793 case DBUF_TYPE_STDOUT:
1794 if(f->name && f->is_managed) {
1795 de_dbg3(c, "finished writing %s to stdout", f->name);
1797 else if(!f->is_managed) {
1798 de_dbg3(c, "finished writing %s", f->name);
1800 f->fp = NULL;
1801 break;
1802 case DBUF_TYPE_MEMBUF:
1803 case DBUF_TYPE_IDBUF:
1804 case DBUF_TYPE_ODBUF:
1805 case DBUF_TYPE_STDIN:
1806 case DBUF_TYPE_CUSTOM:
1807 case DBUF_TYPE_NULL:
1808 break;
1809 default:
1810 de_internal_err_nonfatal(c, "Don't know how to close this type of file (%d)", f->btype);
1813 de_free(c, f->membuf_buf);
1814 de_free(c, f->name);
1815 de_free(c, f->cache);
1816 if(f->fi_copy) de_finfo_destroy(c, f->fi_copy);
1817 de_free(c, f);
1819 if(c->total_output_size > c->max_total_output_size) {
1820 // FIXME: Since we only do this check when a file is closed, it can
1821 // potentially be subverted in the (rare) case that Deark has multiple
1822 // output files open simultanously.
1823 de_err(c, "Maximum total output size of %"I64_FMT" bytes exceeded",
1824 c->max_total_output_size);
1825 de_fatalerror(c);
1829 void dbuf_empty(dbuf *f)
1831 if(f->btype == DBUF_TYPE_MEMBUF) {
1832 f->len = 0;
1836 // Provides direct (presumably read-only) access to the memory in a membuf.
1837 // Use with care: The memory is still owned by the dbuf.
1838 // Note: Another, arguably safer, way to do this is to use dbuf_buffered_read().
1839 const u8 *dbuf_get_membuf_direct_ptr(dbuf *f)
1841 if(f->btype != DBUF_TYPE_MEMBUF) return NULL;
1842 return f->membuf_buf;
1845 // Search a section of a dbuf for a given byte.
1846 // 'haystack_len' is the number of bytes to search.
1847 // Returns 0 if not found.
1848 // If found, sets *foundpos to the position in the file where it was found
1849 // (not relative to startpos).
1850 int dbuf_search_byte(dbuf *f, const u8 b, i64 startpos,
1851 i64 haystack_len, i64 *foundpos)
1853 i64 i;
1855 for(i=0; i<haystack_len; i++) {
1856 if(b == dbuf_getbyte(f, startpos+i)) {
1857 *foundpos = startpos+i;
1858 return 1;
1861 return 0;
1864 struct search_ctx {
1865 const u8 *needle;
1866 i64 needle_len;
1867 int foundflag;
1868 i64 foundpos_rel;
1871 static int search_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
1872 i64 buf_len)
1874 struct search_ctx *sctx = (struct search_ctx*)brctx->userdata;
1875 i64 i;
1876 i64 num_starting_positions_to_check;
1878 if(buf_len < sctx->needle_len) return 0;
1879 num_starting_positions_to_check = buf_len + 1 - sctx->needle_len;
1881 for(i=0; i<num_starting_positions_to_check; i++) {
1882 if(sctx->needle[0]==buf[i] &&
1883 !de_memcmp(sctx->needle, &buf[i], (size_t)sctx->needle_len))
1885 sctx->foundpos_rel = brctx->offset+i;
1886 sctx->foundflag = 1;
1887 return 0;
1891 if(brctx->eof_flag) return 0;
1892 brctx->bytes_consumed = num_starting_positions_to_check;
1893 return 1;
1896 // Search a section of a dbuf for a given byte sequence.
1898 // This function is inefficient, but it's good enough for Deark's needs.
1899 // Maximum 'needle_len' is DE_BUFFERED_READ_MIN_BLKSIZE bytes, but it's expected to
1900 // be quite short. If it gets close to the maximum, the search could get very
1901 // inefficient.
1903 // 'haystack_len' is the number of bytes to search in (the sequence must be completely
1904 // within that range, not just start there).
1905 // Returns 0 if not found.
1906 // If found, sets *foundpos to the position in the file where it was found
1907 // (not relative to startpos).
1908 int dbuf_search(dbuf *f, const u8 *needle, i64 needle_len,
1909 i64 startpos, i64 haystack_len, i64 *foundpos)
1911 int retval = 0;
1912 struct search_ctx sctx;
1914 *foundpos = 0;
1916 if(startpos < 0) {
1917 haystack_len += startpos;
1918 if(haystack_len < 0) {
1919 goto done;
1921 startpos = 0;
1923 if(startpos > f->len) {
1924 goto done;
1926 if(haystack_len > f->len - startpos) {
1927 haystack_len = f->len - startpos;
1929 if(needle_len > haystack_len) {
1930 goto done;
1932 if(needle_len > DE_BUFFERED_READ_MIN_BLKSIZE) {
1933 goto done;
1935 if(needle_len<1) {
1936 retval = 1;
1937 *foundpos = startpos;
1938 goto done;
1941 de_zeromem(&sctx, sizeof(struct search_ctx));
1942 sctx.needle = needle;
1943 sctx.needle_len = needle_len;
1944 (void)dbuf_buffered_read(f, startpos, haystack_len, search_cbfn, (void*)&sctx);
1945 if(sctx.foundflag) {
1946 *foundpos = startpos + sctx.foundpos_rel;
1947 retval = 1;
1950 done:
1951 return retval;
1954 // Search for the aligned pair of 0x00 bytes that marks the end of a UTF-16 string.
1955 // Endianness doesn't matter, because we're only looking for 0x00 0x00.
1956 // The returned 'bytes_consumed' is in bytes, and includes the 2 bytes for the NUL
1957 // terminator.
1958 // Returns 0 if the NUL is not found, in which case *bytes_consumed is not
1959 // meaningful.
1960 int dbuf_get_utf16_NULterm_len(dbuf *f, i64 pos1, i64 bytes_avail,
1961 i64 *bytes_consumed)
1963 i64 x;
1964 i64 pos = pos1;
1966 *bytes_consumed = bytes_avail;
1967 while(1) {
1968 if(pos1+bytes_avail-pos < 2) {
1969 break;
1971 x = dbuf_getu16le(f, pos);
1972 pos += 2;
1973 if(x==0) {
1974 *bytes_consumed = pos - pos1;
1975 return 1;
1978 return 0;
1981 int dbuf_find_line(dbuf *f, i64 pos1, i64 *pcontent_len, i64 *ptotal_len)
1983 u8 b0, b1;
1984 i64 pos;
1985 i64 eol_pos = 0;
1986 i64 eol_size = 0;
1988 *pcontent_len = 0;
1989 *ptotal_len = 0;
1990 if(pos1<0 || pos1>=f->len) {
1991 return 0;
1994 pos = pos1;
1996 while(1) {
1997 if(pos>=f->len) {
1998 // No EOL.
1999 eol_pos = pos;
2000 eol_size = 0;
2001 break;
2004 b0 = dbuf_getbyte(f, pos);
2006 if(b0==0x0d) {
2007 eol_pos = pos;
2008 // Look ahead at the next byte.
2009 b1 = dbuf_getbyte(f, pos+1);
2010 if(b1==0x0a) {
2011 // CR+LF
2012 eol_size = 2;
2013 break;
2015 // LF
2016 eol_pos = pos;
2017 eol_size = 1;
2018 break;
2020 else if(b0==0x0a) {
2021 eol_pos = pos;
2022 eol_size = 1;
2023 break;
2026 pos++;
2029 *pcontent_len = eol_pos - pos1;
2030 *ptotal_len = *pcontent_len + eol_size;
2032 return (*ptotal_len > 0);
2035 // Enforce a maximum size when writing to a dbuf.
2036 // Attempting to write more than this is a silent no-op.
2037 // May be valid only for memory buffers.
2038 void dbuf_set_length_limit(dbuf *f, i64 max_len)
2040 f->has_len_limit = 1;
2041 f->len_limit = max_len;
2044 int dbuf_has_utf8_bom(dbuf *f, i64 pos)
2046 return !dbuf_memcmp(f, pos, "\xef\xbb\xbf", 3);
2049 // Write the contents of a dbuf to a file.
2050 // This function intended for use in development/debugging.
2051 int dbuf_dump_to_file(dbuf *inf, const char *fn)
2053 dbuf *outf;
2054 deark *c = inf->c;
2056 outf = dbuf_create_unmanaged_file(c, fn, DE_OVERWRITEMODE_STANDARD, 0);
2057 dbuf_copy(inf, 0, inf->len, outf);
2058 dbuf_close(outf);
2059 return 1;
2062 static void reverse_fourcc(u8 *buf, int nbytes)
2064 size_t k;
2066 for(k=0; k<((size_t)nbytes)/2; k++) {
2067 u8 tmpc;
2068 tmpc = buf[k];
2069 buf[k] = buf[(size_t)nbytes-1-k];
2070 buf[(size_t)nbytes-1-k] = tmpc;
2074 // Though we call it a "fourcc", we support 'nbytes' from 1 to 4.
2075 void dbuf_read_fourcc(dbuf *f, i64 pos, struct de_fourcc *fcc,
2076 int nbytes, unsigned int flags)
2078 if(nbytes<1 || nbytes>4) return;
2080 de_zeromem(fcc->bytes, 4);
2081 dbuf_read(f, fcc->bytes, pos, (i64)nbytes);
2082 if(flags&DE_4CCFLAG_REVERSED) {
2083 reverse_fourcc(fcc->bytes, nbytes);
2086 fcc->id = (u32)de_getu32be_direct(fcc->bytes);
2087 if(nbytes<4) {
2088 fcc->id >>= (4-(unsigned int)nbytes)*8;
2091 de_bytes_to_printable_sz(fcc->bytes, (i64)nbytes,
2092 fcc->id_sanitized_sz, sizeof(fcc->id_sanitized_sz),
2093 0, DE_ENCODING_ASCII);
2094 de_bytes_to_printable_sz(fcc->bytes, (i64)nbytes,
2095 fcc->id_dbgstr, sizeof(fcc->id_dbgstr),
2096 DE_CONVFLAG_ALLOW_HL, DE_ENCODING_ASCII);
2099 static int buffered_read_internal(struct de_bufferedreadctx *brctx,
2100 dbuf *f, i64 pos1, i64 len, de_buffered_read_cbfn cbfn)
2102 int retval = 0;
2103 i64 pos = pos1; // Absolute pos of next byte to read from f
2104 i64 offs_of_first_byte_in_buf; // Relative to pos1, where in f is buf[0]?
2105 i64 num_unconsumed_bytes_in_buf;
2106 #define BRBUFLEN 4096 // Must be >= DE_BUFFERED_READ_MIN_BLKSIZE
2107 u8 buf[BRBUFLEN];
2109 num_unconsumed_bytes_in_buf = 0;
2110 offs_of_first_byte_in_buf = 0;
2112 while(1) {
2113 i64 nbytes_avail_to_read;
2114 i64 bytestoread;
2115 int ret;
2117 nbytes_avail_to_read = pos1+len-pos;
2118 if(nbytes_avail_to_read<1 && num_unconsumed_bytes_in_buf<1) {
2119 break;
2122 // max bytes that will fit in buf:
2123 bytestoread = BRBUFLEN-num_unconsumed_bytes_in_buf;
2125 // max bytes available to read:
2126 if(bytestoread >= nbytes_avail_to_read) {
2127 bytestoread = nbytes_avail_to_read;
2128 brctx->eof_flag = 1;
2130 else {
2131 brctx->eof_flag = 0;
2134 dbuf_read(f, &buf[num_unconsumed_bytes_in_buf], pos, bytestoread);
2135 pos += bytestoread;
2136 num_unconsumed_bytes_in_buf += bytestoread;
2138 brctx->offset = offs_of_first_byte_in_buf;
2139 brctx->bytes_consumed = num_unconsumed_bytes_in_buf;
2140 ret = cbfn(brctx, buf, num_unconsumed_bytes_in_buf);
2141 if(!ret) goto done;
2142 if(brctx->bytes_consumed<1 || brctx->bytes_consumed>num_unconsumed_bytes_in_buf) {
2143 goto done;
2146 if(brctx->bytes_consumed < num_unconsumed_bytes_in_buf) {
2147 // cbfn didn't consume all bytes
2148 // TODO: For better efficiency, we could leave the buffer as it is until
2149 // the unconsumed byte count drops below DE_BUFFERED_READ_MIN_BLKSIZE.
2150 // But that's only useful if some consumers consume only a small number of bytes.
2151 de_memmove(buf, &buf[brctx->bytes_consumed],
2152 (size_t)(num_unconsumed_bytes_in_buf-brctx->bytes_consumed));
2153 num_unconsumed_bytes_in_buf -= brctx->bytes_consumed;
2155 else {
2156 num_unconsumed_bytes_in_buf = 0;
2158 offs_of_first_byte_in_buf += brctx->bytes_consumed;
2160 retval = 1;
2161 done:
2162 return retval;
2165 // Special case where all bytes are already in memory
2166 static int buffered_read_from_mem(struct de_bufferedreadctx *brctx,
2167 dbuf *f, const u8 *mem, i64 pos1, i64 len, de_buffered_read_cbfn cbfn)
2169 int retval = 0;
2170 i64 total_nbytes_consumed = 0;
2172 while(1) {
2173 int ret;
2174 i64 nbytes_to_send;
2176 nbytes_to_send = len - total_nbytes_consumed;
2177 if(nbytes_to_send<1) break;
2178 brctx->bytes_consumed = nbytes_to_send;
2179 brctx->offset = total_nbytes_consumed;
2180 brctx->eof_flag = 1;
2182 ret = cbfn(brctx, &mem[pos1+total_nbytes_consumed],
2183 nbytes_to_send);
2184 if(!ret) goto done;
2185 if(brctx->bytes_consumed<1 || brctx->bytes_consumed>nbytes_to_send) {
2186 goto done;
2188 total_nbytes_consumed += brctx->bytes_consumed;
2190 retval = 1;
2191 done:
2192 return retval;
2195 static int buffered_read_zero_len(struct de_bufferedreadctx *brctx,
2196 de_buffered_read_cbfn cbfn)
2198 const u8 dummybuf[1] = { 0 };
2199 int ret;
2201 brctx->offset = 0;
2202 brctx->eof_flag = 1;
2203 brctx->bytes_consumed = 0;
2204 ret = cbfn(brctx, dummybuf, 0);
2205 return ret?1:0;
2208 // dbuf_buffered_read:
2209 // Read a slice of a dbuf, and pass its data to a callback function, one
2210 // segment at a time.
2211 // cbfn: Caller-implemented callback function.
2212 // - It must be prepared for an arbitrarily large number of bytes to be passed
2213 // to it at once (though it does not have to consume them all).
2214 // - It must consume at least 1 byte, unless 0 bytes were passed to it.
2215 // - If it does not consume all the bytes passed to it, it must set
2216 // brctx->bytes_consumed.
2217 // - It must return nonzero normally, 0 to abort.
2218 // We guarantee that:
2219 // - brctx->eof_flag will be nonzero if and only if there is no data after this.
2220 // - If eof_flag is not set, at least DE_BUFFERED_READ_MIN_BLKSIZE bytes will
2221 // be provided.
2222 // - If the caller supplies 0 bytes of input data, the callback function will be
2223 // called exactly once. This is the only case where the callback will be
2224 // called with buf_len==0.
2225 // - If the source dbuf is a MEMBUF, and the requested bytes are all in range,
2226 // then all requested bytes will be provided in the first call to the callback
2227 // function.
2228 // Return value: 1 normally, 0 if the callback function ever returned 0.
2229 int dbuf_buffered_read(dbuf *f, i64 pos1, i64 len,
2230 de_buffered_read_cbfn cbfn, void *userdata)
2232 struct de_bufferedreadctx brctx;
2234 brctx.c = f->c;
2235 brctx.userdata = userdata;
2237 if(len<=0) { // Get this special case out of the way.
2238 return buffered_read_zero_len(&brctx, cbfn);
2241 // Use an optimized routine if all the data we need to read is already in memory.
2242 if(f->cache && (pos1>=0) && (pos1+len<=f->cache_bytes_used)) {
2243 return buffered_read_from_mem(&brctx, f, f->cache, pos1, len, cbfn);
2246 // Not an "optimization", since we promise this behavior for MEMBUFs.
2247 if(f->btype==DBUF_TYPE_MEMBUF && (pos1>=0) && (pos1+len<=f->len)) {
2248 return buffered_read_from_mem(&brctx, f, f->membuf_buf, pos1, len, cbfn);
2251 // The general case:
2252 return buffered_read_internal(&brctx, f, pos1, len, cbfn);
2255 int de_is_all_zeroes(const u8 *b, i64 n)
2257 i64 k;
2258 for(k=0; k<n; k++) {
2259 if(b[k]!=0) return 0;
2261 return 1;
2264 static int is_all_zeroes_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
2265 i64 buf_len)
2267 return de_is_all_zeroes(buf, buf_len);
2270 // Returns 1 if the given slice has only bytes with value 0.
2271 int dbuf_is_all_zeroes(dbuf *f, i64 pos, i64 len)
2273 return dbuf_buffered_read(f, pos, len, is_all_zeroes_cbfn, NULL);
2276 void de_bitbuf_lowlevel_add_byte(struct de_bitbuf_lowlevel *bbll, u8 n)
2278 if(bbll->nbits_in_bitbuf>56) return;
2279 if(bbll->is_lsb==0) {
2280 bbll->bit_buf = (bbll->bit_buf<<8) | n;
2282 else {
2283 bbll->bit_buf |= (u64)n << bbll->nbits_in_bitbuf;
2285 bbll->nbits_in_bitbuf += 8;
2288 u64 de_bitbuf_lowlevel_get_bits(struct de_bitbuf_lowlevel *bbll, UI nbits)
2290 u64 n;
2291 u64 mask;
2293 if(nbits > bbll->nbits_in_bitbuf) return 0;
2294 mask = ((u64)1 << nbits)-1;
2295 if(bbll->is_lsb==0) {
2296 bbll->nbits_in_bitbuf -= nbits;
2297 n = (bbll->bit_buf >> bbll->nbits_in_bitbuf) & mask;
2299 else {
2300 n = bbll->bit_buf & mask;
2301 bbll->bit_buf >>= nbits;
2302 bbll->nbits_in_bitbuf -= nbits;
2304 return n;
2307 void de_bitbuf_lowlevel_empty(struct de_bitbuf_lowlevel *bbll)
2309 bbll->bit_buf = 0;
2310 bbll->nbits_in_bitbuf = 0;
2313 u64 de_bitreader_getbits(struct de_bitreader *bitrd, UI nbits)
2315 if(bitrd->eof_flag) return 0;
2316 if(nbits==0) {
2317 // TODO: Decide if we always want to do this. Could risk infinite loops
2318 // with this successful no-op.
2319 return 0;
2321 if(nbits > 57) {
2322 bitrd->eof_flag = 1;
2323 return 0;
2326 while(bitrd->bbll.nbits_in_bitbuf < nbits) {
2327 u8 b;
2329 if(bitrd->curpos >= bitrd->endpos) {
2330 bitrd->eof_flag = 1;
2331 return 0;
2333 b = dbuf_getbyte_p(bitrd->f, &bitrd->curpos);
2334 de_bitbuf_lowlevel_add_byte(&bitrd->bbll, b);
2337 return de_bitbuf_lowlevel_get_bits(&bitrd->bbll, nbits);
2340 // Empty the bitbuffer, and set ->curpos to the position of the next byte with
2341 // entirely unprocessed bits.
2342 // In other words, make it okay for the caller to read or change the ->curpos
2343 // field.
2344 void de_bitreader_skip_to_byte_boundary(struct de_bitreader *bitrd)
2346 // This is unlikely to change anything, since the current bitreader
2347 // implementation reads no more bytes than needed.
2348 bitrd->curpos -= (i64)(bitrd->bbll.nbits_in_bitbuf/8);
2350 de_bitbuf_lowlevel_empty(&bitrd->bbll);
2353 // pos is the offset of the next whole byte that may be added to the bitbuf.
2354 char *de_bitbuf_describe_curpos(struct de_bitbuf_lowlevel *bbll, i64 pos1,
2355 char *buf, size_t buf_len)
2357 i64 curpos;
2358 UI nwholebytes;
2359 UI nbits;
2361 nwholebytes = (i64)(bbll->nbits_in_bitbuf / 8);
2362 nbits = bbll->nbits_in_bitbuf % 8;
2363 curpos = pos1 - (i64)nwholebytes;
2365 if(nbits==0) {
2366 de_snprintf(buf, buf_len, "%"I64_FMT, curpos);
2368 else {
2369 de_snprintf(buf, buf_len, "%"I64_FMT"+%ubits", curpos-1, (UI)(8-nbits));
2371 return buf;
2374 char *de_bitreader_describe_curpos(struct de_bitreader *bitrd, char *buf, size_t buf_len)
2376 return de_bitbuf_describe_curpos(&bitrd->bbll, bitrd->curpos, buf, buf_len);