Minor refactoring of the IFF and box-format parsers
[deark.git] / modules / arj.c
blob12cd9207b7fb4ccbd6fcaca2ee7ade9d71049665
1 // This file is part of Deark.
2 // Copyright (C) 2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // ARJ compressed archive
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_arj);
12 struct member_data {
13 de_encoding input_encoding;
14 UI hdr_id;
15 #define ARJ_OBJTYPE_ARCHIVEHDR 1
16 #define ARJ_OBJTYPE_MEMBERFILE 2
17 #define ARJ_OBJTYPE_CHAPTERHDR 3
18 #define ARJ_OBJTYPE_EOA 4
19 u8 objtype;
20 u8 archiver_ver_num;
21 u8 min_ver_to_extract;
22 u8 os;
23 u8 flags;
24 u8 method;
25 u8 file_type;
26 UI file_mode;
27 u32 crc_reported;
28 i64 cmpr_len;
29 i64 orig_len;
30 i64 cmpr_pos;
31 struct de_timestamp tmstamp[DE_TIMESTAMPIDX_COUNT];
32 struct de_stringreaderdata *name_srd;
35 typedef struct localctx_struct {
36 de_encoding input_encoding; // if DE_ENCODING_UNKNOWN, autodetect for each member
37 u8 archive_flags;
38 u8 is_secured;
39 i64 entry_point;
40 i64 security_envelope_pos;
41 i64 security_envelope_len;
42 struct de_crcobj *crco;
43 } lctx;
45 static void read_arj_datetime(deark *c, lctx *d, i64 pos, struct de_timestamp *ts1, const char *name)
47 i64 dosdt, dostm;
48 char timestamp_buf[64];
50 dostm = de_getu16le(pos);
51 dosdt = de_getu16le(pos+2);
52 if(dostm==0 && dosdt==0) {
53 de_snprintf(timestamp_buf, sizeof(timestamp_buf), "[not set]");
55 else {
56 de_dos_datetime_to_timestamp(ts1, dosdt, dostm);
57 ts1->tzcode = DE_TZCODE_LOCAL;
58 de_timestamp_to_string(ts1, timestamp_buf, sizeof(timestamp_buf), 0);
60 de_dbg(c, "%s time: %s", name, timestamp_buf);
63 static void handle_comment(deark *c, lctx *d, struct member_data *md, i64 pos,
64 i64 nbytes_avail)
66 de_ucstring *s = NULL;
67 dbuf *outf = NULL;
69 if(nbytes_avail<2) goto done;
70 s = ucstring_create(c);
71 // The header containing the comment is limited to about 2.5KB, so we don't have
72 // check sizes here.
73 dbuf_read_to_ucstring(c->infile, pos, nbytes_avail, s, DE_CONVFLAG_STOP_AT_NUL,
74 DE_EXTENC_MAKE(md->input_encoding, DE_ENCSUBTYPE_HYBRID));
75 if(s->len<1) goto done;
76 de_dbg(c, "comment: \"%s\"", ucstring_getpsz_d(s));
78 if(c->extract_level>=2) {
79 const char *token;
81 if(md->objtype==ARJ_OBJTYPE_ARCHIVEHDR) token = "comment.txt";
82 else token = "fcomment.txt";
84 outf = dbuf_create_output_file(c, token, NULL, DE_CREATEFLAG_IS_AUX);
85 ucstring_write_as_utf8(c, s, outf, 1);
88 done:
89 dbuf_close(outf);
90 ucstring_destroy(s);
93 static const char *get_host_os_name(u8 n)
95 static const char *names[12] = { "MSDOS", "PRIMOS", "Unix", "Amiga", "MacOS",
96 "OS/2", "Apple GS", "Atari ST", "NeXT", "VMS", "Win95", "WIN32" };
98 if(n<=11) return names[(UI)n];
99 return "?";
102 static const char *get_file_type_name(struct member_data *md, u8 n)
104 const char *name = NULL;
106 if(md->objtype==ARJ_OBJTYPE_ARCHIVEHDR) {
107 if(n==2) name = "main header";
109 else {
110 switch(n) {
111 case 0: name = "binary"; break;
112 case 1: name = "text"; break;
113 case 2:
114 if(md->objtype==ARJ_OBJTYPE_CHAPTERHDR) {
115 name = "comment header";
117 break;
118 case 3: name = "directory"; break;
119 case 4: name = "volume label"; break;
120 case 5: name = "chapter label"; break;
123 return name?name:"?";
126 static void get_flags_descr(struct member_data *md, u8 n1, de_ucstring *s)
128 u8 n = n1;
130 if(n & 0x01) {
131 ucstring_append_flags_item(s, "GARBLED");
132 n -= 0x01;
135 if((n & 0x02) && (md->objtype==ARJ_OBJTYPE_ARCHIVEHDR)) {
136 if(md->os==10 || md->os==11) {
137 ucstring_append_flags_item(s, "ANSIPAGE");
138 n -= 0x02;
142 if(n & 0x04) {
143 ucstring_append_flags_item(s, "VOLUME");
144 n -= 0x04;
147 if(n & 0x08) {
148 if(md->objtype==ARJ_OBJTYPE_ARCHIVEHDR) {
149 ucstring_append_flags_item(s, "ARJPROT");
151 else {
152 ucstring_append_flags_item(s, "EXTFILE");
154 n -= 0x08;
157 if(n & 0x10) {
158 ucstring_append_flags_item(s, "PATHSYM");
159 n -= 0x10;
162 if((n & 0x40) && (md->objtype==ARJ_OBJTYPE_ARCHIVEHDR)) {
163 ucstring_append_flags_item(s, "SECURED");
164 n -= 0x40;
167 if((n & 0x80) && (md->objtype==ARJ_OBJTYPE_ARCHIVEHDR)) {
168 ucstring_append_flags_item(s, "ALTNAME");
169 n -= 0x80;
172 if(n!=0) {
173 ucstring_append_flags_itemf(s, "0x%02x", (UI)n);
177 struct method4_ctx {
178 i64 nbytes_written;
179 int stop_flag;
180 struct de_dfilter_out_params *dcmpro;
181 struct de_bitreader bitrd;
184 static void method4_lz77buf_writebytecb(struct de_lz77buffer *rb, const u8 n)
186 struct method4_ctx *cctx = (struct method4_ctx*)rb->userdata;
188 if(cctx->stop_flag) return;
189 if(cctx->dcmpro->len_known) {
190 if(cctx->nbytes_written >= cctx->dcmpro->expected_len) {
191 cctx->stop_flag = 1;
192 return;
196 dbuf_writebyte(cctx->dcmpro->f, n);
197 cctx->nbytes_written++;
200 static UI method4_read_a_length_code(struct method4_ctx *cctx)
202 UI onescount = 0;
203 UI n;
205 // Read up to 7 bits, counting the number of 1 bits, stopping after the first 0.
206 while(1) {
207 n = (UI)de_bitreader_getbits(&cctx->bitrd, 1);
208 if(n==0) break;
209 onescount++;
210 if(onescount>=7) break;
213 // However many ones there were, read that number of bits.
214 if(onescount==0) return 0;
215 n = (UI)de_bitreader_getbits(&cctx->bitrd, onescount);
216 return (1U<<onescount)-1 + n;
219 static UI method4_read_an_offset(struct method4_ctx *cctx)
221 UI onescount = 0;
222 UI n;
224 // Read up to 4 bits, counting the number of 1 bits, stopping after the first 0.
225 while(1) {
226 n = (UI)de_bitreader_getbits(&cctx->bitrd, 1);
227 if(n==0) break;
228 onescount++;
229 if(onescount>=4) break;
232 // Read {9 + the number of 1 bits} more bits.
233 n = (UI)de_bitreader_getbits(&cctx->bitrd, 9+onescount);
234 return (1U<<(9+onescount))-512 + n;
237 static void decompress_method_4(deark *c, lctx *d, struct member_data *md,
238 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
239 struct de_dfilter_results *dres)
241 struct method4_ctx *cctx = NULL;
242 struct de_lz77buffer *ringbuf = NULL;
244 cctx = de_malloc(c, sizeof(struct method4_ctx));
245 cctx->dcmpro = dcmpro;
246 cctx->bitrd.f = dcmpri->f;
247 cctx->bitrd.curpos = dcmpri->pos;
248 cctx->bitrd.endpos = dcmpri->pos + dcmpri->len;
250 // The maximum offset that can be encoded is 15871, so a 16K history is enough.
251 ringbuf = de_lz77buffer_create(c, 16384);
252 ringbuf->writebyte_cb = method4_lz77buf_writebytecb;
253 ringbuf->userdata = (void*)cctx;
255 while(1) {
256 UI len_code;
258 if(cctx->bitrd.eof_flag) goto done;
259 if(cctx->stop_flag) goto done;
260 if(cctx->dcmpro->len_known && (cctx->nbytes_written >= cctx->dcmpro->expected_len)) {
261 goto done;
264 len_code = method4_read_a_length_code(cctx);
265 if(len_code==0) {
266 u8 b;
268 b = (u8)de_bitreader_getbits(&cctx->bitrd, 8);
269 de_lz77buffer_add_literal_byte(ringbuf, b);
271 else {
272 UI offs;
274 offs = method4_read_an_offset(cctx);
275 de_lz77buffer_copy_from_hist(ringbuf, ringbuf->curpos-1-offs, len_code+2);
279 done:
280 dres->bytes_consumed_valid = 1;
281 dres->bytes_consumed = cctx->bitrd.curpos - dcmpri->pos;
282 de_lz77buffer_destroy(c, ringbuf);
283 de_free(c, cctx);
286 static void decompress_method_1(deark *c, lctx *d, struct member_data *md,
287 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
288 struct de_dfilter_results *dres)
290 struct de_lh5x_params lzhparams;
292 de_zeromem(&lzhparams, sizeof(struct de_lh5x_params));
293 lzhparams.fmt = DE_LH5X_FMT_LH6;
295 // ARJ does not appear to allow LZ77 offsets that point to data before
296 // the beginning of the file, so it doesn't matter what we initialize the
297 // history buffer to.
298 lzhparams.history_fill_val = 0x00;
300 lzhparams.zero_codes_block_behavior = DE_LH5X_ZCB_65536;
301 lzhparams.warn_about_zero_codes_block = 1;
302 fmtutil_decompress_lh5x(c, dcmpri, dcmpro, dres, &lzhparams);
305 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
307 struct de_crcobj *crco = (struct de_crcobj*)userdata;
308 de_crcobj_addbuf(crco, buf, buf_len);
311 static void extract_member_file(deark *c, lctx *d, struct member_data *md)
313 de_finfo *fi = NULL;
314 dbuf *outf = NULL;
315 size_t k;
316 int is_normal_file;
317 int is_dir;
318 u32 crc_calc;
319 struct de_dfilter_in_params dcmpri;
320 struct de_dfilter_out_params dcmpro;
321 struct de_dfilter_results dres;
323 if(md->objtype!=ARJ_OBJTYPE_MEMBERFILE) goto done;
324 if(!md->name_srd) goto done;
326 is_normal_file = (md->file_type==0 || md->file_type==1);
327 is_dir = (md->file_type==3);
328 if(!is_normal_file && !is_dir) {
329 goto done; // Special file type, not extracting
332 if((md->flags & 0x01) && (md->orig_len!=0)) {
333 de_err(c, "%s: Garbled files are not supported",
334 ucstring_getpsz_d(md->name_srd->str));
335 goto done;
338 if(is_normal_file && (md->method>4) && (md->orig_len!=0)) {
339 de_err(c, "%s: Compression method %u is not supported",
340 ucstring_getpsz_d(md->name_srd->str), (UI)md->method);
341 goto done;
344 fi = de_finfo_create(c);
346 de_finfo_set_name_from_ucstring(c, fi, md->name_srd->str, DE_SNFLAG_FULLPATH);
347 fi->original_filename_flag = 1;
349 if(is_dir) {
350 fi->is_directory = 1;
353 for(k=0; k<DE_TIMESTAMPIDX_COUNT; k++) {
354 fi->timestamp[k] = md->tmstamp[k];
357 outf = dbuf_create_output_file(c, NULL, fi, 0);
359 if(is_dir) goto done;
361 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
362 dcmpri.f = c->infile;
363 dcmpri.pos = md->cmpr_pos;
364 dcmpri.len = md->cmpr_len;
365 dcmpro.f = outf;
366 dcmpro.len_known = 1;
367 dcmpro.expected_len = md->orig_len;
369 de_crcobj_reset(d->crco);
370 dbuf_set_writelistener(outf, our_writelistener_cb, (void*)d->crco);
372 if(md->orig_len==0) {
375 else if(md->method==0) {
376 fmtutil_decompress_uncompressed(c, &dcmpri, &dcmpro, &dres, 0);
378 else if(md->method>=1 && md->method<=3) {
379 decompress_method_1(c, d, md, &dcmpri, &dcmpro, &dres);
381 else if(md->method==4) {
382 decompress_method_4(c, d, md, &dcmpri, &dcmpro, &dres);
385 if(dres.errcode) {
386 de_err(c, "%s: Decompression failed: %s", ucstring_getpsz_d(md->name_srd->str),
387 de_dfilter_get_errmsg(c, &dres));
388 goto done;
391 crc_calc = de_crcobj_getval(d->crco);
392 de_dbg(c, "crc (calculated): 0x%08x", (UI)crc_calc);
393 if(crc_calc != md->crc_reported) {
394 de_err(c, "%s: CRC check failed", ucstring_getpsz_d(md->name_srd->str));
395 goto done;
398 done:
399 dbuf_close(outf);
400 if(fi) de_finfo_destroy(c, fi);
403 static const char *get_objtype_name(u8 t) {
404 const char *name = NULL;
406 switch(t) {
407 case ARJ_OBJTYPE_ARCHIVEHDR: name="archive header"; break;
408 case ARJ_OBJTYPE_MEMBERFILE: name="member file"; break;
409 case ARJ_OBJTYPE_CHAPTERHDR: name="chapter header"; break;
410 case ARJ_OBJTYPE_EOA: name="end of archive"; break;
412 return name?name:"?";
415 static void fixup_path(de_ucstring *s)
417 i64 i;
419 for(i=0; i<s->len; i++) {
420 if(s->str[i]=='\\') {
421 s->str[i] = '/';
426 // If successfully parsed, sets *pbytes_consumed.
427 // Returns 1 normally, 2 if this is the EOA marker, 0 on fatal error.
428 static int do_header_or_member(deark *c, lctx *d, i64 pos1, int expecting_archive_hdr,
429 i64 *pbytes_consumed)
431 i64 pos = pos1;
432 i64 basic_hdr_size;
433 i64 first_hdr_size;
434 i64 first_hdr_endpos;
435 i64 first_ext_hdr_size;
436 i64 extra_data_len;
437 i64 nbytes_avail;
438 i64 n;
439 i64 basic_hdr_endpos;
440 u32 basic_hdr_crc_reported;
441 u32 basic_hdr_crc_calc;
442 struct member_data *md = NULL;
443 de_ucstring *flags_descr = NULL;
444 int retval = 0;
445 int saved_indent_level;
446 u8 b;
448 de_dbg_indent_save(c, &saved_indent_level);
449 md = de_malloc(c, sizeof(struct member_data));
451 md->hdr_id = (UI)de_getu16le_p(&pos);
452 if(expecting_archive_hdr) {
453 if(md->hdr_id==0xea60) {
454 md->objtype = ARJ_OBJTYPE_ARCHIVEHDR;
456 else {
457 de_err(c, "Not an ARJ file");
458 goto done;
461 else if(md->hdr_id==0xea60) {
462 md->objtype = ARJ_OBJTYPE_MEMBERFILE; // tentative?
464 else if(md->hdr_id==0x6000) {
465 md->objtype = ARJ_OBJTYPE_CHAPTERHDR;
467 else {
468 de_err(c, "ARJ member not found at %"I64_FMT, pos1);
469 goto done;
472 de_dbg(c, "object at %"I64_FMT, pos1);
473 de_dbg_indent(c, 1);
475 basic_hdr_size = de_getu16le_p(&pos);
476 de_dbg(c, "basic header size: %"I64_FMT, basic_hdr_size);
477 if(basic_hdr_size==0) {
478 md->objtype = ARJ_OBJTYPE_EOA;
480 de_dbg(c, "object type: %s", get_objtype_name(md->objtype));
482 if(basic_hdr_size==0) {
483 *pbytes_consumed = 4;
484 goto done;
487 if(basic_hdr_size>2600) {
488 de_err(c, "Bad header size");
489 goto done;
492 de_dbg(c, "[basic header]");
493 de_dbg_indent(c, 1);
495 de_dbg(c, "[first header]");
496 de_dbg_indent(c, 1);
498 basic_hdr_endpos = pos1 + 4 + basic_hdr_size;
499 first_hdr_size = (i64)de_getbyte_p(&pos);
500 de_dbg(c, "first header size: %"I64_FMT, first_hdr_size);
501 first_hdr_endpos = pos1 + 4 + first_hdr_size;
502 md->archiver_ver_num = de_getbyte_p(&pos);
503 de_dbg(c, "archiver version: %u", (UI)md->archiver_ver_num);
504 md->min_ver_to_extract = de_getbyte_p(&pos);
505 de_dbg(c, "min ver to extract: %u", (UI)md->min_ver_to_extract);
507 md->os = de_getbyte_p(&pos);
508 de_dbg(c, "host OS: %u (%s)", (UI)md->os, get_host_os_name(md->os));
510 md->flags = de_getbyte_p(&pos);
511 flags_descr = ucstring_create(c);
512 get_flags_descr(md, md->flags, flags_descr);
513 de_dbg(c, "flags: 0x%02x (%s)", (UI)md->flags, ucstring_getpsz_d(flags_descr));
514 if(md->objtype==ARJ_OBJTYPE_ARCHIVEHDR) {
515 d->archive_flags = md->flags;
516 if(d->archive_flags & 0x40) d->is_secured = 1;
519 // Now we have enough information to choose a character encoding.
520 md->input_encoding = d->input_encoding;
521 if(md->input_encoding==DE_ENCODING_UNKNOWN) {
522 if((d->archive_flags&0x02) && (md->os==10 || md->os==11)) {
523 md->input_encoding = DE_ENCODING_WINDOWS1252;
525 else {
526 md->input_encoding = DE_ENCODING_CP437;
530 if(md->objtype==ARJ_OBJTYPE_ARCHIVEHDR) {
531 b = de_getbyte_p(&pos);
532 de_dbg(c, "security version: %u", (UI)b);
534 else {
535 md->method = de_getbyte_p(&pos);
536 de_dbg(c, "cmpr method: %u", (UI)md->method);
539 md->file_type = de_getbyte_p(&pos);
540 de_dbg(c, "file type: %u (%s)", (UI)md->file_type, get_file_type_name(md, md->file_type));
541 if(expecting_archive_hdr && md->file_type!=2) {
542 de_err(c, "Invalid or missing archive header");
543 goto done;
546 pos++; // reserved
548 if(md->objtype==ARJ_OBJTYPE_ARCHIVEHDR) {
549 read_arj_datetime(c, d, pos, &md->tmstamp[DE_TIMESTAMPIDX_CREATE], "archive creation");
550 pos += 4;
552 else if(md->objtype==ARJ_OBJTYPE_CHAPTERHDR) {
553 read_arj_datetime(c, d, pos, &md->tmstamp[DE_TIMESTAMPIDX_CREATE], "creation");
554 pos += 4;
556 else {
557 read_arj_datetime(c, d, pos, &md->tmstamp[DE_TIMESTAMPIDX_MODIFY], "mod");
558 pos += 4;
561 if(md->objtype==ARJ_OBJTYPE_ARCHIVEHDR) {
562 read_arj_datetime(c, d, pos, &md->tmstamp[DE_TIMESTAMPIDX_MODIFY], "archive mod");
563 pos += 4;
565 else if(md->objtype==ARJ_OBJTYPE_MEMBERFILE) {
566 md->cmpr_len = de_getu32le_p(&pos);
567 de_dbg(c, "compressed size: %"I64_FMT, md->cmpr_len);
569 else {
570 pos += 4;
573 if(md->objtype==ARJ_OBJTYPE_MEMBERFILE) {
574 md->orig_len = de_getu32le_p(&pos);
575 de_dbg(c, "original size: %"I64_FMT, md->orig_len);
577 else {
578 pos += 4;
581 if(md->objtype==ARJ_OBJTYPE_ARCHIVEHDR) {
582 n = de_getu32le_p(&pos);
583 if(d->is_secured) {
584 d->security_envelope_pos = n;
585 de_dbg(c, "security envelope pos: %"I64_FMT, d->security_envelope_pos);
588 else {
589 md->crc_reported = (u32)de_getu32le_p(&pos);
590 de_dbg(c, "crc (reported): 0x%08x", (UI)md->crc_reported);
593 n = de_getu16le_p(&pos);
594 de_dbg(c, "filespec pos in filename: %d", (int)n);
596 if(md->objtype==ARJ_OBJTYPE_ARCHIVEHDR) {
597 n = de_getu16le_p(&pos);
598 if(d->is_secured) {
599 d->security_envelope_len = n;
600 de_dbg(c, "security envelope len: %"I64_FMT, d->security_envelope_len);
603 else {
604 de_ucstring *mode_descr;
606 md->file_mode = (UI)de_getu16le_p(&pos);
607 mode_descr = ucstring_create(c);
608 de_describe_dos_attribs(c, md->file_mode, mode_descr, 0);
609 de_dbg(c, "access mode: 0x%02x (%s)", md->file_mode, ucstring_getpsz_d(mode_descr));
610 ucstring_destroy(mode_descr);
613 pos++; // first chapter / encryption ver
614 pos++; // last chapter
616 extra_data_len = first_hdr_endpos - pos;
617 if(extra_data_len>0) {
618 de_dbg(c, "extra data: %"I64_FMT" bytes at %"I64_FMT"", extra_data_len, pos);
619 de_dbg_indent(c, 1);
621 if(md->objtype==ARJ_OBJTYPE_ARCHIVEHDR) {
622 if(extra_data_len>=1) {
623 b = de_getbyte_p(&pos);
624 de_dbg(c, "protection factor: %u", (UI)b);
626 if(extra_data_len>=2) {
627 b = de_getbyte_p(&pos);
628 de_dbg(c, "flags (2nd set): 0x%02x", (UI)b);
631 else if(md->objtype==ARJ_OBJTYPE_MEMBERFILE) {
632 if(extra_data_len>=4) {
633 n = de_getu32le_p(&pos);
634 de_dbg(c, "ext. file pos: %"I64_FMT, n);
636 if(extra_data_len>=12) {
637 read_arj_datetime(c, d, pos, &md->tmstamp[DE_TIMESTAMPIDX_ACCESS], "access");
638 pos += 4;
639 read_arj_datetime(c, d, pos, &md->tmstamp[DE_TIMESTAMPIDX_CREATE], "create");
640 pos += 4;
642 if(extra_data_len>=16) {
643 n = de_getu32le_p(&pos);
644 de_dbg(c, "ext. orig size: %"I64_FMT, n);
648 de_dbg_indent(c, -1);
651 de_dbg_indent(c, -1);
652 pos = first_hdr_endpos; // Now at the offset of the filename field
653 nbytes_avail = basic_hdr_endpos - pos;
654 md->name_srd = dbuf_read_string(c->infile, pos, nbytes_avail, 256, DE_CONVFLAG_STOP_AT_NUL,
655 md->input_encoding);
656 if(!(md->flags & 0x10)) {
657 // "PATHSYM" flag missing, need to convert '\' to '/'
658 fixup_path(md->name_srd->str);
660 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->name_srd->str));
662 if(md->name_srd->found_nul) {
663 pos += md->name_srd->bytes_consumed;
664 nbytes_avail = basic_hdr_endpos - pos;
665 handle_comment(c, d, md, pos, nbytes_avail);
668 de_dbg_indent(c, -1);
669 pos = basic_hdr_endpos; // Now at the offset just after the 'comment' field
670 basic_hdr_crc_reported = (u32)de_getu32le_p(&pos);
671 de_dbg(c, "basic hdr crc (reported): 0x%08x", (UI)basic_hdr_crc_reported);
673 de_crcobj_reset(d->crco);
674 de_crcobj_addslice(d->crco, c->infile, pos1+4, basic_hdr_size);
675 basic_hdr_crc_calc = de_crcobj_getval(d->crco);
676 de_dbg(c, "basic hdr crc (calculated): 0x%08x", (UI)basic_hdr_crc_calc);
677 if(basic_hdr_crc_calc != basic_hdr_crc_reported) {
678 de_warn(c, "Header CRC check failed");
681 first_ext_hdr_size = de_getu16le_p(&pos);
682 de_dbg(c, "first ext header size: %"I64_FMT, first_ext_hdr_size);
683 if(first_ext_hdr_size != 0) {
684 pos += 4; // first ext hdr crc
687 if(md->objtype==ARJ_OBJTYPE_MEMBERFILE) {
688 md->cmpr_pos = pos;
689 de_dbg(c, "compressed data at %"I64_FMT, md->cmpr_pos);
690 de_dbg_indent(c, 1);
691 extract_member_file(c, d, md);
692 de_dbg_indent(c, -1);
693 pos += md->cmpr_len;
696 *pbytes_consumed = pos - pos1;
697 retval = 1;
699 done:
700 ucstring_destroy(flags_descr);
701 if(md) {
702 de_destroy_stringreaderdata(c, md->name_srd);
703 de_free(c, md);
705 de_dbg_indent_restore(c, saved_indent_level);
706 return retval;
709 static void do_member_sequence(deark *c, lctx *d, i64 pos1)
711 i64 pos = pos1;
712 i64 num_extra_bytes;
714 while(1) {
715 int ret;
716 i64 bytes_consumed = 0;
718 if(pos+2 > c->infile->len) goto done;
720 ret = do_header_or_member(c, d, pos, 0, &bytes_consumed);
721 if(ret==0 || bytes_consumed<2) goto done;
722 if(ret==2) { // End of archive
723 break;
725 pos += bytes_consumed;
728 num_extra_bytes = c->infile->len - pos;
729 if(num_extra_bytes>1) {
730 de_dbg(c, "[%"I64_FMT" extra bytes at EOF, starting at %"I64_FMT"]", num_extra_bytes, pos);
732 done:
736 static void do_security_envelope(deark *c, lctx *d)
738 if(d->security_envelope_len==0) return;
739 de_dbg(c, "security envelope at %"I64_FMT", len=%"I64_FMT, d->security_envelope_pos,
740 d->security_envelope_len);
741 de_dbg_indent(c, 1);
742 de_dbg_hexdump(c, c->infile, d->security_envelope_pos, d->security_envelope_len,
743 256, NULL, 0x0);
744 de_dbg_indent(c, -1);
747 static void de_help_arj(deark *c)
749 de_msg(c, "-opt arj:entrypoint=<n> : Offset of archive header");
752 static void de_run_arj(deark *c, de_module_params *mparams)
754 lctx *d = NULL;
755 i64 pos;
756 i64 bytes_consumed = 0;
757 const char *s;
759 d = de_malloc(c, sizeof(lctx));
761 de_declare_fmt(c, "ARJ");
762 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_UNKNOWN);
764 // Useful with self-extracting archives, at least until we can handle them
765 // automatically. "-start" doesn't work right, because the security envelope
766 // offset is an absolute offset.
767 s = de_get_ext_option(c, "arj:entrypoint");
768 if(s) {
769 d->entry_point = de_atoi64(s);
772 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC32_IEEE);
773 pos = d->entry_point;
774 if(do_header_or_member(c, d, pos, 1, &bytes_consumed) != 1) goto done;
775 pos += bytes_consumed;
776 if(d->is_secured) {
777 do_security_envelope(c, d);
780 do_member_sequence(c, d, pos);
782 done:
783 if(d) {
784 de_crcobj_destroy(d->crco);
785 de_free(c, d);
789 static int de_identify_arj(deark *c)
791 i64 basic_hdr_size;
793 if(dbuf_memcmp(c->infile, 0, "\x60\xea", 2)) return 0;
794 basic_hdr_size = de_getu16le(2);
795 if(basic_hdr_size>2600) return 0;
796 if(de_input_file_has_ext(c, "arj")) return 100;
797 return 75;
800 void de_module_arj(deark *c, struct deark_module_info *mi)
802 mi->id = "arj";
803 mi->desc = "ARJ";
804 mi->run_fn = de_run_arj;
805 mi->identify_fn = de_identify_arj;
806 mi->help_fn = de_help_arj;