fnt: Improved error handling, etc.
[deark.git] / modules / rar.c
blob682192d3f05ab6ab84d235deeb81b65dcfabbcf4
1 // This file is part of Deark.
2 // Copyright (C) 2022 Jason Summers
3 // See the file COPYING for terms of use.
5 // RAR
7 #include <deark-private.h>
8 #include <deark-fmtutil-arch.h>
9 DE_DECLARE_MODULE(de_module_rar);
11 static const u8 *g_rar_oldsig = (const u8*)"RE\x7e\x5e";
12 static const u8 *g_rar4_sig = (const u8*)"Rar!\x1a\x07\x00";
13 static const u8 *g_rar5_sig = (const u8*)"Rar!\x1a\x07\x01\x00";
15 // 0=stored through 5=max/best
16 static const char *get_generic_rar_cmpr_name(UI n)
18 const char *names[6] = { "stored", "fastest", "fast",
19 "normal", "good", "best" };
21 if(n<=5) {
22 return names[n];
24 return "?";
27 static void rar_handle_noncmpr_comment(deark *c, de_arch_lctx *d, i64 pos, i64 len,
28 de_encoding enc, int is_file_comment)
30 de_ucstring *comment = NULL;
31 const char *ext;
33 if(len<1) goto done;
35 ext = is_file_comment?"fcomment.txt":"comment.txt";
37 if(c->extract_level>=2) {
38 dbuf_create_file_from_slice(c->infile, pos, len, ext,
39 NULL, DE_CREATEFLAG_IS_AUX);
41 else {
42 comment = ucstring_create(c);
43 dbuf_read_to_ucstring_n(c->infile, pos, len, DE_DBG_MAX_STRLEN,
44 comment, 0, DE_EXTENC_MAKE(enc, DE_ENCSUBTYPE_HYBRID));
45 de_dbg(c, "%scomment: \"%s\"", (is_file_comment?"file ":""),
46 ucstring_getpsz_d(comment));
49 done:
50 ucstring_destroy(comment);
53 static void do_rar_old_member(deark *c, de_arch_lctx *d, struct de_arch_member_data *md)
55 i64 n;
56 u8 b;
57 i64 pos = md->member_hdr_pos;
58 i64 hdrlen;
59 i64 fnlen;
60 int saved_indent_level;
62 de_dbg_indent_save(c, &saved_indent_level);
63 de_dbg(c, "member file at %"I64_FMT, md->member_hdr_pos);
64 de_dbg_indent(c, 1);
66 de_arch_read_field_cmpr_len_p(md, &pos);
67 de_arch_read_field_orig_len_p(md, &pos);
69 // Note: This is a checksum of the decompressed bytes. Algorithm:
70 // Initialize ck = 0x0000
71 // For each byte b:
72 // * ck = ck + b
73 // * Rotate ck left 1 bit,
74 // i.e. ck = (ck & 0x7fff)<<1 | (ck & 0x8000)>>15)
75 n = de_getu16le_p(&pos);
76 de_dbg(c, "checksum: 0x%04x", (UI)n);
78 hdrlen = de_getu16le_p(&pos);
79 de_dbg(c, "hdr len: %u", (int)hdrlen);
81 if(hdrlen < 12) {
82 d->fatalerrflag = 1;
83 goto done;
86 md->member_total_size = hdrlen + md->cmpr_len;
88 de_arch_read_field_dttm_p(d, &md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], "mod",
89 DE_ARCH_TSTYPE_DOS_TD, &pos);
90 de_arch_read_field_dos_attr_p(md, &pos);
92 md->file_flags = (UI)de_getbyte_p(&pos); // status flags
93 de_dbg(c, "flags: 0x%02x", md->file_flags);
95 b = de_getbyte_p(&pos);
96 de_dbg(c, "min ver needed to unpack: %u", (UI)b);
98 fnlen = (i64)de_getbyte_p(&pos);
100 md->cmpr_meth = (UI)de_getbyte_p(&pos);
101 de_dbg(c, "cmpr. method: %u (%s)", md->cmpr_meth, get_generic_rar_cmpr_name(md->cmpr_meth));
103 // Spec says the filename occurs *after* the comment, but (for v1.40.2)
104 // it just isn't true.
105 dbuf_read_to_ucstring(c->infile, pos, fnlen, md->filename, 0,
106 d->input_encoding);
107 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->filename));
108 pos += fnlen;
110 if(md->file_flags & 0x08) {
111 i64 cmtlen;
113 cmtlen = de_getu16le_p(&pos);
114 de_dbg(c, "file comment at %"I64_FMT", len=%"I64_FMT, pos, cmtlen);
115 rar_handle_noncmpr_comment(c, d, pos, cmtlen, d->input_encoding, 1);
116 pos += cmtlen;
119 md->cmpr_pos = md->member_hdr_pos + hdrlen;
120 de_dbg(c, "cmpr. data at %"I64_FMT", len=%"I64_FMT, md->cmpr_pos, md->cmpr_len);
122 done:
123 de_dbg_indent_restore(c, saved_indent_level);
126 // Intended to work for, at least, RAR v1.40.2 (RAR1_402.EXE).
127 // Ref: Search for a file named RAR140DC.EXE, containing technote.doc.
128 static void do_rar_old(deark *c, de_arch_lctx *d)
130 i64 pos = d->data_startpos;
131 i64 hdrpos;
132 i64 hdrlen;
133 struct de_arch_member_data *md = NULL;
134 int saved_indent_level;
136 de_dbg_indent_save(c, &saved_indent_level);
137 de_declare_fmt(c, "RAR (<v1.50)");
138 hdrpos = pos;
139 de_dbg(c, "archive header at %"I64_FMT, hdrpos);
140 de_dbg_indent(c, 1);
141 pos += 4; // header ID
142 hdrlen = de_getu16le_p(&pos);
143 de_dbg(c, "hdr len: %"I64_FMT, hdrlen);
144 d->archive_flags = (UI)de_getbyte_p(&pos);
145 de_dbg(c, "flags: 0x%02x", d->archive_flags);
147 if(d->archive_flags & 0x02) {
148 i64 cmtlen;
150 cmtlen = de_getu16le_p(&pos);
151 de_dbg(c, "archive comment at %"I64_FMT", len=%"I64_FMT", compressed=%d",
152 pos, cmtlen, (int)((d->archive_flags & 0x10)!=0));
153 if((d->archive_flags & 0x10)==0) {
154 // The old format supports a non-compressed archive comment, though for
155 // v1.4.0+ it is always(?) compressed.
156 rar_handle_noncmpr_comment(c, d, pos, cmtlen, d->input_encoding, 0);
158 pos += cmtlen;
161 if(d->archive_flags & 0x20) {
162 i64 ext1len;
164 ext1len = de_getu16le_p(&pos);
165 de_dbg(c, "EXT1 field at %"I64_FMT", len=%"I64_FMT, pos, ext1len);
166 pos += ext1len;
169 de_dbg_indent_restore(c, saved_indent_level);
171 pos = hdrpos + hdrlen;
172 while(1) {
173 if(pos >= c->infile->len) break;
174 if(md) {
175 de_arch_destroy_md(c, md);
176 md = NULL;
178 md = de_arch_create_md(c, d);
179 md->member_hdr_pos = pos;
180 do_rar_old_member(c, d, md);
182 if(d->fatalerrflag) goto done;
183 if(md->member_total_size <= 0) goto done;
184 pos += md->member_total_size;
187 done:
188 if(md) {
189 de_arch_destroy_md(c, md);
191 de_dbg_indent_restore(c, saved_indent_level);
194 struct rar4_block {
195 i64 block_pos;
196 i64 block_max_endpos;
197 u32 crc_reported;
198 UI flags;
199 u8 type;
200 u8 parent_type; // 0 for none
201 u8 parsed_ok;
202 u8 last_block;
203 i64 data1_pos;
204 i64 block_size_1;
205 i64 data2_pos;
206 i64 block_size_2;
207 i64 block_size_high;
208 i64 block_size_full;
211 struct rar5_block {
212 i64 block_pos;
213 i64 block_size_full;
214 u32 crc_reported;
215 UI type;
216 UI hdr_flags;
217 i64 extra_area_pos;
218 i64 extra_area_size;
219 i64 data_area_pos;
220 i64 data_area_size;
221 i64 pos_after_standard_fields;
224 static void rar4_free_block(deark *c, struct rar4_block *rb)
226 if(!rb) return;
227 de_free(c, rb);
230 static void rar5_free_block(deark *c, struct rar5_block *rb)
232 if(!rb) return;
233 de_free(c, rb);
236 static const char *rar_get_v4_blktype_name(u8 n)
238 const char *name = NULL;
240 switch(n) {
241 case 0x72: name = "marker"; break;
242 case 0x73: name = "archive header"; break;
243 case 0x74: name = "file header"; break;
244 case 0x75: name = "comment"; break;
245 case 0x76: name = "extra info"; break;
246 case 0x77: name = "subblock (old)"; break;
247 case 0x78: name = "recovery record"; break;
248 case 0x79: name = "auth info"; break;
249 case 0x7a: name = "subblock (new)"; break;
250 case 0x7b: name = "end of archive"; break;
253 return name?name:"?";
256 static const char *rar4_get_OS_name(u8 n)
258 const char *name = NULL;
260 switch(n) {
261 case 0: name = "DOS"; break;
262 case 1: name = "OS/2"; break;
263 case 2: name = "Windows"; break;
264 case 3: name = "Unix"; break;
265 case 4: name = "Mac"; break;
267 return name?name:"?";
270 static const char *rar4_get_cmpr_name(u8 n)
272 return get_generic_rar_cmpr_name((UI)(n-0x30));
275 static void rar_read_v4_block(deark *c, de_arch_lctx *d, struct rar4_block *rb);
277 // Header type 0x73
278 static void do_rar4_block_archiveheader(deark *c, de_arch_lctx *d, struct rar4_block *rb)
280 i64 pos;
282 pos = rb->data1_pos;
283 pos += 2; // reserved1
284 pos += 4; // reserved2
286 if(rb->flags & 0x0002) {
287 struct rar4_block *cmt_rb;
289 cmt_rb = de_malloc(c, sizeof(struct rar4_block));
290 cmt_rb->block_pos = pos;
291 cmt_rb->block_max_endpos = rb->data2_pos;
292 cmt_rb->parent_type = rb->type;
293 rar_read_v4_block(c, d, cmt_rb);
294 rar4_free_block(c, cmt_rb);
298 // Header type 0x74 or 0x7a
299 static void do_rar4_block_fileheader(deark *c, de_arch_lctx *d, struct rar4_block *rb)
301 struct de_arch_member_data *md = NULL;
302 i64 pos;
303 i64 fnlen;
304 u32 filecrc_reported;
305 UI attribs;
306 u8 os;
307 u8 b;
309 md = de_arch_create_md(c, d);
311 pos = rb->data1_pos;
313 md->cmpr_pos = rb->data2_pos;
314 md->cmpr_len = rb->block_size_2 + rb->block_size_high;
315 de_arch_read_field_orig_len_p(md, &pos);
317 os = de_getbyte_p(&pos);
318 de_dbg(c, "OS: %u (%s)", (UI)os, rar4_get_OS_name(os));
320 filecrc_reported = (u32)de_getu32le_p(&pos);
321 de_dbg(c, "file crc: 0x%08x", (UI)filecrc_reported);
323 de_arch_read_field_dttm_p(d, &md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], "mod",
324 DE_ARCH_TSTYPE_DOS_TD, &pos);
326 b = de_getbyte_p(&pos);
327 de_dbg(c, "min ver needed to unpack: %u", (UI)b);
329 md->cmpr_meth = (UI)de_getbyte_p(&pos);
330 de_dbg(c, "cmpr. method: 0x%02x (%s)", md->cmpr_meth, rar4_get_cmpr_name((u8)md->cmpr_meth));
332 fnlen = de_getu16le_p(&pos);
334 attribs = (UI)de_getu32le_p(&pos);
335 de_dbg(c, "attribs: 0x%08x", attribs);
336 if(os==0 || os==1 || os==2) {
337 de_dbg_indent(c, 1);
338 de_arch_handle_field_dos_attr(md, (attribs & 0xff));
339 de_dbg_indent(c, -1);
342 if(rb->flags & 0x0100) {
343 pos += 4; // HIGH_PACK_SIZE, already read
344 pos += 4; // TODO: HIGH_UNP_SIZE
347 // TODO: Handle Unicode filenames (flags & 0x0200).
348 // - If the name field contains a NUL byte, a Unicode name follows (but how
349 // is it encoded?).
350 // - If there is no NUL byte, the entire filename is UTF-8.
351 dbuf_read_to_ucstring_n(c->infile, pos, fnlen, 2048, md->filename,
352 DE_CONVFLAG_STOP_AT_NUL, d->input_encoding);
353 de_dbg(c, "%sname: \"%s\"", (rb->type==0x7a?"":"file"), ucstring_getpsz_d(md->filename));
354 pos += fnlen;
356 if(rb->flags & 0x0400) {
357 pos += 8; // salt (first documented in v3.00)
360 if((rb->flags & 0x1008)==0x1008) {
361 // Ext time and comment shouldn't both be present.
362 // Ext time first documented in v3.40, which is also when the docs
363 // started saying that RAR 3.x doesn't set the 0x0008 flag.
364 goto done;
366 if(rb->type==0x7a && (rb->flags & 0x0008)) {
367 // A new subblock can't have an old comment
368 goto done;
371 // TODO: ext time (rb->flags & 0x1000)
373 // Old-style comment -- A nested block
374 if(rb->flags & 0x0008) {
375 struct rar4_block *cmt_rb;
377 cmt_rb = de_malloc(c, sizeof(struct rar4_block));
378 cmt_rb->block_pos = pos;
379 cmt_rb->block_max_endpos = rb->data2_pos;
380 cmt_rb->parent_type = rb->type;
381 rar_read_v4_block(c, d, cmt_rb);
382 pos += cmt_rb->block_size_full;
383 rar4_free_block(c, cmt_rb);
386 de_dbg(c, "cmpr. data at %"I64_FMT", len=%"I64_FMT, md->cmpr_pos, md->cmpr_len);
388 done:
389 de_arch_destroy_md(c, md);
392 // Header type 0x75, expected to be nested in type 0x73 or 0x74
393 static void do_rar4_block_oldcomment(deark *c, de_arch_lctx *d, struct rar4_block *rb)
395 i64 cmpr_len;
396 i64 orig_len;
397 i64 pos = rb->data1_pos;
398 u8 ver_needed;
399 u8 cmpr_meth;
400 u32 crc_reported;
401 int is_file_comment;
403 is_file_comment = (rb->parent_type==0x74);
404 orig_len = de_getu16le_p(&pos);
405 de_dbg(c, "uncompr. comment len: %"I64_FMT, orig_len);
406 ver_needed = de_getbyte_p(&pos);
407 de_dbg(c, "ver needed to read comment: %u", (UI)ver_needed);
408 cmpr_meth = de_getbyte_p(&pos);
409 de_dbg(c, "cmpr. method: 0x%02x (%s)", (UI)cmpr_meth, rar4_get_cmpr_name(cmpr_meth));
410 crc_reported = (u32)de_getu16le_p(&pos);
411 de_dbg(c, "crc (reported): 0x%04x", (UI)crc_reported);
413 cmpr_len = rb->data2_pos - pos;
414 de_dbg(c, "%s comment at %"I64_FMT", len=%"I64_FMT,
415 (is_file_comment?"file":"archive"), pos, cmpr_len);
416 if(cmpr_len<1) goto done;
417 if(cmpr_meth!=48) goto done; // compressed
418 rar_handle_noncmpr_comment(c, d, pos, cmpr_len, d->input_encoding, is_file_comment);
420 done:
424 static const char *rar4_get_oldsubblock_name(UI t)
426 const char *name = NULL;
428 if(t==0x100) name="OS/2 ext attribs";
429 return name?name:"?";
432 static void do_rar4_block_oldsubblock(deark *c, de_arch_lctx *d, struct rar4_block *rb)
434 UI sbtype;
435 i64 pos = rb->data1_pos;
437 sbtype = (UI)de_getu16le_p(&pos);
438 de_dbg(c, "subblock type: 0x%04x (%s)", sbtype, rar4_get_oldsubblock_name(sbtype));
441 // Caller supplies descr
442 static void get_rar4_flags_descr(struct rar4_block *rb, de_ucstring *s)
444 UI bf = rb->flags;
445 UI x;
447 ucstring_empty(s);
448 if(rb->type==0x73) { // archive hdr
449 if(bf & 0x0001) {
450 ucstring_append_flags_item(s, "volume");
451 bf -= 0x0001;
453 if(bf & 0x0002) {
454 ucstring_append_flags_item(s, "has comment (old)");
455 bf -= 0x0002;
457 if(bf & 0x0004) {
458 ucstring_append_flags_item(s, "locked");
459 bf -= 0x0004;
461 if(bf & 0x0008) {
462 ucstring_append_flags_item(s, "solid");
463 bf -= 0x0008;
465 if(bf & 0x0020) {
466 ucstring_append_flags_item(s, "has auth info (old)");
467 bf -= 0x0020;
469 if(bf & 0x0040) {
470 ucstring_append_flags_item(s, "has recovery record");
471 bf -= 0x0040;
474 else if(rb->type==0x74 || rb->type==0x7a) { // file hdr or new subblock
475 if(bf & 0x0001) {
476 ucstring_append_flags_item(s, "continued from prev vol");
477 bf -= 0x0001;
479 if(bf & 0x0002) {
480 ucstring_append_flags_item(s, "continued in next vol");
481 bf -= 0x0002;
483 if(bf & 0x0004) {
484 ucstring_append_flags_item(s, "encrypted");
485 bf -= 0x0004;
487 if(bf & 0x0008) {
488 ucstring_append_flags_item(s, "has comment (old)");
489 bf -= 0x0008;
491 if(bf & 0x0010) {
492 ucstring_append_flags_item(s, "solid");
493 bf -= 0x0010;
496 x = bf & 0x00e0;
497 bf -= x;
498 x >>= 5;
499 if(x==0x7) {
500 ucstring_append_flags_item(s, "directory");
502 else {
503 ucstring_append_flags_itemf(s, "dict=%uK", (UI)(64<<x));
506 if(bf & 0x0100) {
507 ucstring_append_flags_item(s, "large");
508 bf -= 0x0100;
510 if(bf & 0x0200) {
511 ucstring_append_flags_item(s, "Unicode filename");
512 bf -= 0x0200;
514 if(bf & 0x0400) {
515 ucstring_append_flags_item(s, "salt");
516 bf -= 0x0400;
518 if(bf & 0x0800) {
519 ucstring_append_flags_item(s, "old version");
520 bf -= 0x0800;
522 if(bf & 0x1000) {
523 ucstring_append_flags_item(s, "has ext time field");
524 bf -= 0x1000;
528 if(bf & 0x4000) {
529 ucstring_append_flags_item(s, "unsafe to copy");
530 bf -= 0x4000;
532 if(bf & 0x8000) {
533 ucstring_append_flags_item(s, "long block");
534 bf -= 0x8000;
536 if(bf!=0) {
537 ucstring_append_flags_itemf(s, "0x%04x", bf);
541 // Caller allocates/frees rb, and sets:
542 // ->block_pos,
543 // ->block_max_endpos,
544 // ->parent_type if applicable
545 static void rar_read_v4_block(deark *c, de_arch_lctx *d, struct rar4_block *rb)
547 int saved_indent_level;
548 i64 pos;
549 i64 n;
550 u32 crc_calc;
551 de_ucstring *descr = NULL;
553 de_dbg_indent_save(c, &saved_indent_level);
554 pos = rb->block_pos;
556 de_dbg(c, "block at %"I64_FMT, rb->block_pos);
557 de_dbg_indent(c, 1);
558 rb->crc_reported = (u32)de_getu16le_p(&pos);
559 de_dbg(c, "crc (reported): 0x%04x", (UI)rb->crc_reported);
561 rb->type = de_getbyte_p(&pos);
562 de_dbg(c, "block type: 0x%02x (%s)", (UI)rb->type, rar_get_v4_blktype_name(rb->type));
564 // The only nested block allowed is a comment block.
565 if(rb->parent_type!=0 && rb->type!=0x75) goto done;
567 if(rb->type==0x7b) {
568 rb->last_block = 1;
571 rb->flags = (UI)de_getu16le_p(&pos);
572 descr = ucstring_create(c);
573 get_rar4_flags_descr(rb, descr);
574 de_dbg(c, "block flags: 0x%04x (%s)", (UI)rb->flags, ucstring_getpsz_d(descr));
576 rb->block_size_1 = de_getu16le_p(&pos);
577 de_dbg(c, "block size (part 1): %"I64_FMT, rb->block_size_1);
578 if(rb->block_pos + rb->block_size_1 > rb->block_max_endpos) goto done;
580 if(rb->type==0x75) n = 11; // Special case for old-style comment blocks
581 else n = rb->block_size_1-2;
582 de_crcobj_reset(d->crco);
583 de_crcobj_addslice(d->crco, c->infile, rb->block_pos+2, n);
584 crc_calc = de_crcobj_getval(d->crco);
585 crc_calc &= 0xffff;
586 de_dbg(c, "crc (calculated): 0x%04x", (UI)crc_calc);
588 if(rb->flags & 0x8000) {
589 rb->block_size_2 = de_getu32le_p(&pos);
590 de_dbg(c, "block size (part 2): %"I64_FMT, rb->block_size_2);
593 if((rb->type==0x74 || rb->type==0x7a) && (rb->flags & 0x0100)) {
594 rb->block_size_high = de_getu32le(pos+17); // HIGH_PACK_SIZE
595 if(rb->block_size_high > 0x7ffffffe) {
596 // Legal, but we can't allow integer overflow (after <<32
597 // then adding 0xffffffff + 0xffff).
598 goto done;
600 rb->block_size_high <<= 32;
603 rb->data1_pos = pos;
604 rb->data2_pos = rb->block_pos + rb->block_size_1;
606 rb->block_size_full = rb->block_size_1 + rb->block_size_2 + rb->block_size_high;
607 de_dbg(c, "block size (total): %"I64_FMT, rb->block_size_full);
608 if(rb->block_pos + rb->block_size_full > rb->block_max_endpos) goto done;
609 rb->parsed_ok = 1;
611 switch(rb->type) {
612 case 0x73:
613 do_rar4_block_archiveheader(c, d, rb);
614 break;
615 case 0x74:
616 case 0x7a:
617 do_rar4_block_fileheader(c, d, rb);
618 break;
619 case 0x75:
620 do_rar4_block_oldcomment(c, d, rb);
621 break;
622 case 0x77:
623 do_rar4_block_oldsubblock(c, d, rb);
624 break;
627 done:
628 ucstring_destroy(descr);
629 de_dbg_indent_restore(c, saved_indent_level);
632 static void do_rar_v4(deark *c, de_arch_lctx *d)
634 struct rar4_block *rb = NULL;
635 i64 pos = d->data_startpos;
637 de_declare_fmt(c, "RAR (v1.50-4.20)");
638 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC32_IEEE);
640 while(1) {
641 if(pos >= c->infile->len) break;
643 if(rb) {
644 rar4_free_block(c, rb);
645 rb = NULL;
647 rb = de_malloc(c, sizeof(struct rar4_block));
648 rb->block_pos = pos;
649 rb->block_max_endpos = c->infile->len;
650 rb->parent_type = 0;
651 rar_read_v4_block(c, d, rb);
652 if(!rb->parsed_ok) goto done;
653 if(rb->last_block) goto done;
654 if(rb->block_size_full <= 0) goto done;
655 pos += rb->block_size_full;
658 done:
659 rar4_free_block(c, rb);
662 static u64 rar_get_vint_p(de_arch_lctx *d, dbuf *f, i64 *ppos)
664 u64 val = 0;
665 UI nbits_set = 0;
667 // TODO: Better handling of errors & oversized ints
668 while(1) {
669 u8 b;
671 if(nbits_set>=64) { val = 0; break; }
672 b = dbuf_getbyte_p(f, ppos);
673 if(nbits_set < 64) {
674 val |= (((u64)(b&0x7f))<<nbits_set);
675 nbits_set += 7;
677 if((b&0x80)==0) break;
679 return val;
682 // RAR's "vint" type is always unsigned.
683 // This function returns it in a signed i64, for convenience.
684 static i64 rar_get_vint_i64_p(de_arch_lctx *d, dbuf *f, i64 *ppos)
686 u64 v1u;
687 i64 v1i;
689 v1u = rar_get_vint_p(d, f, ppos);
690 v1i = (i64)v1u;
691 if(v1i<0) v1i = 0;
692 return v1i;
// Values of the "header type" field of a v5 block
#define RAR5_HDRTYPE_ARCHIVE  1 // archive header
#define RAR5_HDRTYPE_FILE     2 // file header
#define RAR5_HDRTYPE_SERVICE  3 // service header
#define RAR5_HDRTYPE_A_ENCR   4 // archive encryption header
#define RAR5_HDRTYPE_EOA      5 // end of archive
701 static const char *rar_get_v5_hdrtype_name(UI n)
703 const char *name = NULL;
705 switch(n) {
706 case RAR5_HDRTYPE_ARCHIVE: name = "archive header"; break;
707 case RAR5_HDRTYPE_FILE: name = "file header"; break;
708 case RAR5_HDRTYPE_SERVICE: name = "service header"; break;
709 case 4: name = "encryption header"; break;
710 case RAR5_HDRTYPE_EOA: name = "end of archive"; break;
713 return name?name:"?";
716 static void on_rar5_file_end(deark *c, de_arch_lctx *d)
718 if(!d->cur_md) return;
719 de_arch_destroy_md(c, d->cur_md);
720 d->cur_md = NULL;
723 static void on_rar5_file_begin(deark *c, de_arch_lctx *d)
725 on_rar5_file_end(c, d);
726 d->cur_md = de_arch_create_md(c, d);
729 struct rar5_extra_data {
730 u8 have_timestamps;
731 struct de_timestamp tmstamp[DE_TIMESTAMPIDX_COUNT];
734 struct rar5_file_or_svc_hdr_data {
735 UI file_flags;
736 u64 attribs;
737 i64 orig_len;
738 u32 crc_reported;
739 UI cmpr_info;
740 UI cmpr_meth;
741 UI os;
742 struct de_timestamp mtime1;
743 struct de_stringreaderdata *name_srd;
746 static void do_rar5_comment(deark *c, de_arch_lctx *d, struct rar5_block *rb,
747 struct rar5_file_or_svc_hdr_data *hd)
749 i64 cmt_len;
750 de_ucstring *comment = NULL;
752 if(hd->cmpr_meth!=0) goto done;
753 cmt_len = de_min_int(rb->data_area_size, hd->orig_len);
754 if(cmt_len<1) goto done;
756 rar_handle_noncmpr_comment(c, d, rb->data_area_pos, cmt_len, DE_ENCODING_UTF8, 0);
758 done:
759 ucstring_destroy(comment);
762 static const char *get_rar5_extra_record_name(struct rar5_block *rb, UI t)
764 const char *name = NULL;
766 if(rb->type==RAR5_HDRTYPE_FILE || rb->type==RAR5_HDRTYPE_SERVICE) {
767 switch(t) {
768 case 1: name="encryption"; break;
769 case 2: name="hash"; break;
770 case 3: name="timestamps"; break;
771 case 4: name="version"; break;
772 case 5: name="redirection"; break;
773 case 6: name="owner (Unix)"; break;
774 case 7: name="service data"; break;
777 else if(rb->type==RAR5_HDRTYPE_ARCHIVE) {
778 if(t==1) name="locator";
780 return name?name:"?";
783 static void do_rar5_extrarec_timestamps(deark *c, de_arch_lctx *d, struct rar5_extra_data *ed,
784 i64 pos1, i64 len)
786 UI flags;
787 enum de_arch_tstype_enum tstype;
788 i64 pos = pos1;
789 i64 n;
790 double subsec;
792 if(len<1) goto done;
793 ed->have_timestamps = 1;
794 flags = (UI)rar_get_vint_p(d, c->infile, &pos);
795 de_dbg(c, "flags: 0x%x", flags);
796 tstype = (flags & 0x1) ? DE_ARCH_TSTYPE_UNIX_U : DE_ARCH_TSTYPE_FILETIME;
797 if(flags & 0x2) {
798 de_arch_read_field_dttm_p(d, &ed->tmstamp[DE_TIMESTAMPIDX_MODIFY], "mod",
799 tstype, &pos);
801 if(flags & 0x4) {
802 de_arch_read_field_dttm_p(d, &ed->tmstamp[DE_TIMESTAMPIDX_CREATE], "create",
803 tstype, &pos);
805 if(flags & 0x8) {
806 de_arch_read_field_dttm_p(d, &ed->tmstamp[DE_TIMESTAMPIDX_ACCESS], "access",
807 tstype, &pos);
810 // Unix time w/nanosecond precision.
811 // FIXME: This ought to be shown in the debug message above.
812 if((flags & 0x13)==0x13) {
813 n = de_getu32le_p(&pos);
814 subsec = ((double)n) / 1000000000.0;
815 de_timestamp_set_subsec(&ed->tmstamp[DE_TIMESTAMPIDX_MODIFY], subsec);
817 if((flags & 0x15)==0x15) {
818 n = de_getu32le_p(&pos);
819 subsec = ((double)n) / 1000000000.0;
820 de_timestamp_set_subsec(&ed->tmstamp[DE_TIMESTAMPIDX_CREATE], subsec);
822 if((flags & 0x19)==0x19) {
823 n = de_getu32le_p(&pos);
824 subsec = ((double)n) / 1000000000.0;
825 de_timestamp_set_subsec(&ed->tmstamp[DE_TIMESTAMPIDX_ACCESS], subsec);
828 done:
832 static void do_rar5_extra_area(deark *c, de_arch_lctx *d, struct rar5_block *rb)
834 int saved_indent_level;
835 i64 pos = rb->extra_area_pos;
836 i64 endpos = rb->data_area_pos;
837 struct rar5_extra_data *ed = NULL;
839 de_dbg_indent_save(c, &saved_indent_level);
840 ed = de_malloc(c, sizeof(struct rar5_extra_data));
841 if(rb->extra_area_size<1) goto done;
843 de_dbg(c, "extra area at %"I64_FMT", len=%"I64_FMT, rb->extra_area_pos,
844 rb->extra_area_size);
845 de_dbg_indent(c, 1);
846 while(1) {
847 i64 reclen;
848 i64 rec_dpos;
849 i64 rec_dlen;
850 i64 next_record_pos;
851 UI rectype;
852 int decoded;
854 if(pos >= endpos) break;
855 de_dbg(c, "record at %"I64_FMT, pos);
856 de_dbg_indent(c, 1);
857 reclen = rar_get_vint_i64_p(d, c->infile, &pos);
858 de_dbg(c, "record len: %"I64_FMT, reclen);
860 // Extra checks like the following are to guard against integer overflow.
861 if(reclen > rb->extra_area_size) goto done;
863 next_record_pos = pos + reclen;
864 if(next_record_pos > endpos) goto done;
865 rectype = (UI)rar_get_vint_p(d, c->infile, &pos);
866 de_dbg(c, "record type: %u (%s)", rectype,
867 get_rar5_extra_record_name(rb, rectype));
869 rec_dpos = pos;
870 rec_dlen = next_record_pos - rec_dpos;
871 de_dbg(c, "record dpos: %"I64_FMT", len: %"I64_FMT, rec_dpos, rec_dlen);
873 decoded = 0;
874 if(rb->type==RAR5_HDRTYPE_FILE || rb->type==RAR5_HDRTYPE_SERVICE) {
875 if(rectype==3) {
876 do_rar5_extrarec_timestamps(c, d, ed, rec_dpos, rec_dlen);
877 decoded = 1;
881 if(!decoded && rec_dlen>0) {
882 de_dbg_hexdump(c, c->infile, pos, rec_dlen, 256, NULL, 0x1);
885 pos = next_record_pos;
886 de_dbg_indent(c, -1);
889 done:
890 de_free(c, ed);
891 de_dbg_indent_restore(c, saved_indent_level);
894 static void do_rar5_file_or_service_hdr(deark *c, de_arch_lctx *d, struct rar5_block *rb)
896 UI u;
897 i64 namelen;
898 i64 pos;
899 struct rar5_file_or_svc_hdr_data *hd = NULL;
901 hd = de_malloc(c, sizeof(struct rar5_file_or_svc_hdr_data));
902 pos = rb->pos_after_standard_fields;
904 if(rb->type==RAR5_HDRTYPE_FILE) {
905 on_rar5_file_begin(c, d);
908 hd->file_flags = (UI)rar_get_vint_p(d, c->infile, &pos);
909 de_dbg(c, "file flags: 0x%x", hd->file_flags);
910 hd->orig_len = rar_get_vint_i64_p(d, c->infile, &pos);
911 de_dbg(c, "original size: %"I64_FMT, hd->orig_len);
912 de_sanitize_length(&hd->orig_len);
913 hd->attribs = rar_get_vint_p(d, c->infile, &pos);
914 de_dbg(c, "attribs: 0x%"U64_FMTx, hd->attribs);
916 if(hd->file_flags & 0x2) { // TODO: Test this
917 de_arch_read_field_dttm_p(d, &hd->mtime1, "mod", DE_ARCH_TSTYPE_UNIX_U, &pos);
919 if(hd->file_flags & 0x4) {
920 hd->crc_reported = (u32)de_getu32le_p(&pos);
921 de_dbg(c, "data crc: 0x%08x", (UI)hd->crc_reported);
924 hd->cmpr_info = (UI)rar_get_vint_p(d, c->infile, &pos);
925 de_dbg(c, "cmpr info: 0x%x", hd->cmpr_info);
926 de_dbg_indent(c, 1);
927 u = hd->cmpr_info & 0x3f;
928 de_dbg(c, "version: %u", u);
929 u = (hd->cmpr_info >> 6) & 0x1;
930 de_dbg(c, "solid: %u", u);
931 hd->cmpr_meth = (hd->cmpr_info >> 7) & 0x7;
932 de_dbg(c, "method: %u", hd->cmpr_meth);
933 u = (hd->cmpr_info >> 10) & 0xf;
934 de_dbg(c, "dict size: %u (%uk)", u, (UI)(128<<u));
935 de_dbg_indent(c, -1);
937 hd->os = (UI)rar_get_vint_p(d, c->infile, &pos);
938 de_dbg(c, "os: %u", hd->os);
940 namelen = rar_get_vint_i64_p(d, c->infile, &pos);
941 #define RAR_MAX_NAMELEN 65535
942 if(namelen > RAR_MAX_NAMELEN) goto done;
944 hd->name_srd = dbuf_read_string(c->infile, pos, namelen, namelen, 0,
945 DE_ENCODING_UTF8);
946 de_dbg(c, "name: \"%s\"", ucstring_getpsz_d(hd->name_srd->str));
948 if(rb->type==RAR5_HDRTYPE_SERVICE) {
949 if(!de_strcmp(hd->name_srd->sz, "CMT")) {
950 do_rar5_comment(c, d, rb, hd);
953 done:
954 if(hd) {
955 de_destroy_stringreaderdata(c, hd->name_srd);
956 de_free(c, hd);
960 static void rar_read_v5_block(deark *c, de_arch_lctx *d, struct rar5_block *rb, i64 pos1)
962 i64 pos;
963 i64 hdr_size;
964 i64 pos_of_hdr_type_field;
965 u32 crc_calc;
966 int saved_indent_level;
968 de_dbg_indent_save(c, &saved_indent_level);
969 rb->block_pos = pos1;
970 pos = rb->block_pos;
972 de_dbg(c, "block at %"I64_FMT, rb->block_pos);
973 de_dbg_indent(c, 1);
974 rb->crc_reported = (u32)de_getu32le_p(&pos);
975 de_dbg(c, "hdr crc (reported): 0x%08x", (UI)rb->crc_reported);
977 hdr_size = rar_get_vint_i64_p(d, c->infile, &pos);
978 de_dbg(c, "hdr size: %"I64_FMT, hdr_size);
979 if(hdr_size > 0x1fffff) goto done;
981 pos_of_hdr_type_field = pos;
983 rb->type = (UI)rar_get_vint_p(d, c->infile, &pos);
984 de_dbg(c, "hdr type: %u (%s)", rb->type, rar_get_v5_hdrtype_name(rb->type));
985 if(rb->type==RAR5_HDRTYPE_EOA) {
986 d->stop_flag = 1;
989 rb->hdr_flags = (UI)rar_get_vint_p(d, c->infile, &pos);
990 // TODO: Describe the flags
991 de_dbg(c, "hdr flags: %u", rb->hdr_flags);
993 if(rb->hdr_flags & 0x1) {
994 rb->extra_area_size = rar_get_vint_i64_p(d, c->infile, &pos);
995 de_dbg(c, "extra area len: %"I64_FMT, rb->extra_area_size);
996 // Extra checks like the following are to guard against integer overflow.
997 if(rb->extra_area_size > c->infile->len) goto done;
1000 if(rb->hdr_flags & 0x2) {
1001 rb->data_area_size = rar_get_vint_i64_p(d, c->infile, &pos);
1002 de_dbg(c, "data area len: %"I64_FMT, rb->data_area_size);
1003 if(rb->data_area_size > c->infile->len) goto done;
1006 rb->pos_after_standard_fields = pos;
1008 // (If there's no data area, then this is the end of the block.)
1009 rb->data_area_pos = pos_of_hdr_type_field + hdr_size;
1010 if(rb->data_area_pos + rb->data_area_size > c->infile->len) goto done;
1012 de_crcobj_reset(d->crco);
1013 de_crcobj_addslice(d->crco, c->infile, rb->block_pos+4, rb->data_area_pos-(rb->block_pos+4));
1014 crc_calc = de_crcobj_getval(d->crco);
1015 de_dbg(c, "hdr crc (calculated): 0x%08x", (UI)crc_calc);
1016 if(crc_calc != rb->crc_reported) goto done;
1018 rb->block_size_full = (rb->data_area_pos + rb->data_area_size) - rb->block_pos;
1020 rb->extra_area_pos = rb->data_area_pos - rb->extra_area_size;
1021 if(rb->hdr_flags & 0x1) {
1022 de_dbg(c, "extra area pos %"I64_FMT, rb->extra_area_pos);
1025 if(rb->hdr_flags & 0x2) {
1026 de_dbg(c, "data area pos: %"I64_FMT, rb->data_area_pos);
1029 switch(rb->type) {
1030 case RAR5_HDRTYPE_FILE:
1031 case RAR5_HDRTYPE_SERVICE:
1032 do_rar5_file_or_service_hdr(c, d, rb);
1033 break;
1034 case RAR5_HDRTYPE_A_ENCR:
1035 // The rest of the archive is encrypted.
1036 d->stop_flag = 1;
1037 break;
1040 do_rar5_extra_area(c, d, rb);
1042 done:
1043 if(rb->block_size_full==0) {
1044 d->fatalerrflag = 1;
1046 de_dbg_indent_restore(c, saved_indent_level);
1049 static void do_rar_v5(deark *c, de_arch_lctx *d)
1051 struct rar5_block *rb = NULL;
1052 i64 pos = d->data_startpos;
1054 de_declare_fmt(c, "RAR 5.0");
1055 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC32_IEEE);
1056 pos += 8;
1058 while(1) {
1059 if(pos >= c->infile->len) break;
1061 if(rb) {
1062 rar5_free_block(c, rb);
1063 rb = NULL;
1065 rb = de_malloc(c, sizeof(struct rar5_block));
1066 rar_read_v5_block(c, d, rb, pos);
1067 if(d->fatalerrflag || d->stop_flag) goto done;
1068 if(rb->block_size_full <= 0) goto done;
1069 pos += rb->block_size_full;
1072 done:
1073 on_rar5_file_end(c, d);
1074 rar5_free_block(c, rb);
1077 static int rar_get_fmtver(dbuf *f, i64 pos)
1079 u8 buf[8];
1081 dbuf_read(f, buf, pos, sizeof(buf));
1082 if(!de_memcmp(buf, g_rar4_sig, 7)) {
1083 return 4; // ver 1.5x-4.xx
1085 if(!de_memcmp(buf, g_rar5_sig, 8)) {
1086 return 5;
1088 if(!de_memcmp(buf, g_rar_oldsig, 4)) {
1089 return 1; // ver < 1.50
1091 return 0;
1094 static int rar_search_for_archive(deark *c, de_arch_lctx *d, i64 *pfoundpos)
1096 int ret;
1098 // Search for the common prefix of g_rar4_sig & g_rar5_sig
1099 ret = dbuf_search(c->infile, g_rar4_sig, 6, 0, c->infile->len, pfoundpos);
1100 if(ret) return 1;
1102 ret = dbuf_search(c->infile, g_rar_oldsig, 4, 0, c->infile->len, pfoundpos);
1103 if(ret) return 1;
1104 return 0;
1107 static void de_run_rar(deark *c, de_module_params *mparams)
1109 de_arch_lctx *d = NULL;
1111 d = de_arch_create_lctx(c);
1112 d->is_le = 1;
1113 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
1114 d->data_startpos = 0;
1116 d->fmtver = rar_get_fmtver(c->infile, d->data_startpos);
1117 if(d->fmtver==0 && c->module_disposition==DE_MODDISP_EXPLICIT) {
1118 if(rar_search_for_archive(c, d, &d->data_startpos)) {
1119 de_dbg(c, "likely RAR data found at %"I64_FMT, d->data_startpos);
1120 d->fmtver = rar_get_fmtver(c->infile, d->data_startpos);
1124 if(d->fmtver==0) {
1125 de_err(c, "Not a RAR file");
1126 goto done;
1129 if(d->fmtver==1) {
1130 do_rar_old(c, d);
1132 else if(d->fmtver==4) {
1133 do_rar_v4(c, d);
1135 else {
1136 do_rar_v5(c, d);
1139 done:
1140 de_arch_destroy_lctx(c, d);
1143 static int de_identify_rar(deark *c)
1145 int v;
1147 v = rar_get_fmtver(c->infile, 0);
1148 return v?100:0;
1151 void de_module_rar(deark *c, struct deark_module_info *mi)
1153 mi->id = "rar";
1154 mi->desc = "RAR archive";
1155 mi->run_fn = de_run_rar;
1156 mi->identify_fn = de_identify_rar;
1157 mi->flags |= DE_MODFLAG_WARNPARSEONLY;