Fixed a typo
[deark.git] / modules / lha.c
blob2c26a7aa29c2222745cc878c10868ca80caea6a7
1 // This file is part of Deark.
2 // Copyright (C) 2017 Jason Summers
3 // See the file COPYING for terms of use.
5 // LHA/LZH compressed archive format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_lha);
11 DE_DECLARE_MODULE(de_module_car_lha);
12 DE_DECLARE_MODULE(de_module_arx);
// Used (plus a small margin) to size the string arrays that collect the
// components of a member's path.
#define MAX_SUBDIR_LEVEL 32

// Compression method codes. Each value is the first four bytes of the
// 5-byte method ID (e.g. "-lh5-"), packed as a big-endian integer.

// SAR variants (space-delimited instead of hyphen-delimited)
#define CODE_S_LH0 0x204c4830 // SAR
#define CODE_S_LH5 0x204c4835 // SAR

// MAR variants
#define CODE_ah0 0x2d616830U // MAR
#define CODE_ari 0x2d617269U // MAR
#define CODE_hf0 0x2d686630U // MAR

// MicroFox PUT variants
#define CODE_lZ0 0x2d6c5a30U // PUT
#define CODE_lZ1 0x2d6c5a31U // PUT
#define CODE_lZ5 0x2d6c5a35U // PUT

// "-lh?-" family
#define CODE_lh0 0x2d6c6830U
#define CODE_lh1 0x2d6c6831U
#define CODE_lh2 0x2d6c6832U
#define CODE_lh3 0x2d6c6833U
#define CODE_lh4 0x2d6c6834U
#define CODE_lh5 0x2d6c6835U
#define CODE_lh6 0x2d6c6836U
#define CODE_lh7 0x2d6c6837U // standard, or LHARK
#define CODE_lh8 0x2d6c6838U
#define CODE_lh9 0x2d6c6839U
#define CODE_lha 0x2d6c6861U
#define CODE_lhb 0x2d6c6862U
#define CODE_lhc 0x2d6c6863U
#define CODE_lhd 0x2d6c6864U
#define CODE_lhe 0x2d6c6865U
#define CODE_lhx 0x2d6c6878U
#define CODE_lx1 0x2d6c7831U

// "-lz?-" family
#define CODE_lz2 0x2d6c7a32U
#define CODE_lz3 0x2d6c7a33U
#define CODE_lz4 0x2d6c7a34U
#define CODE_lz5 0x2d6c7a35U
#define CODE_lz7 0x2d6c7a37U
#define CODE_lz8 0x2d6c7a38U
#define CODE_lzs 0x2d6c7a73U

// "-pc?-" / "-pm?-" families
#define CODE_pc1 0x2d706331U
#define CODE_pm0 0x2d706d30U
#define CODE_pm1 0x2d706d31U
#define CODE_pm2 0x2d706d32U

// A timestamp index that apply_timestamp() will always reject. Used when a
// timestamp should be printed but not recorded.
#define TIMESTAMPIDX_INVALID (-1)
54 struct timestamp_data {
55 struct de_timestamp ts; // The best timestamp of this type found so far
56 int quality;
59 struct cmpr_meth_info;
61 struct member_data {
62 u8 hlev; // header level
63 de_encoding encoding;
64 i64 member_pos;
65 i64 total_size;
66 struct cmpr_meth_info *cmi;
67 u8 is_dir;
68 u8 is_special;
69 u8 is_nonexecutable;
70 u8 is_executable;
71 i64 orig_size;
72 UI hdr_checksum_calc;
74 u8 have_hdr_crc_reported;
75 u32 hdr_crc_reported;
76 u32 hdr_crc_calc;
77 i64 hdr_crc_field_pos;
79 u32 crc16;
80 u8 os_id;
81 i64 compressed_data_pos; // relative to beginning of file
82 i64 compressed_data_len;
83 de_ucstring *dirname;
84 de_ucstring *filename;
85 de_ucstring *fullfilename;
86 struct timestamp_data tsdata[DE_TIMESTAMPIDX_COUNT];
89 typedef struct localctx_struct {
90 de_encoding input_encoding;
91 u8 hlev_of_first_member;
92 u8 lhark_fmt;
93 u8 unsupp_warned;
94 u8 lh7_success_flag;
95 u8 lh7_failed_flag;
96 int member_count;
97 struct de_crcobj *crco;
98 } lctx;
100 typedef void (*decompressor_fn)(deark *c, lctx *d, struct member_data *md,
101 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
102 struct de_dfilter_results *dres);
104 struct cmpr_meth_info {
105 u8 is_recognized;
106 u32 uniq_id;
107 decompressor_fn decompressor;
108 u8 id_raw[5];
109 char id_printable_sz[6];
110 char descr[80];
113 struct exthdr_type_info_struct;
115 typedef void (*exthdr_decoder_fn)(deark *c, lctx *d, struct member_data *md,
116 u8 id, const struct exthdr_type_info_struct *e,
117 i64 pos, i64 dlen);
119 struct exthdr_type_info_struct {
120 u8 id;
121 u8 flags;
122 const char *name;
123 exthdr_decoder_fn decoder_fn;
126 static int lha_isdigit(u8 x)
128 return (x>='0' && x<='9');
131 static int lha_isalpha(u8 x)
133 return ((x>='A' && x<='Z') || (x>='a' && x<='z'));
136 static int lha_isalnum(u8 x)
138 return (lha_isdigit(x) || lha_isalpha(x));
141 static int is_possible_cmpr_meth(const u8 m[5])
143 if(m[0]!=m[4]) return 0;
144 if(m[0]==' ' && m[1]=='L' && m[2]=='H' && lha_isdigit(m[3])) return 1;
145 if(m[0]!='-') return 0;
146 if(!lha_isalpha(m[1]) ||
147 !lha_isalnum(m[2]) ||
148 !lha_isalnum(m[3]))
150 return 0;
152 return 1;
155 static void apply_timestamp(deark *c, lctx *d, struct member_data *md,
156 int tsidx, const struct de_timestamp *ts, int quality)
158 if(!ts->is_valid) return;
159 if(tsidx<0 || tsidx>=DE_TIMESTAMPIDX_COUNT) return;
160 if(quality < md->tsdata[tsidx].quality) return;
161 md->tsdata[tsidx].ts = *ts;
162 md->tsdata[tsidx].quality = quality;
165 static void read_msdos_modtime(deark *c, lctx *d, struct member_data *md,
166 i64 pos, const char *name)
168 i64 mod_time_raw, mod_date_raw;
169 char timestamp_buf[64];
170 struct de_timestamp tmp_timestamp;
172 mod_time_raw = de_getu16le(pos);
173 mod_date_raw = de_getu16le(pos+2);
174 if(mod_time_raw==0 && mod_date_raw==0) {
175 de_dbg(c, "%s: (not set)", name);
176 return;
178 de_dos_datetime_to_timestamp(&tmp_timestamp, mod_date_raw, mod_time_raw);
179 tmp_timestamp.tzcode = DE_TZCODE_LOCAL;
180 de_timestamp_to_string(&tmp_timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
181 de_dbg(c, "%s: %s", name, timestamp_buf);
182 apply_timestamp(c, d, md, DE_TIMESTAMPIDX_MODIFY, &tmp_timestamp, 10);
185 static void read_windows_FILETIME(deark *c, lctx *d, struct member_data *md,
186 i64 pos, int tsidx, const char *name)
188 i64 t_FILETIME;
189 char timestamp_buf[64];
190 struct de_timestamp tmp_timestamp;
192 t_FILETIME = de_geti64le(pos);
193 de_FILETIME_to_timestamp(t_FILETIME, &tmp_timestamp, 0x1);
194 if(t_FILETIME<=0) tmp_timestamp.is_valid = 0;
195 de_timestamp_to_string(&tmp_timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
196 de_dbg(c, "%s: %"I64_FMT" (%s)", name, t_FILETIME, timestamp_buf);
197 apply_timestamp(c, d, md, tsidx, &tmp_timestamp, 90);
200 static void read_unix_timestamp(deark *c, lctx *d, struct member_data *md,
201 i64 pos, int tsidx, const char *name)
203 i64 t;
204 char timestamp_buf[64];
205 struct de_timestamp tmp_timestamp;
207 t = de_geti32le(pos);
208 de_unix_time_to_timestamp(t, &tmp_timestamp, 0x1);
209 de_timestamp_to_string(&tmp_timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
210 de_dbg(c, "%s: %d (%s)", name, (int)t, timestamp_buf);
211 apply_timestamp(c, d, md, tsidx, &tmp_timestamp, 50);
214 static void rp_add_component(deark *c, lctx *d, struct member_data *md,
215 dbuf *f, i64 pos, i64 len, struct de_strarray *sa, de_ucstring *tmpstr)
217 if(len<1) return;
218 ucstring_empty(tmpstr);
219 dbuf_read_to_ucstring(f, pos, len, tmpstr, 0, md->encoding);
220 de_strarray_push(sa, tmpstr);
223 static void read_path_to_strarray(deark *c, lctx *d, struct member_data *md,
224 dbuf *inf, i64 pos, i64 len, struct de_strarray *sa, int is_exthdr_dirname)
226 dbuf *tmpdbuf = NULL;
227 de_ucstring *tmpstr = NULL;
228 i64 component_startpos;
229 i64 component_len;
230 i64 i;
232 tmpstr = ucstring_create(c);
234 tmpdbuf = dbuf_create_membuf(c, len, 0);
235 dbuf_copy(inf, pos, len, tmpdbuf);
237 component_startpos = 0;
238 component_len = 0;
240 for(i=0; i<len; i++) {
241 u8 ch;
243 ch = dbuf_getbyte(tmpdbuf, i);
244 if(ch==0x00) break; // Tolerate NUL termination
245 if((is_exthdr_dirname && ch==0xff) ||
246 (!is_exthdr_dirname && (ch=='\\' || ch=='/')))
248 component_len = i - component_startpos;
249 rp_add_component(c, d, md, tmpdbuf, component_startpos, component_len, sa, tmpstr);
250 component_startpos = i+1;
251 component_len = 0;
253 else {
254 component_len++;
257 rp_add_component(c, d, md, tmpdbuf, component_startpos, component_len, sa, tmpstr);
259 dbuf_close(tmpdbuf);
260 ucstring_destroy(tmpstr);
263 static void read_filename_hlev0(deark *c, lctx *d, struct member_data *md,
264 i64 pos, i64 len)
266 struct de_strarray *sa = NULL;
268 if(md->filename) {
269 ucstring_empty(md->filename);
271 else {
272 md->filename = ucstring_create(c);
275 sa = de_strarray_create(c, MAX_SUBDIR_LEVEL+2);
276 read_path_to_strarray(c, d, md, c->infile, pos, len, sa, 0);
278 de_strarray_make_path(sa, md->filename, DE_MPFLAG_NOTRAILINGSLASH);
279 de_dbg(c, "filename (parsed): \"%s\"", ucstring_getpsz_d(md->filename));
281 de_strarray_destroy(sa);
284 static void read_filename_hlev1_or_exthdr(deark *c, lctx *d, struct member_data *md,
285 i64 pos, i64 len)
287 i64 i;
289 if(md->filename) {
290 ucstring_empty(md->filename);
292 else {
293 md->filename = ucstring_create(c);
296 // Some files seem to assume NUL termination is allowed.
297 dbuf_read_to_ucstring(c->infile, pos, len,
298 md->filename, DE_CONVFLAG_STOP_AT_NUL, md->encoding);
299 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->filename));
301 // I don't think slashes are allowed
302 for(i=0; i<md->filename->len; i++) {
303 if(md->filename->str[i]=='/') {
304 md->filename->str[i]='_';
309 static void exthdr_common(deark *c, lctx *d, struct member_data *md,
310 u8 id, const struct exthdr_type_info_struct *e,
311 i64 pos, i64 dlen)
313 if(dlen<2) return;
314 md->hdr_crc_reported = (u32)de_getu16le(pos);
315 md->have_hdr_crc_reported = 1;
316 md->hdr_crc_field_pos = pos;
317 de_dbg(c, "header crc (reported): 0x%04x", (unsigned int)md->hdr_crc_reported);
318 // TODO: Additional information
321 static void exthdr_filename(deark *c, lctx *d, struct member_data *md,
322 u8 id, const struct exthdr_type_info_struct *e,
323 i64 pos, i64 dlen)
325 read_filename_hlev1_or_exthdr(c, d, md, pos, dlen);
328 static void exthdr_dirname(deark *c, lctx *d, struct member_data *md,
329 u8 id, const struct exthdr_type_info_struct *e,
330 i64 pos, i64 dlen)
332 struct de_strarray *dirname_sa = NULL;
334 if(md->dirname) {
335 ucstring_empty(md->dirname);
337 else {
338 md->dirname = ucstring_create(c);
341 dirname_sa = de_strarray_create(c, MAX_SUBDIR_LEVEL+2);
342 // 0xff is used as the path separator. Don't know what happens if a directory
343 // name contains an actual 0xff byte.
344 read_path_to_strarray(c, d, md, c->infile, pos, dlen, dirname_sa, 1);
345 de_strarray_make_path(dirname_sa, md->dirname, DE_MPFLAG_NOTRAILINGSLASH);
346 de_dbg(c, "%s (parsed): \"%s\"", e->name, ucstring_getpsz_d(md->dirname));
348 de_strarray_destroy(dirname_sa);
351 static void exthdr_msdosattribs(deark *c, lctx *d, struct member_data *md,
352 u8 id, const struct exthdr_type_info_struct *e,
353 i64 pos, i64 dlen)
355 u32 attribs;
356 de_ucstring *descr = NULL;
358 if(dlen<2) goto done;
359 attribs = (u32)de_getu16le(pos);
360 descr = ucstring_create(c);
361 de_describe_dos_attribs(c, (UI)attribs, descr, 0);
362 de_dbg(c, "%s: 0x%04x (%s)", e->name, (UI)attribs, ucstring_getpsz_d(descr));
363 done:
364 ucstring_destroy(descr);
367 static void exthdr_filesize(deark *c, lctx *d, struct member_data *md,
368 u8 id, const struct exthdr_type_info_struct *e,
369 i64 pos, i64 dlen)
371 // TODO: Support this
372 de_warn(c, "Unsupported \"file size\" extended header found. This may prevent "
373 "the rest of the file from being processed correctly.");
376 static void exthdr_windowstimestamp(deark *c, lctx *d, struct member_data *md,
377 u8 id, const struct exthdr_type_info_struct *e,
378 i64 pos, i64 dlen)
380 if(dlen<24) return;
381 read_windows_FILETIME(c, d, md, pos, DE_TIMESTAMPIDX_CREATE, "create time");
382 read_windows_FILETIME(c, d, md, pos+8, DE_TIMESTAMPIDX_MODIFY, "mod time ");
383 read_windows_FILETIME(c, d, md, pos+16, DE_TIMESTAMPIDX_ACCESS, "access time");
386 static void interpret_unix_perms(deark *c, lctx *d, struct member_data *md, unsigned int mode)
388 if(mode & 0100000) { // regular file
389 if(mode & 0111) { // executable
390 md->is_executable = 1;
392 else {
393 md->is_nonexecutable = 1;
397 if((mode & 0170000) == 0120000) {
398 md->is_special = 1; // symlink
402 static void exthdr_unixperms(deark *c, lctx *d, struct member_data *md,
403 u8 id, const struct exthdr_type_info_struct *e,
404 i64 pos, i64 dlen)
406 unsigned int mode;
408 if(dlen<2) return;
409 mode = (unsigned int)de_getu16le(pos);
410 de_dbg(c, "mode: octal(%06o)", mode);
411 interpret_unix_perms(c, d, md, mode);
414 static void exthdr_unixuidgid(deark *c, lctx *d, struct member_data *md,
415 u8 id, const struct exthdr_type_info_struct *e,
416 i64 pos, i64 dlen)
418 i64 uid, gid;
419 if(dlen<4) return;
421 // It's strange that the GID comes first, while the UID comes first in the
422 // level-0 "extended area".
423 gid = de_getu16le(pos);
424 de_dbg(c, "gid: %d", (int)gid);
425 uid = de_getu16le(pos+2);
426 de_dbg(c, "uid: %d", (int)uid);
429 static void exthdr_unixtimestamp(deark *c, lctx *d, struct member_data *md,
430 u8 id, const struct exthdr_type_info_struct *e,
431 i64 pos, i64 dlen)
433 if(dlen<4) return;
434 read_unix_timestamp(c, d, md, pos, DE_TIMESTAMPIDX_MODIFY, "last-modified");
437 static void exthdr_lev3newattribs2(deark *c, lctx *d, struct member_data *md,
438 u8 id, const struct exthdr_type_info_struct *e,
439 i64 pos, i64 dlen)
441 if(dlen<20) return;
443 // TODO: Permission
444 // TODO: GID/UID
446 // [Documented as "creation time", but this is a Unix-style header, so I
447 // wonder if someone mistranslated "ctime" (=change time).]
448 read_unix_timestamp(c, d, md, pos+12, TIMESTAMPIDX_INVALID, "create(?) time");
450 read_unix_timestamp(c, d, md, pos+16, DE_TIMESTAMPIDX_ACCESS, "access time ");
453 static void exthdr_codepage(deark *c, lctx *d, struct member_data *md,
454 u8 id, const struct exthdr_type_info_struct *e,
455 i64 pos, i64 dlen)
457 int n_codepage;
458 de_encoding n_encoding;
459 char descr[100];
461 if(dlen!=4) return;
462 n_codepage = (int)de_geti32le(pos);
463 n_encoding = de_windows_codepage_to_encoding(c, n_codepage, descr, sizeof(descr), 0);
464 de_dbg(c, "codepage: %d (%s)", n_codepage, descr);
465 if(n_encoding != DE_ENCODING_UNKNOWN) {
466 md->encoding = n_encoding;
470 static const struct exthdr_type_info_struct exthdr_type_info_arr[] = {
471 { 0x00, 0, "common", exthdr_common },
472 { 0x01, 0, "filename", exthdr_filename },
473 { 0x02, 0, "dir name", exthdr_dirname },
474 { 0x39, 0, "multi-disc", NULL },
475 { 0x3f, 0, "comment", NULL },
476 { 0x40, 0, "MS-DOS file attribs", exthdr_msdosattribs },
477 { 0x41, 0, "Windows timestamp", exthdr_windowstimestamp },
478 { 0x42, 0, "MS-DOS file size", exthdr_filesize },
479 { 0x43, 0, "time zone", NULL },
480 { 0x44, 0, "UTF-16 filename", NULL },
481 { 0x45, 0, "UTF-16 dir name", NULL },
482 { 0x46, 0, "codepage", exthdr_codepage },
483 { 0x50, 0, "Unix perms", exthdr_unixperms },
484 { 0x51, 0, "Unix UID/GID", exthdr_unixuidgid },
485 { 0x52, 0, "Unix group name", NULL },
486 { 0x53, 0, "Unix username", NULL },
487 { 0x54, 0, "Unix timestamp", exthdr_unixtimestamp },
488 { 0x7d, 0, "capsule", NULL },
489 { 0x7e, 0, "OS/2 extended attribs", NULL },
490 { 0x7f, 0, "level 3 new attribs type-1", NULL }, // (OS/2 only)
491 { 0xff, 0, "level 3 new attribs type-2", exthdr_lev3newattribs2 }
494 static void destroy_member_data(deark *c, struct member_data *md)
496 if(!md) return;
497 ucstring_destroy(md->dirname);
498 ucstring_destroy(md->filename);
499 ucstring_destroy(md->fullfilename);
500 de_free(c, md->cmi);
501 de_free(c, md);
504 static const struct exthdr_type_info_struct *get_exthdr_type_info(u8 id)
506 size_t i;
508 for(i=0; i<DE_ARRAYCOUNT(exthdr_type_info_arr); i++) {
509 if(id == exthdr_type_info_arr[i].id) {
510 return &exthdr_type_info_arr[i];
513 return NULL;
516 static void do_read_ext_header(deark *c, lctx *d, struct member_data *md,
517 i64 pos1, i64 len, i64 dlen)
519 u8 id = 0;
520 const char *name;
521 const struct exthdr_type_info_struct *e = NULL;
523 if(dlen>=1) {
524 id = de_getbyte(pos1);
525 e = get_exthdr_type_info(id);
527 name = e ? e->name : "?";
529 de_dbg(c, "ext header at %d, len=%d (1+%d+%d), id=0x%02x (%s)", (int)pos1, (int)len,
530 (int)(dlen-1), (int)(len-dlen), (unsigned int)id, name);
532 if(dlen<1) return; // Invalid header, too short to even have an id field
534 if(e && e->decoder_fn) {
535 de_dbg_indent(c, 1);
536 e->decoder_fn(c, d, md, id, e, pos1+1, dlen-1);
537 de_dbg_indent(c, -1);
539 else {
540 if(c->debug_level>=2) {
541 de_dbg_hexdump(c, c->infile, pos1+1, dlen-1, 256, NULL, 0x1);
546 static const char *get_os_name(u8 id)
548 const char *name = NULL;
549 switch(id) {
550 case ' ': name="unspecified"; break;
551 case '2': name="OS/2"; break;
552 case '3': name="OS/386?"; break;
553 case '9': name="OS-9"; break;
554 case 'A': name="Amiga"; break;
555 case 'C': name="CP/M"; break;
556 case 'F': name="FLEX"; break;
557 case 'H': name="Human68K"; break;
558 case 'J': name="JVM"; break;
559 case 'K': name="OS-9/68K"; break;
560 case 'M': name="DOS"; break;
561 case 'R': name="RUNser"; break;
562 case 'T': name="TownsOS"; break;
563 case 'U': name="Unix"; break;
564 case 'W': name="Windows NT"; break;
565 case 'a': name="Atari ST?"; break;
566 case 'm': name="Macintosh"; break;
567 case 'w': name="Windows"; break;
569 return name?name:"?";
572 static void do_lev0_ext_area(deark *c, lctx *d, struct member_data *md,
573 i64 pos1, i64 len)
575 if(len<1) return;
576 md->os_id = de_getbyte(pos1);
577 de_dbg(c, "OS id: %d ('%c') (%s)", (int)md->os_id,
578 de_byte_to_printable_char(md->os_id), get_os_name(md->os_id));
580 // TODO: Finish this
581 if(md->os_id=='U') {
582 unsigned int mode;
583 i64 uid, gid;
585 if(len<12) goto done;
587 read_unix_timestamp(c, d, md, pos1+2, DE_TIMESTAMPIDX_MODIFY, "last-modified");
589 mode = (unsigned int)de_getu16le(pos1+6);
590 de_dbg(c, "mode: octal(%06o)", mode);
591 interpret_unix_perms(c, d, md, mode);
593 uid = de_getu16le(pos1+8);
594 de_dbg(c, "uid: %d", (int)uid);
595 gid = de_getu16le(pos1+10);
596 de_dbg(c, "gid: %d", (int)gid);
599 done: ;
602 // AFAICT, we're expected to think of the extended headers as a kind of linked
603 // list. The last field in each node is the "size of next node" (instead of
604 // "pointer to next node", as a real linked list would have). A size of 0 is
605 // like a "nil" pointer, and marks the end of the list.
606 // The "size of the first node" field (analogous to the "head" pointer) is
607 // conceptually not part of the extended headers section.
609 // Note that if we simply shift our frame of reference, this format is identical
610 // to a more typical length-prefixed format. But our code follows the
611 // linked-list model, to make it more consistent with most LHA documentation,
612 // and the various "size" fields.
614 // A return value of 0 means we failed to calculate the size of the
615 // extended headers segment.
616 static int do_read_ext_headers(deark *c, lctx *d, struct member_data *md,
617 i64 pos1, i64 len, i64 first_ext_hdr_size, i64 *tot_bytes_consumed)
619 i64 pos = pos1;
620 i64 this_ext_hdr_size, next_ext_hdr_size;
621 int retval = 0;
622 i64 size_of_size_field;
624 *tot_bytes_consumed = 0;
626 if(first_ext_hdr_size==0) {
627 return 1;
630 de_dbg(c, "ext headers section at %d", (int)pos);
631 de_dbg_indent(c, 1);
633 size_of_size_field = (md->hlev==3) ? 4 : 2;
635 next_ext_hdr_size = first_ext_hdr_size;
636 while(1) {
637 this_ext_hdr_size = next_ext_hdr_size;
638 if(this_ext_hdr_size==0) {
639 retval = 1;
640 *tot_bytes_consumed = pos - pos1;
641 goto done;
643 if(this_ext_hdr_size<size_of_size_field) goto done;
644 if(pos+this_ext_hdr_size > pos1+len) goto done;
646 do_read_ext_header(c, d, md, pos, this_ext_hdr_size, this_ext_hdr_size-size_of_size_field);
648 // Each ext header ends with a "size of next header" field.
649 // We'll read it at this level, instead of in do_read_ext_header().
650 pos += this_ext_hdr_size-size_of_size_field;
651 if(size_of_size_field==2) {
652 next_ext_hdr_size = de_getu16le(pos);
654 else {
655 next_ext_hdr_size = de_getu32le(pos);
657 pos += size_of_size_field;
660 done:
661 if(retval) {
662 de_dbg(c, "size of ext headers section: %d", (int)*tot_bytes_consumed);
664 else {
665 de_dbg(c, "failed to parse all extended headers");
667 de_dbg_indent(c, -1);
668 return retval;
671 static void make_fullfilename(deark *c, lctx *d, struct member_data *md)
673 if(md->fullfilename) return;
675 if(!md->filename) {
676 md->filename = ucstring_create(c);
678 md->fullfilename = ucstring_create(c);
680 if(md->hlev==0) {
681 ucstring_append_ucstring(md->fullfilename, md->filename);
683 else {
684 if(md->is_dir) {
685 ucstring_append_ucstring(md->fullfilename, md->dirname);
687 else {
688 if(ucstring_isnonempty(md->dirname)) {
689 ucstring_append_ucstring(md->fullfilename, md->dirname);
690 ucstring_append_sz(md->fullfilename, "/", DE_ENCODING_LATIN1);
692 if(ucstring_isnonempty(md->filename)) {
693 ucstring_append_ucstring(md->fullfilename, md->filename);
695 else {
696 ucstring_append_char(md->fullfilename, '_');
702 static void decompress_uncompressed(deark *c, lctx *d, struct member_data *md,
703 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
704 struct de_dfilter_results *dres)
706 fmtutil_decompress_uncompressed(c, dcmpri, dcmpro, dres, 0);
709 static void decompress_lhark_lh7(deark *c, lctx *d, struct member_data *md,
710 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
711 struct de_dfilter_results *dres)
713 struct de_lzh_params lzhparams;
715 de_zeromem(&lzhparams, sizeof(struct de_lzh_params));
716 lzhparams.fmt = DE_LZH_FMT_LHARK;
717 lzhparams.zero_codes_block_behavior = DE_LZH_ZCB_65536;
718 lzhparams.warn_about_zero_codes_block = 1;
719 fmtutil_decompress_lzh(c, dcmpri, dcmpro, dres, &lzhparams);
722 // Compression method will be selected based on id_raw[3], which
723 // should be '4'...'8'.
724 static void decompress_lh5x(deark *c, lctx *d, struct member_data *md,
725 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
726 struct de_dfilter_results *dres)
728 struct de_lzh_params lzhparams;
730 if(d->lhark_fmt && md->cmi->id_raw[3]=='7') {
731 decompress_lhark_lh7(c, d, md, dcmpri, dcmpro, dres);
732 return;
735 de_zeromem(&lzhparams, sizeof(struct de_lzh_params));
736 lzhparams.fmt = DE_LZH_FMT_LH5LIKE;
737 lzhparams.subfmt = md->cmi->id_raw[3];
738 lzhparams.zero_codes_block_behavior = DE_LZH_ZCB_65536;
739 lzhparams.warn_about_zero_codes_block = 1;
740 fmtutil_decompress_lzh(c, dcmpri, dcmpro, dres, &lzhparams);
743 static void decompress_lh5(deark *c, lctx *d, struct member_data *md,
744 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
745 struct de_dfilter_results *dres)
747 struct de_lzh_params lzhparams;
749 de_zeromem(&lzhparams, sizeof(struct de_lzh_params));
750 lzhparams.fmt = DE_LZH_FMT_LH5LIKE;
751 lzhparams.subfmt = '5';
752 lzhparams.zero_codes_block_behavior = DE_LZH_ZCB_65536;
753 lzhparams.warn_about_zero_codes_block = 1;
754 fmtutil_decompress_lzh(c, dcmpri, dcmpro, dres, &lzhparams);
757 static void decompress_lz5(deark *c, lctx *d, struct member_data *md,
758 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
759 struct de_dfilter_results *dres)
761 fmtutil_decompress_szdd(c, dcmpri, dcmpro, dres, 0x1);
764 struct cmpr_meth_array_item {
765 unsigned int flags;
766 u32 uniq_id;
767 const char *descr;
768 decompressor_fn decompressor;
771 // Compression methods with a decompressor or a description are usually
772 // listed here, but note that it is also possible for get_cmpr_meth_info()
773 // to handle them procedurally.
774 static const struct cmpr_meth_array_item cmpr_meth_arr[] = {
775 { 0x00, CODE_lhd, "directory", NULL },
776 { 0x00, CODE_lh0, "uncompressed", decompress_uncompressed },
777 { 0x00, CODE_lh1, "LZ77-4K, adaptive Huffman", NULL },
778 { 0x00, CODE_lh4, NULL, decompress_lh5x },
779 { 0x00, CODE_lh5, "LZ77-8K, static Huffman", decompress_lh5 },
780 { 0x00, CODE_lh6, "LZ77-32K, static Huffman", decompress_lh5x },
781 { 0x00, CODE_lh7, NULL, decompress_lh5x },
782 { 0x00, CODE_lh8, NULL, decompress_lh5x },
783 { 0x00, CODE_lz4, "uncompressed (LArc)", decompress_uncompressed },
784 { 0x00, CODE_lz5, "LZSS-4K (LArc)", decompress_lz5 },
785 { 0x00, CODE_pm0, "uncompressed (PMArc)", decompress_uncompressed },
786 { 0x00, CODE_lZ0, "uncompressed (MicroFox PUT)", decompress_uncompressed },
787 { 0x00, CODE_lZ1, "MicroFox PUT lZ1", NULL },
788 { 0x00, CODE_lZ5, "MicroFox PUT lZ5", decompress_lh5 },
789 { 0x00, CODE_S_LH0, "uncompressed (SAR)", decompress_uncompressed },
790 { 0x00, CODE_S_LH5, "SAR LH5", decompress_lh5 }
793 static const u32 other_known_cmpr_methods[] = {
794 CODE_ah0, CODE_ari, CODE_hf0,
795 CODE_lh2, CODE_lh3, CODE_lh4, CODE_lh7, CODE_lh8, CODE_lh9,
796 CODE_lha, CODE_lhb, CODE_lhc, CODE_lhe, CODE_lhx, CODE_lx1,
797 CODE_lz2, CODE_lz3, CODE_lz7, CODE_lz8, CODE_lzs,
798 CODE_pc1, CODE_pm1, CODE_pm2 };
800 // Only call this after is_possible_cmpr_meth() return nonzero.
801 // Caller allocates cmi, and initializes to zeroes.
802 static void get_cmpr_meth_info(const u8 idbuf[5], struct cmpr_meth_info *cmi)
804 size_t k;
805 const struct cmpr_meth_array_item *cmai = NULL;
807 // The first 4 bytes are unique for all known methods.
808 cmi->uniq_id = (u32)de_getu32be_direct(idbuf);
810 de_memcpy(cmi->id_raw, idbuf, 5);
812 // All "possible" methods only use printable characters.
813 de_memcpy(cmi->id_printable_sz, idbuf, 5);
814 cmi->id_printable_sz[5] = '\0';
816 for(k=0; k<DE_ARRAYCOUNT(cmpr_meth_arr); k++) {
817 if(cmpr_meth_arr[k].uniq_id == cmi->uniq_id) {
818 cmai = &cmpr_meth_arr[k];
819 break;
823 if(cmai) {
824 cmi->is_recognized = 1;
825 cmi->decompressor = cmai->decompressor;
827 else {
828 for(k=0; k<DE_ARRAYCOUNT(other_known_cmpr_methods); k++) {
829 if(other_known_cmpr_methods[k] == cmi->uniq_id) {
830 cmi->is_recognized = 1;
831 break;
836 if(cmai && cmai->descr) {
837 de_strlcpy(cmi->descr, cmai->descr, sizeof(cmi->descr));
839 else if(cmi->is_recognized) {
840 de_strlcpy(cmi->descr, "recognized, but no info avail.", sizeof(cmi->descr));
842 else {
843 de_strlcpy(cmi->descr, "?", sizeof(cmi->descr));
847 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
849 struct de_crcobj *crco = (struct de_crcobj*)userdata;
850 de_crcobj_addbuf(crco, buf, buf_len);
853 static void do_extract_file(deark *c, lctx *d, struct member_data *md)
855 de_finfo *fi = NULL;
856 dbuf *outf = NULL;
857 u32 crc_calc;
858 int tsidx;
859 u8 dcmpr_attempted = 0;
860 u8 dcmpr_ok = 0;
861 struct de_dfilter_in_params dcmpri;
862 struct de_dfilter_out_params dcmpro;
863 struct de_dfilter_results dres;
865 if(!md->cmi) goto done;
866 if(md->is_special) {
867 de_dbg(c, "[not extracting special file]");
868 goto done;
870 else if(md->is_dir) {
873 else if(!(md->cmi->decompressor)) {
874 if(!d->unsupp_warned) {
875 de_info(c, "Note: LHA support is incomplete. Some common "
876 "compression methods are not supported.");
877 d->unsupp_warned = 1;
879 de_err(c, "%s: Unsupported compression method '%s'",
880 ucstring_getpsz_d(md->fullfilename), md->cmi->id_printable_sz);
881 goto done;
884 fi = de_finfo_create(c);
886 for(tsidx=0; tsidx<DE_TIMESTAMPIDX_COUNT; tsidx++) {
887 if(md->tsdata[tsidx].ts.is_valid) {
888 fi->timestamp[tsidx] = md->tsdata[tsidx].ts;
892 if(md->is_dir) {
893 fi->is_directory = 1;
895 else if(md->is_executable) {
896 fi->mode_flags |= DE_MODEFLAG_EXE;
898 else if(md->is_nonexecutable) {
899 fi->mode_flags |= DE_MODEFLAG_NONEXE;
902 de_finfo_set_name_from_ucstring(c, fi, md->fullfilename, DE_SNFLAG_FULLPATH);
903 fi->original_filename_flag = 1;
905 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
906 de_crcobj_reset(d->crco);
907 dbuf_set_writelistener(outf, our_writelistener_cb, (void*)d->crco);
909 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
910 dcmpri.f = c->infile;
911 dcmpri.pos = md->compressed_data_pos;
912 dcmpri.len = md->compressed_data_len;
913 dcmpro.f = outf;
914 dcmpro.expected_len = md->orig_size;
915 dcmpro.len_known = 1;
917 if(md->is_dir) goto done; // For directories, we're done.
919 dcmpr_attempted = 1;
920 if(md->cmi->decompressor) {
921 md->cmi->decompressor(c, d, md, &dcmpri, &dcmpro, &dres);
924 if(dres.errcode) {
925 de_err(c, "%s: Decompression failed: %s", ucstring_getpsz_d(md->fullfilename),
926 de_dfilter_get_errmsg(c, &dres));
927 goto done;
930 crc_calc = de_crcobj_getval(d->crco);
931 de_dbg(c, "crc (calculated): 0x%04x", (unsigned int)crc_calc);
932 if(crc_calc != md->crc16) {
933 de_err(c, "%s: CRC check failed", ucstring_getpsz_d(md->fullfilename));
934 goto done;
937 dcmpr_ok = 1;
939 done:
940 if(dcmpr_attempted && md->cmi && md->cmi->uniq_id==CODE_lh7) {
941 if(dcmpr_ok)
942 d->lh7_success_flag = 1;
943 else
944 d->lh7_failed_flag = 1;
946 dbuf_close(outf);
947 de_finfo_destroy(c, fi);
950 static int cksum_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf, i64 buf_len)
952 UI *pcksum = (UI*)brctx->userdata;
953 i64 i;
955 for(i=0; i<buf_len; i++) {
956 *pcksum = (*pcksum + buf[i]) & 0xff;
958 return 1;
961 static void do_check_header_crc(deark *c, lctx *d, struct member_data *md)
963 // LHA members don't have to have a header CRC field, though it's probably
964 // considered best practice to have one when the checksum field doesn't
965 // exist, or there are any extended headers.
966 if(!md->have_hdr_crc_reported) return;
967 de_crcobj_reset(d->crco);
969 // Everything before the CRC field:
970 de_crcobj_addslice(d->crco, c->infile, md->member_pos,
971 md->hdr_crc_field_pos - md->member_pos);
973 // The zeroed-out CRC field:
974 de_crcobj_addzeroes(d->crco, 2);
976 // Everything after the CRC field:
977 de_crcobj_addslice(d->crco, c->infile, md->hdr_crc_field_pos+2,
978 md->compressed_data_pos - (md->hdr_crc_field_pos+2));
980 md->hdr_crc_calc = de_crcobj_getval(d->crco);
981 de_dbg(c, "header crc (calculated): 0x%04x", (UI)md->hdr_crc_calc);
982 if(md->hdr_crc_calc != md->hdr_crc_reported) {
983 de_err(c, "Wrong header CRC: reported=0x%04x, calculated=0x%04x",
984 (UI)md->hdr_crc_reported, (UI)md->hdr_crc_calc);
// Classification of the data found at the position where the next member
// would be expected to start. See lha_classify_whats_next().
enum lha_whats_next_enum {
	LHA_WN_MEMBER, // Looks like a member header
	LHA_WN_TRAILER, // A zero byte, with at most one more byte after it
	LHA_WN_TRAILER_AND_JUNK, // A zero byte, followed by extra data
	LHA_WN_JUNK, // Data that doesn't look like a member or a trailer
	LHA_WN_NOTHING // No data at all
};
996 static enum lha_whats_next_enum lha_classify_whats_next(deark *c, lctx *d, i64 pos, i64 len)
998 u8 b[21];
1000 if(len<=0) return LHA_WN_NOTHING;
1001 b[0] = de_getbyte(pos);
1002 if(b[0]==0 && len<=2) return LHA_WN_TRAILER;
1003 if(b[0]==0 && len<21) return LHA_WN_TRAILER_AND_JUNK;
1004 de_read(&b[1], pos+1, sizeof(b)-1);
1005 if(b[0]==0 && b[1]==0) return LHA_WN_TRAILER_AND_JUNK;
1006 if(b[0]==0 && b[20]!=2) return LHA_WN_TRAILER_AND_JUNK;
1007 if(b[20]>3) return LHA_WN_JUNK;
1008 if(is_possible_cmpr_meth(&b[2])) return LHA_WN_MEMBER;
1009 return LHA_WN_JUNK;
1012 // This single function parses all the different header formats, using lots of
1013 // "if" statements. It is messy, but it's a no-win situation.
1014 // The alternative of four separate functions would have a lot of redundant
1015 // code, and be harder to maintain.
1017 // Caller allocates and initializes md.
1018 // If the member was successfully parsed, sets md->total_size and returns nonzero.
1019 static int do_read_member(deark *c, lctx *d, struct member_data *md)
1021 int retval = 0;
1022 i64 lev0_header_size = 0;
1023 i64 lev1_base_header_size = 0;
1024 i64 lev1_skip_size = 0;
1025 i64 lev2_total_header_size = 0;
1026 i64 lev3_header_size = 0;
1027 i64 pos1 = md->member_pos;
1028 i64 pos = pos1;
1029 i64 nbytes_avail;
1030 i64 exthdr_bytes_consumed = 0;
1031 i64 fnlen = 0;
1032 UI attribs;
1033 UI hdr_checksum_reported = 0;
1034 u8 has_hdr_checksum = 0;
1035 int is_compressed;
1036 int ret;
1037 enum lha_whats_next_enum wn;
1038 u8 cmpr_meth_raw[5];
1039 int saved_indent_level;
1041 de_dbg_indent_save(c, &saved_indent_level);
1043 nbytes_avail = c->infile->len - pos1;
1044 wn = lha_classify_whats_next(c, d, pos1, nbytes_avail);
1045 if(wn!=LHA_WN_MEMBER) {
1046 if(d->member_count==0) {
1047 de_err(c, "Not an LHA file");
1049 else if(wn==LHA_WN_TRAILER || wn==LHA_WN_TRAILER_AND_JUNK) {
1050 de_dbg(c, "trailer at %"I64_FMT, pos1);
1051 if(wn==LHA_WN_TRAILER_AND_JUNK) {
1052 de_info(c, "Note: %"I64_FMT" extra bytes at end of file (offset %"I64_FMT")",
1053 nbytes_avail-1, pos1+1);
1056 else if(wn==LHA_WN_JUNK) {
1057 de_warn(c, "%"I64_FMT" bytes of non-LHA data found at end of file (offset %"I64_FMT")",
1058 nbytes_avail, pos1);
1060 goto done;
1063 de_dbg(c, "member at %"I64_FMT, pos1);
1064 de_dbg_indent(c, 1);
1066 // Look ahead to figure out the header format version.
1067 // This byte was originally the high byte of the "MS-DOS file attribute" field,
1068 // which happened to always be zero.
1069 // In later LHA versions, it is overloaded to identify the header format
1070 // version (called "header level" in LHA jargon).
1071 md->hlev = de_getbyte(pos1+20);
1072 de_dbg(c, "header level: %d", (int)md->hlev);
1073 if(md->hlev>3) {
1074 goto done; // Shouldn't be possible; checked in lha_classify_whats_next().
1077 if(d->member_count==0) {
1078 d->hlev_of_first_member = md->hlev;
1081 if(md->hlev==0) {
1082 lev0_header_size = (i64)de_getbyte_p(&pos);
1083 de_dbg(c, "header size: (2+)%d", (int)lev0_header_size);
1084 hdr_checksum_reported = (UI)de_getbyte_p(&pos);
1085 has_hdr_checksum = 1;
1086 dbuf_buffered_read(c->infile, pos, lev0_header_size, cksum_cbfn, (void*)&md->hdr_checksum_calc);
1088 else if(md->hlev==1) {
1089 lev1_base_header_size = (i64)de_getbyte_p(&pos);
1090 de_dbg(c, "base header size: %d", (int)lev1_base_header_size);
1091 hdr_checksum_reported = (UI)de_getbyte_p(&pos);
1092 has_hdr_checksum = 1;
1093 dbuf_buffered_read(c->infile, pos, lev1_base_header_size, cksum_cbfn, (void*)&md->hdr_checksum_calc);
1095 else if(md->hlev==2) {
1096 lev2_total_header_size = de_getu16le_p(&pos);
1097 de_dbg(c, "total header size: %d", (int)lev2_total_header_size);
1099 else if(md->hlev==3) {
1100 i64 lev3_word_size;
1101 lev3_word_size = de_getu16le_p(&pos);
1102 de_dbg(c, "word size: %d", (int)lev3_word_size);
1103 if(lev3_word_size!=4) {
1104 de_err(c, "Unsupported word size: %d", (int)lev3_word_size);
1105 goto done;
1109 if(has_hdr_checksum) {
1110 de_dbg(c, "header checksum (reported): 0x%02x", hdr_checksum_reported);
1111 de_dbg(c, "header checksum (calculated): 0x%02x", md->hdr_checksum_calc);
1112 if(md->hdr_checksum_calc != hdr_checksum_reported) {
1113 de_err(c, "Wrong header checksum: reported=0x%02x, calculated=0x%02x",
1114 hdr_checksum_reported, md->hdr_checksum_calc);
1118 de_read(cmpr_meth_raw, pos, 5);
1119 md->cmi = de_malloc(c, sizeof(struct cmpr_meth_info));
1120 get_cmpr_meth_info(cmpr_meth_raw, md->cmi);
1121 de_dbg(c, "cmpr method: '%s' (%s)", md->cmi->id_printable_sz, md->cmi->descr);
1122 pos+=5;
1124 if(md->cmi->uniq_id == CODE_lhd) {
1125 is_compressed = 0;
1126 md->is_dir = 1;
1128 else if(md->cmi->decompressor == decompress_uncompressed) {
1129 is_compressed = 0;
1131 else {
1132 is_compressed = 1;
1135 if(md->hlev==1) {
1136 // lev1_skip_size is the distance from the third byte of the extended
1137 // header section, to the end of the compressed data.
1138 lev1_skip_size = de_getu32le_p(&pos);
1139 de_dbg(c, "skip size: %u", (unsigned int)lev1_skip_size);
1140 md->total_size = 2 + lev1_base_header_size + lev1_skip_size;
1142 else {
1143 md->compressed_data_len = de_getu32le(pos);
1144 de_dbg(c, "compressed size: %"I64_FMT, md->compressed_data_len);
1145 pos += 4;
1147 if(md->hlev==0) {
1148 md->total_size = 2 + lev0_header_size + md->compressed_data_len;
1150 else if(md->hlev==2) {
1151 md->total_size = lev2_total_header_size + md->compressed_data_len;
1155 md->orig_size = de_getu32le(pos);
1156 de_dbg(c, "original size: %u", (unsigned int)md->orig_size);
1157 pos += 4;
1159 if(md->hlev==0 || md->hlev==1) {
1160 read_msdos_modtime(c, d, md, pos, "last-modified");
1161 pos += 4; // modification time/date (MS-DOS)
1163 else if(md->hlev==2 || md->hlev==3) {
1164 read_unix_timestamp(c, d, md, pos, DE_TIMESTAMPIDX_MODIFY, "last-modified");
1165 pos += 4; // Unix time
1168 attribs = (UI)de_getbyte_p(&pos);
1169 if(md->hlev==0) {
1170 de_ucstring *attr_descr;
1172 // This is a 2-byte field, but the high byte must be 0 here because it's
1173 // also the header level.
1174 attr_descr = ucstring_create(c);
1175 de_describe_dos_attribs(c, attribs, attr_descr, 0);
1176 de_dbg(c, "attribs: 0x%04x (%s)", attribs, ucstring_getpsz_d(attr_descr));
1177 ucstring_destroy(attr_descr);
1179 else {
1180 de_dbg(c, "obsolete attribs low byte: 0x%02x", attribs);
1182 pos++; // header level or high byte of attribs, already handled
1184 if(md->hlev<=1) {
1185 fnlen = de_getbyte(pos++);
1186 de_dbg(c, "filename len: %d", (int)fnlen);
1187 if(md->hlev==0) {
1188 read_filename_hlev0(c, d, md, pos, fnlen);
1190 else {
1191 read_filename_hlev1_or_exthdr(c, d, md, pos, fnlen);
1193 pos += fnlen;
1196 md->crc16 = (u32)de_getu16le_p(&pos);
1197 de_dbg(c, "crc16 (reported): 0x%04x", (unsigned int)md->crc16);
1199 if(md->hlev==1 || md->hlev==2 || md->hlev==3) {
1200 md->os_id = de_getbyte_p(&pos);
1201 de_dbg(c, "OS id: %d ('%c') (%s)", (int)md->os_id,
1202 de_byte_to_printable_char(md->os_id), get_os_name(md->os_id));
1205 if(md->hlev==3) {
1206 lev3_header_size = de_getu32le_p(&pos);
1207 md->total_size = lev3_header_size + md->compressed_data_len;
1210 if(md->hlev==0) {
1211 i64 ext_headers_size = (2+lev0_header_size) - (pos-pos1);
1212 md->compressed_data_pos = pos1 + 2 + lev0_header_size;
1213 if(ext_headers_size>0) {
1214 de_dbg(c, "extended header area at %d, len=%d", (int)pos, (int)ext_headers_size);
1215 de_dbg_indent(c, 1);
1216 do_lev0_ext_area(c, d, md, pos, ext_headers_size);
1217 de_dbg_indent(c, -1);
1220 else if(md->hlev==1) {
1221 i64 first_ext_hdr_size;
1223 // The last two bytes of the base header are the size of the first ext. header.
1224 pos = pos1 + 2 + lev1_base_header_size - 2;
1225 // TODO: sanitize pos?
1226 first_ext_hdr_size = de_getu16le_p(&pos);
1227 de_dbg(c, "first ext hdr size: %d", (int)first_ext_hdr_size);
1229 ret = do_read_ext_headers(c, d, md, pos, lev1_skip_size, first_ext_hdr_size,
1230 &exthdr_bytes_consumed);
1232 if(!ret) {
1233 de_err(c, "Error parsing extended headers at %d. Cannot extract this file.",
1234 (int)pos);
1235 retval = 1;
1236 goto done;
1239 pos += exthdr_bytes_consumed;
1240 md->compressed_data_pos = pos;
1241 md->compressed_data_len = lev1_skip_size - exthdr_bytes_consumed;
1243 else if(md->hlev==2) {
1244 i64 first_ext_hdr_size;
1246 if(md->os_id=='K') {
1247 // So that some lhasa test files will work.
1248 // TODO: The extended headers section is (usually?) self-terminating, so we
1249 // should be able to parse it and figure out if this bug is present. That
1250 // would be better than just guessing.
1251 lev2_total_header_size += 2;
1252 md->total_size = lev2_total_header_size + md->compressed_data_len;
1253 de_dbg(c, "attempting bug workaround: changing total header size to %d",
1254 (int)lev2_total_header_size);
1257 md->compressed_data_pos = pos1+lev2_total_header_size;
1259 first_ext_hdr_size = de_getu16le_p(&pos);
1260 de_dbg(c, "first ext hdr size: %d", (int)first_ext_hdr_size);
1262 do_read_ext_headers(c, d, md, pos, pos1+lev2_total_header_size-pos,
1263 first_ext_hdr_size, &exthdr_bytes_consumed);
1265 else if(md->hlev==3) {
1266 i64 first_ext_hdr_size;
1268 md->compressed_data_pos = pos1+lev3_header_size;
1270 first_ext_hdr_size = de_getu32le_p(&pos);
1271 de_dbg(c, "first ext hdr size: %d", (int)first_ext_hdr_size);
1273 do_read_ext_headers(c, d, md, pos, pos1+lev3_header_size-pos,
1274 first_ext_hdr_size, &exthdr_bytes_consumed);
1277 do_check_header_crc(c, d, md);
1279 de_dbg(c, "member data (%scompressed) at %"I64_FMT", len=%"I64_FMT,
1280 is_compressed?"":"un",
1281 md->compressed_data_pos, md->compressed_data_len);
1283 make_fullfilename(c, d, md);
1285 de_dbg_indent(c, 1);
1286 do_extract_file(c, d, md);
1287 de_dbg_indent(c, -1);
1289 retval = 1;
1290 done:
1291 de_dbg_indent_restore(c, saved_indent_level);
1292 return retval;
1295 static void de_run_lha(deark *c, de_module_params *mparams)
1297 lctx *d = NULL;
1298 i64 pos;
1299 struct member_data *md = NULL;
1301 d = de_malloc(c, sizeof(lctx));
1303 d->lhark_fmt = (u8)de_get_ext_option_bool(c, "lha:lhark", 0);
1305 // It's not really safe to guess CP437, because Japanese-encoded (CP932?)
1306 // filenames are common.
1307 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_ASCII);
1309 d->hlev_of_first_member = 0xff;
1310 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
1312 pos = 0;
1313 while(1) {
1314 if(pos >= c->infile->len) break;
1316 md = de_malloc(c, sizeof(struct member_data));
1317 md->encoding = d->input_encoding;
1318 md->member_pos = pos;
1319 if(!do_read_member(c, d, md)) goto done;
1320 if(md->total_size<1) goto done;
1322 d->member_count++;
1323 pos += md->total_size;
1325 destroy_member_data(c, md);
1326 md = NULL;
1329 done:
1330 destroy_member_data(c, md);
1331 if(d) {
1332 if(!d->lhark_fmt && d->hlev_of_first_member==1 && d->lh7_failed_flag &&
1333 !d->lh7_success_flag)
1335 de_info(c, "Note: 'lh7' decompression failed. Maybe this file uses "
1336 "LHARK compression. Try \"-opt lha:lhark\".");
1339 de_crcobj_destroy(d->crco);
1340 de_free(c, d);
1344 static int de_identify_lha(deark *c)
1346 int has_ext = 0;
1347 u8 b[22];
1348 struct cmpr_meth_info cmi;
1350 de_read(b, 0, sizeof(b));
1351 if(b[20]>3) return 0; // header level
1353 if(!is_possible_cmpr_meth(&b[2])) return 0;
1355 if(b[20]==0) {
1356 if(b[0]<22) return 0;
1357 if(22 + (int)b[21] + 2 > 2 + (int)b[0]) return 0;
1359 else if(b[20]==1) {
1360 if(b[0]<25) return 0;
1361 if(22 + (int)b[21] + 5 > 2 + (int)b[0]) return 0;
1363 else if(b[20]==2) {
1364 i64 hsize = de_getu16le_direct(&b[0]);
1365 if(hsize < 26) return 0;
1367 else if(b[20]==3) {
1368 if((b[0]!=4 && b[0]!=8) || b[1]!=0) return 0;
1371 de_zeromem(&cmi, sizeof(struct cmpr_meth_info));
1372 get_cmpr_meth_info(&b[2], &cmi);
1373 if(!cmi.is_recognized) {
1374 return 0;
1377 if(de_input_file_has_ext(c, "lzh") ||
1378 de_input_file_has_ext(c, "lha"))
1380 has_ext = 1;
1383 if(has_ext) return 100;
1384 return 80; // Must be less than car_lha
1387 static void de_help_lha(deark *c)
1389 de_msg(c, "-opt lha:lhark : Enable LHARK mode (for 'lh7' compression)");
1392 void de_module_lha(deark *c, struct deark_module_info *mi)
1394 mi->id = "lha";
1395 mi->desc = "LHA/LZH/PMA archive";
1396 mi->run_fn = de_run_lha;
1397 mi->identify_fn = de_identify_lha;
1398 mi->help_fn = de_help_lha;
1401 /////////////////////// CAR (MylesHi!)
1403 struct car_member_data {
1404 i64 member_pos;
1405 i64 total_size;
1406 UI hdr_checksum_calc;
1409 struct car_ctx {
1410 dbuf *hdr_tmp;
1411 dbuf *lha_outf;
1414 static int looks_like_car_member(deark *c, i64 pos)
1416 u8 b[16];
1418 de_read(b, pos, 16);
1419 if(b[2]!='-' || b[3]!='l'|| b[4]!='h' || b[6]!='-') return 0;
1420 if(b[5]!='0' && b[5]!='5') return 0;
1421 if((int)b[0] != (int)b[15] + 25) return 0;
1422 if(dbuf_memcmp(c->infile, pos + (i64)b[15] + 24, (const u8*)"\x20\x00\x00", 3)) return 0;
1423 return 1;
1426 static int do_car_member(deark *c, struct car_ctx *d, struct car_member_data *md)
1428 i64 lev1_base_header_size;
1429 i64 fnlen;
1430 i64 hdr_endpos;
1431 i64 compressed_data_len;
1432 i64 pos1 = md->member_pos;
1433 int retval = 0;
1434 int saved_indent_level;
1436 de_dbg_indent_save(c, &saved_indent_level);
1437 de_dbg(c, "member at %"I64_FMT, pos1);
1438 de_dbg_indent(c, 1);
1440 // Figure out where everything is...
1441 lev1_base_header_size = (i64)de_getbyte(pos1);
1442 de_dbg(c, "base header size: %d", (int)lev1_base_header_size);
1443 hdr_endpos = pos1 + 2 + lev1_base_header_size;
1444 fnlen = lev1_base_header_size - 25;
1445 de_dbg(c, "implied filename len: %d", (int)fnlen);
1446 if(fnlen<0) goto done;
1448 compressed_data_len = de_getu32le(pos1 + 7);
1449 de_dbg(c, "compressed size: %"I64_FMT, compressed_data_len);
1450 if(hdr_endpos + compressed_data_len > c->infile->len) goto done;
1452 // Convert to an LHA level-1 header
1453 dbuf_empty(d->hdr_tmp);
1455 // Fields through uncmpr_size are the same (we'll patch the checksum later)
1456 dbuf_copy(c->infile, pos1, 15, d->hdr_tmp);
1458 dbuf_copy(c->infile, hdr_endpos-7, 4, d->hdr_tmp); // timestamp
1460 // attribute (low byte)
1461 dbuf_copy(c->infile, hdr_endpos-9, 1, d->hdr_tmp);
1462 dbuf_writebyte(d->hdr_tmp, 0x01); // level identifier
1464 // Fields starting with filename length, through crc
1465 dbuf_copy(c->infile, pos1+15, 1+fnlen+2, d->hdr_tmp);
1467 dbuf_writebyte(d->hdr_tmp, 77); // OS ID = 'M' = MS-DOS
1469 // Recalculate checksum
1470 dbuf_buffered_read(d->hdr_tmp, 2, lev1_base_header_size, cksum_cbfn,
1471 (void*)&md->hdr_checksum_calc);
1472 de_dbg(c, "header checksum (calculated): 0x%02x", md->hdr_checksum_calc);
1473 dbuf_writebyte_at(d->hdr_tmp, 1, (u8)md->hdr_checksum_calc);
1474 dbuf_truncate(d->hdr_tmp, 2+lev1_base_header_size);
1476 // Write everything out
1477 dbuf_copy(d->hdr_tmp, 0, d->hdr_tmp->len, d->lha_outf);
1478 de_dbg(c, "member data at %"I64_FMT", len=%"I64_FMT, hdr_endpos, compressed_data_len);
1479 dbuf_copy(c->infile, hdr_endpos, compressed_data_len, d->lha_outf);
1480 md->total_size = (hdr_endpos-md->member_pos) + compressed_data_len;
1481 retval = 1;
1483 done:
1484 de_dbg_indent_restore(c, saved_indent_level);
1485 return retval;
1488 static void de_run_car_lha(deark *c, de_module_params *mparams)
1490 struct car_ctx *d = NULL;
1491 struct car_member_data *md = NULL;
1492 int ok = 0;
1493 i64 pos = 0;
1495 d = de_malloc(c, sizeof(struct car_ctx));
1497 if(!looks_like_car_member(c, 0)) {
1498 de_err(c, "Not a CAR file");
1499 goto done;
1502 d->lha_outf = dbuf_create_output_file(c, "lha", NULL, 0);
1503 d->hdr_tmp = dbuf_create_membuf(c, 0, 0);
1505 md = de_malloc(c, sizeof(struct car_member_data));
1506 while(1) {
1507 if(de_getbyte(pos)==0) {
1508 de_dbg(c, "trailer at %"I64_FMT, pos);
1509 dbuf_writebyte(d->lha_outf, 0);
1510 ok = 1;
1511 break;
1513 if(pos+27 > c->infile->len) goto done;
1514 if(!looks_like_car_member(c, pos)) goto done;
1516 de_zeromem(md, sizeof(struct car_member_data));
1517 md->member_pos = pos;
1518 if(!do_car_member(c, d, md)) goto done;
1519 pos += md->total_size;
1522 done:
1523 de_free(c, md);
1524 if(d) {
1525 if(d->lha_outf) {
1526 dbuf_close(d->lha_outf);
1527 if(!ok) {
1528 de_err(c, "Conversion to LHA format failed");
1531 dbuf_close(d->hdr_tmp);
1532 de_free(c, d);
1536 static int de_identify_car_lha(deark *c)
1538 if(!de_input_file_has_ext(c, "car")) return 0;
1539 if(looks_like_car_member(c, 0)) {
1540 return 95;
1542 return 0;
1545 void de_module_car_lha(deark *c, struct deark_module_info *mi)
1547 mi->id = "car_lha";
1548 mi->desc = "CAR (MylesHi!) LHA-like archive";
1549 mi->run_fn = de_run_car_lha;
1550 mi->identify_fn = de_identify_car_lha;
1553 /////////////////////// ARX
1555 struct arx_member_data {
1556 i64 member_pos;
1557 i64 total_size;
1560 struct arx_ctx {
1561 dbuf *hdr_tmp;
1562 dbuf *lha_outf;
1565 static int looks_like_arx_member(deark *c, i64 pos)
1567 u8 b[22];
1569 de_read(b, pos, sizeof(b));
1570 if(b[2]!='-' || b[3]!='l'|| b[4]!='h' || b[6]!='-') return 0;
1571 if(b[21]!=0) return 0;
1572 return 1;
1575 static int do_arx_member(deark *c, struct arx_ctx *d, struct arx_member_data *md)
1577 i64 lev0_header_size;
1578 i64 hdr_endpos;
1579 i64 compressed_data_len;
1580 i64 unc_data_len;
1581 i64 pos1 = md->member_pos;
1582 UI hdr_checksum_calc = 0;
1583 int is_uncompressed = 0;
1584 int retval = 0;
1585 int saved_indent_level;
1587 de_dbg_indent_save(c, &saved_indent_level);
1588 de_dbg(c, "member at %"I64_FMT, pos1);
1589 de_dbg_indent(c, 1);
1591 lev0_header_size = (i64)de_getbyte(pos1);
1592 de_dbg(c, "header size: %d", (int)lev0_header_size);
1593 if(lev0_header_size<22) goto done;
1594 hdr_endpos = pos1 + 2 + lev0_header_size;
1596 compressed_data_len = de_getu32le(pos1+8);
1597 de_dbg(c, "compressed size: %"I64_FMT, compressed_data_len);
1599 unc_data_len = de_getu32le(pos1+12);
1600 de_dbg(c, "uncmpr. size: %"I64_FMT, unc_data_len);
1602 if(compressed_data_len==0) {
1603 is_uncompressed = 1;
1604 compressed_data_len = unc_data_len;
1607 if(hdr_endpos + compressed_data_len > c->infile->len) goto done;
1609 // Convert to an LHA header
1610 dbuf_empty(d->hdr_tmp);
1612 // Fields through cmpr meth. (We'll patch the checksum, and
1613 // compression method if necessary, later.)
1614 dbuf_copy(c->infile, pos1, 7, d->hdr_tmp);
1616 dbuf_writeu32le(d->hdr_tmp, compressed_data_len);
1617 dbuf_writeu32le(d->hdr_tmp, unc_data_len);
1619 /// Rest of the header can be copied as-is.
1620 dbuf_copy(c->infile, pos1+8+8, lev0_header_size-6-8, d->hdr_tmp);
1622 // No source for the low byte of CRC. ARX doesn't save it.
1623 // (The extra byte after the compression method is not it.)
1624 // Until we support lh1 decompression, we have no way to recalculate it.
1625 // TODO: We could recalculate it for uncompressed files.
1626 dbuf_writebyte(d->hdr_tmp, 00);
1628 if(is_uncompressed) {
1629 dbuf_writebyte_at(d->hdr_tmp, 5, '0'); // lh1 -> lh0
1632 // Recalculate checksum
1633 dbuf_buffered_read(d->hdr_tmp, 2, lev0_header_size, cksum_cbfn,
1634 (void*)&hdr_checksum_calc);
1635 de_dbg(c, "header checksum (calculated): 0x%02x", hdr_checksum_calc);
1636 dbuf_writebyte_at(d->hdr_tmp, 1, (u8)hdr_checksum_calc);
1637 dbuf_truncate(d->hdr_tmp, 2+lev0_header_size);
1639 // Write everything out
1640 dbuf_copy(d->hdr_tmp, 0, d->hdr_tmp->len, d->lha_outf);
1641 de_dbg(c, "member data at %"I64_FMT", len=%"I64_FMT, hdr_endpos, compressed_data_len);
1642 dbuf_copy(c->infile, hdr_endpos, compressed_data_len, d->lha_outf);
1643 md->total_size = 2 + lev0_header_size + compressed_data_len;
1644 retval = 1;
1646 done:
1647 de_dbg_indent_restore(c, saved_indent_level);
1648 return retval;
1651 static void de_run_arx(deark *c, de_module_params *mparams)
1653 struct arx_ctx *d = NULL;
1654 struct arx_member_data *md = NULL;
1655 int ok = 0;
1656 i64 pos = 0;
1658 d = de_malloc(c, sizeof(struct arx_ctx));
1660 if(!looks_like_arx_member(c, 0)) {
1661 de_err(c, "Not an ARX file");
1662 goto done;
1665 d->lha_outf = dbuf_create_output_file(c, "lha", NULL, 0);
1666 d->hdr_tmp = dbuf_create_membuf(c, 0, 0);
1668 md = de_malloc(c, sizeof(struct arx_member_data));
1669 while(1) {
1670 if(de_getbyte(pos)==0) {
1671 de_dbg(c, "trailer at %"I64_FMT, pos);
1672 dbuf_writebyte(d->lha_outf, 0);
1673 ok = 1;
1674 break;
1676 if(pos+27 > c->infile->len) goto done;
1677 if(!looks_like_arx_member(c, pos)) goto done;
1679 de_zeromem(md, sizeof(struct arx_member_data));
1680 md->member_pos = pos;
1681 if(!do_arx_member(c, d, md)) goto done;
1682 pos += md->total_size;
1685 done:
1686 de_free(c, md);
1687 if(d) {
1688 if(d->lha_outf) {
1689 dbuf_close(d->lha_outf);
1690 if(ok) {
1691 de_info(c, "Note: Conversion from ARX to LHA is not fully implemented. "
1692 "The CRC fields will be incorrect.");
1694 else {
1695 de_err(c, "Conversion to LHA format failed");
1698 dbuf_close(d->hdr_tmp);
1699 de_free(c, d);
1703 static int de_identify_arx(deark *c)
1705 if(dbuf_memcmp(c->infile, 2, "-lh1-", 5)) return 0;
1706 if(de_getbyte(20)!=0x20 || de_getbyte(21)!=0x00) return 0;
1707 if(de_input_file_has_ext(c, "arx")) return 100;
1708 return 70;
1711 void de_module_arx(deark *c, struct deark_module_info *mi)
1713 mi->id = "arx";
1714 mi->desc = "ARX LHA-like archive";
1715 mi->run_fn = de_run_arx;
1716 mi->identify_fn = de_identify_arx;