Added support for cp932 (Shift-JIS) conversion
[deark.git] / modules / lha.c
blobb76a40e41d95a8c343cb89ff8ed53d28ae162c9b
1 // This file is part of Deark.
2 // Copyright (C) 2017 Jason Summers
3 // See the file COPYING for terms of use.
5 // LHA/LZH compressed archive format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
// Forward declarations of the modules associated with this file.
DE_DECLARE_MODULE(de_module_lha);
DE_DECLARE_MODULE(de_module_swg);
DE_DECLARE_MODULE(de_module_pakleo);
DE_DECLARE_MODULE(de_module_car_lha);
DE_DECLARE_MODULE(de_module_arx);
DE_DECLARE_MODULE(de_module_ar001);
DE_DECLARE_MODULE(de_module_lharc_sfx_com);
// Used (plus a small margin) as the capacity of the string arrays that
// hold the components of a path.
#define MAX_SUBDIR_LEVEL 32

// Compression method codes. Each value is the first four bytes of the
// 5-byte method ID, read as a big-endian integer (this is what
// get_cmpr_meth_info() stores in cmpr_meth_info::uniq_id).

// IDs delimited by spaces
#define CODE_S_LH0 0x204c4830 // SAR
#define CODE_S_LH5 0x204c4835 // SAR

// IDs delimited by hyphens
#define CODE_ah0   0x2d616830U // MAR
#define CODE_ari   0x2d617269U // MAR
#define CODE_hf0   0x2d686630U // MAR
#define CODE_lZ0   0x2d6c5a30U // PUT
#define CODE_lZ1   0x2d6c5a31U // PUT
#define CODE_lZ5   0x2d6c5a35U // PUT
#define CODE_lh0   0x2d6c6830U
#define CODE_lh1   0x2d6c6831U
#define CODE_lh2   0x2d6c6832U
#define CODE_lh3   0x2d6c6833U
#define CODE_lh4   0x2d6c6834U
#define CODE_lh5   0x2d6c6835U
#define CODE_lh6   0x2d6c6836U
#define CODE_lh7   0x2d6c6837U // standard, or LHARK
#define CODE_lh8   0x2d6c6838U
#define CODE_lh9   0x2d6c6839U
#define CODE_lha   0x2d6c6861U
#define CODE_lhb   0x2d6c6862U
#define CODE_lhc   0x2d6c6863U
#define CODE_lhd   0x2d6c6864U
#define CODE_lhe   0x2d6c6865U
#define CODE_lhx   0x2d6c6878U
#define CODE_ll0   0x2d6c6c30U
#define CODE_ll1   0x2d6c6c31U
#define CODE_lx1   0x2d6c7831U
#define CODE_lz2   0x2d6c7a32U
#define CODE_lz3   0x2d6c7a33U
#define CODE_lz4   0x2d6c7a34U
#define CODE_lz5   0x2d6c7a35U
#define CODE_lz7   0x2d6c7a37U
#define CODE_lz8   0x2d6c7a38U
#define CODE_lzs   0x2d6c7a73U
#define CODE_pm0   0x2d706d30U
#define CODE_pm1   0x2d706d31U
#define CODE_pm2   0x2d706d32U
#define CODE_sw0   0x2d737730U
#define CODE_sw1   0x2d737731U
// Selects the family of archive format being processed. Among other
// things, it decides which entries of the compression method table apply.
enum lha_basefmt_enum {
	BASEFMT_LHA = 0, // LHarc/LHA and other formats that are parsed the same
	BASEFMT_SWG,
	BASEFMT_PAKLEO
};
66 #define TIMESTAMPIDX_INVALID (-1)
67 struct timestamp_data {
68 struct de_timestamp ts; // The best timestamp of this type found so far
69 int quality;
72 struct cmpr_meth_info;
74 struct member_data {
75 u8 hlev; // header level
76 de_encoding encoding;
77 i64 member_pos;
78 i64 total_size;
79 struct cmpr_meth_info *cmi;
80 u8 is_dir;
81 u8 is_special;
82 u8 is_nonexecutable;
83 u8 is_executable;
84 i64 orig_size;
85 u32 hdr_checksum_calc;
87 u8 have_hdr_crc_reported;
88 u32 hdr_crc_reported;
89 u32 hdr_crc_calc;
90 i64 hdr_crc_field_pos;
92 u32 crc_reported;
93 u8 os_id;
94 i64 compressed_data_pos; // relative to beginning of file
95 i64 compressed_data_len;
96 de_ucstring *dirname;
97 de_ucstring *filename;
98 de_ucstring *fullfilename;
99 struct timestamp_data tsdata[DE_TIMESTAMPIDX_COUNT];
102 typedef struct localctx_struct {
103 de_encoding input_encoding;
104 int lhark_policy; // -1=detect, 0=no, 1=yes
105 int lhark_req;
106 enum lha_basefmt_enum basefmt;
107 const char *basefmt_name;
108 u8 hlev_of_first_member;
109 u8 lh7_success_flag; // currently unused
110 u8 lh7_failed_flag; // currently unused
111 u8 trailer_found;
112 int member_count;
113 i64 trailer_pos;
114 struct de_crcobj *crco;
115 struct de_crcobj *crco_cksum;
116 } lctx;
118 typedef void (*decompressor_fn)(deark *c, lctx *d, struct member_data *md,
119 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
120 struct de_dfilter_results *dres);
122 struct cmpr_meth_info {
123 u8 is_recognized;
124 u32 uniq_id;
125 decompressor_fn decompressor;
126 u8 id_raw[5];
127 char id_printable_sz[6];
128 char descr[80];
131 struct exthdr_type_info_struct;
133 typedef void (*exthdr_decoder_fn)(deark *c, lctx *d, struct member_data *md,
134 u8 id, const struct exthdr_type_info_struct *e,
135 i64 pos, i64 dlen);
137 struct exthdr_type_info_struct {
138 u8 id;
139 u8 flags;
140 const char *name;
141 exthdr_decoder_fn decoder_fn;
144 static int lha_isdigit(u8 x)
146 return (x>='0' && x<='9');
149 static int lha_isalpha(u8 x)
151 return ((x>='A' && x<='Z') || (x>='a' && x<='z'));
154 static int lha_isalnum(u8 x)
156 return (lha_isdigit(x) || lha_isalpha(x));
159 static int is_possible_cmpr_meth(const u8 m[5])
161 if(m[0]!=m[4]) return 0;
162 if(m[0]==' ' && m[1]=='L' && m[2]=='H' && lha_isdigit(m[3])) return 1;
163 if(m[0]!='-') return 0;
164 if(!lha_isalpha(m[1]) ||
165 !lha_isalnum(m[2]) ||
166 !lha_isalnum(m[3]))
168 return 0;
170 return 1;
173 static void apply_timestamp(deark *c, lctx *d, struct member_data *md,
174 int tsidx, const struct de_timestamp *ts, int quality)
176 if(!ts->is_valid) return;
177 if(tsidx<0 || tsidx>=DE_TIMESTAMPIDX_COUNT) return;
178 if(quality < md->tsdata[tsidx].quality) return;
179 md->tsdata[tsidx].ts = *ts;
180 md->tsdata[tsidx].quality = quality;
183 static void read_msdos_modtime(deark *c, lctx *d, struct member_data *md,
184 i64 pos, const char *name)
186 i64 mod_time_raw, mod_date_raw;
187 char timestamp_buf[64];
188 struct de_timestamp tmp_timestamp;
190 mod_time_raw = de_getu16le(pos);
191 mod_date_raw = de_getu16le(pos+2);
192 if(mod_time_raw==0 && mod_date_raw==0) {
193 de_dbg(c, "%s: (not set)", name);
194 return;
196 de_dos_datetime_to_timestamp(&tmp_timestamp, mod_date_raw, mod_time_raw);
197 tmp_timestamp.tzcode = DE_TZCODE_LOCAL;
198 de_timestamp_to_string(&tmp_timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
199 de_dbg(c, "%s: %s", name, timestamp_buf);
200 apply_timestamp(c, d, md, DE_TIMESTAMPIDX_MODIFY, &tmp_timestamp, 10);
203 static void read_windows_FILETIME(deark *c, lctx *d, struct member_data *md,
204 i64 pos, int tsidx, const char *name)
206 i64 t_FILETIME;
207 char timestamp_buf[64];
208 struct de_timestamp tmp_timestamp;
210 t_FILETIME = de_geti64le(pos);
211 de_FILETIME_to_timestamp(t_FILETIME, &tmp_timestamp, 0x1);
212 if(t_FILETIME<=0) tmp_timestamp.is_valid = 0;
213 de_timestamp_to_string(&tmp_timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
214 de_dbg(c, "%s: %"I64_FMT" (%s)", name, t_FILETIME, timestamp_buf);
215 apply_timestamp(c, d, md, tsidx, &tmp_timestamp, 90);
218 static void read_unix_timestamp(deark *c, lctx *d, struct member_data *md,
219 i64 pos, int tsidx, const char *name)
221 i64 t;
222 char timestamp_buf[64];
223 struct de_timestamp tmp_timestamp;
225 t = de_geti32le(pos);
226 de_unix_time_to_timestamp(t, &tmp_timestamp, 0x1);
227 de_timestamp_to_string(&tmp_timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
228 de_dbg(c, "%s: %d (%s)", name, (int)t, timestamp_buf);
229 apply_timestamp(c, d, md, tsidx, &tmp_timestamp, 50);
232 static void rp_add_component(deark *c, lctx *d, struct member_data *md,
233 dbuf *f, i64 pos, i64 len, struct de_strarray *sa, de_ucstring *tmpstr)
235 if(len<1) return;
236 ucstring_empty(tmpstr);
237 dbuf_read_to_ucstring(f, pos, len, tmpstr, 0, md->encoding);
238 de_strarray_push(sa, tmpstr);
241 static void read_path_to_strarray(deark *c, lctx *d, struct member_data *md,
242 dbuf *inf, i64 pos, i64 len, struct de_strarray *sa, int is_exthdr_dirname)
244 dbuf *tmpdbuf = NULL;
245 de_ucstring *tmpstr = NULL;
246 i64 component_startpos;
247 i64 component_len;
248 i64 i;
250 tmpstr = ucstring_create(c);
252 tmpdbuf = dbuf_create_membuf(c, len, 0);
253 dbuf_copy(inf, pos, len, tmpdbuf);
255 component_startpos = 0;
256 component_len = 0;
258 for(i=0; i<len; i++) {
259 u8 ch;
261 ch = dbuf_getbyte(tmpdbuf, i);
262 if(ch==0x00) break; // Tolerate NUL termination
263 if((is_exthdr_dirname && ch==0xff) ||
264 (!is_exthdr_dirname && (ch=='\\' || ch=='/')))
266 component_len = i - component_startpos;
267 rp_add_component(c, d, md, tmpdbuf, component_startpos, component_len, sa, tmpstr);
268 component_startpos = i+1;
269 component_len = 0;
271 else {
272 component_len++;
275 rp_add_component(c, d, md, tmpdbuf, component_startpos, component_len, sa, tmpstr);
277 dbuf_close(tmpdbuf);
278 ucstring_destroy(tmpstr);
281 static void read_filename_hlev0(deark *c, lctx *d, struct member_data *md,
282 i64 pos, i64 len)
284 struct de_strarray *sa = NULL;
286 if(md->filename) {
287 ucstring_empty(md->filename);
289 else {
290 md->filename = ucstring_create(c);
293 sa = de_strarray_create(c, MAX_SUBDIR_LEVEL+2);
294 read_path_to_strarray(c, d, md, c->infile, pos, len, sa, 0);
296 de_strarray_make_path(sa, md->filename, DE_MPFLAG_NOTRAILINGSLASH);
297 de_dbg(c, "filename (parsed): \"%s\"", ucstring_getpsz_d(md->filename));
299 de_strarray_destroy(sa);
302 static void read_filename_hlev1_or_exthdr(deark *c, lctx *d, struct member_data *md,
303 i64 pos, i64 len)
305 i64 i;
307 if(md->filename) {
308 ucstring_empty(md->filename);
310 else {
311 md->filename = ucstring_create(c);
314 // Some files seem to assume NUL termination is allowed.
315 dbuf_read_to_ucstring(c->infile, pos, len,
316 md->filename, DE_CONVFLAG_STOP_AT_NUL, md->encoding);
317 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->filename));
319 // I don't think slashes are allowed
320 for(i=0; i<md->filename->len; i++) {
321 if(md->filename->str[i]=='/') {
322 md->filename->str[i]='_';
327 static void exthdr_common(deark *c, lctx *d, struct member_data *md,
328 u8 id, const struct exthdr_type_info_struct *e,
329 i64 pos, i64 dlen)
331 if(dlen<2) return;
332 md->hdr_crc_reported = (u32)de_getu16le(pos);
333 md->have_hdr_crc_reported = 1;
334 md->hdr_crc_field_pos = pos;
335 de_dbg(c, "header crc (reported): 0x%04x", (UI)md->hdr_crc_reported);
336 // TODO: Additional information
339 static void exthdr_filename(deark *c, lctx *d, struct member_data *md,
340 u8 id, const struct exthdr_type_info_struct *e,
341 i64 pos, i64 dlen)
343 read_filename_hlev1_or_exthdr(c, d, md, pos, dlen);
346 static void exthdr_dirname(deark *c, lctx *d, struct member_data *md,
347 u8 id, const struct exthdr_type_info_struct *e,
348 i64 pos, i64 dlen)
350 struct de_strarray *dirname_sa = NULL;
352 if(md->dirname) {
353 ucstring_empty(md->dirname);
355 else {
356 md->dirname = ucstring_create(c);
359 dirname_sa = de_strarray_create(c, MAX_SUBDIR_LEVEL+2);
360 // 0xff is used as the path separator. Don't know what happens if a directory
361 // name contains an actual 0xff byte.
362 read_path_to_strarray(c, d, md, c->infile, pos, dlen, dirname_sa, 1);
363 de_strarray_make_path(dirname_sa, md->dirname, DE_MPFLAG_NOTRAILINGSLASH);
364 de_dbg(c, "%s (parsed): \"%s\"", e->name, ucstring_getpsz_d(md->dirname));
366 de_strarray_destroy(dirname_sa);
369 static void exthdr_msdosattribs(deark *c, lctx *d, struct member_data *md,
370 u8 id, const struct exthdr_type_info_struct *e,
371 i64 pos, i64 dlen)
373 u32 attribs;
374 de_ucstring *descr = NULL;
376 if(dlen<2) goto done;
377 attribs = (u32)de_getu16le(pos);
378 descr = ucstring_create(c);
379 de_describe_dos_attribs(c, (UI)attribs, descr, 0);
380 de_dbg(c, "%s: 0x%04x (%s)", e->name, (UI)attribs, ucstring_getpsz_d(descr));
381 done:
382 ucstring_destroy(descr);
385 static void exthdr_filesize(deark *c, lctx *d, struct member_data *md,
386 u8 id, const struct exthdr_type_info_struct *e,
387 i64 pos, i64 dlen)
389 // TODO: Support this
390 de_warn(c, "Unsupported \"file size\" extended header found. This may prevent "
391 "the rest of the file from being processed correctly.");
394 static void exthdr_windowstimestamp(deark *c, lctx *d, struct member_data *md,
395 u8 id, const struct exthdr_type_info_struct *e,
396 i64 pos, i64 dlen)
398 if(dlen<24) return;
399 read_windows_FILETIME(c, d, md, pos, DE_TIMESTAMPIDX_CREATE, "create time");
400 read_windows_FILETIME(c, d, md, pos+8, DE_TIMESTAMPIDX_MODIFY, "mod time ");
401 read_windows_FILETIME(c, d, md, pos+16, DE_TIMESTAMPIDX_ACCESS, "access time");
404 static void interpret_unix_perms(deark *c, lctx *d, struct member_data *md, UI mode)
406 if(mode & 0100000) { // regular file
407 if(mode & 0111) { // executable
408 md->is_executable = 1;
410 else {
411 md->is_nonexecutable = 1;
415 if((mode & 0170000) == 0120000) {
416 md->is_special = 1; // symlink
420 static void exthdr_unixperms(deark *c, lctx *d, struct member_data *md,
421 u8 id, const struct exthdr_type_info_struct *e,
422 i64 pos, i64 dlen)
424 UI mode;
426 if(dlen<2) return;
427 mode = (UI)de_getu16le(pos);
428 de_dbg(c, "mode: octal(%06o)", mode);
429 interpret_unix_perms(c, d, md, mode);
432 static void exthdr_unixuidgid(deark *c, lctx *d, struct member_data *md,
433 u8 id, const struct exthdr_type_info_struct *e,
434 i64 pos, i64 dlen)
436 i64 uid, gid;
437 if(dlen<4) return;
439 // It's strange that the GID comes first, while the UID comes first in the
440 // level-0 "extended area".
441 gid = de_getu16le(pos);
442 de_dbg(c, "gid: %d", (int)gid);
443 uid = de_getu16le(pos+2);
444 de_dbg(c, "uid: %d", (int)uid);
447 static void exthdr_unixtimestamp(deark *c, lctx *d, struct member_data *md,
448 u8 id, const struct exthdr_type_info_struct *e,
449 i64 pos, i64 dlen)
451 if(dlen<4) return;
452 read_unix_timestamp(c, d, md, pos, DE_TIMESTAMPIDX_MODIFY, "last-modified");
455 static void exthdr_lev3newattribs2(deark *c, lctx *d, struct member_data *md,
456 u8 id, const struct exthdr_type_info_struct *e,
457 i64 pos, i64 dlen)
459 if(dlen<20) return;
461 // TODO: Permission
462 // TODO: GID/UID
464 // [Documented as "creation time", but this is a Unix-style header, so I
465 // wonder if someone mistranslated "ctime" (=change time).]
466 read_unix_timestamp(c, d, md, pos+12, TIMESTAMPIDX_INVALID, "create(?) time");
468 read_unix_timestamp(c, d, md, pos+16, DE_TIMESTAMPIDX_ACCESS, "access time ");
471 static void exthdr_codepage(deark *c, lctx *d, struct member_data *md,
472 u8 id, const struct exthdr_type_info_struct *e,
473 i64 pos, i64 dlen)
475 int n_codepage;
476 de_encoding n_encoding;
477 char descr[100];
479 if(dlen!=4) return;
480 n_codepage = (int)de_geti32le(pos);
481 n_encoding = de_windows_codepage_to_encoding(c, n_codepage, descr, sizeof(descr), 0);
482 de_dbg(c, "codepage: %d (%s)", n_codepage, descr);
483 if(n_encoding != DE_ENCODING_UNKNOWN) {
484 md->encoding = n_encoding;
488 static const struct exthdr_type_info_struct exthdr_type_info_arr[] = {
489 { 0x00, 0, "common", exthdr_common },
490 { 0x01, 0, "filename", exthdr_filename },
491 { 0x02, 0, "dir name", exthdr_dirname },
492 { 0x39, 0, "multi-disc", NULL },
493 { 0x3f, 0, "comment", NULL },
494 { 0x40, 0, "MS-DOS file attribs", exthdr_msdosattribs },
495 { 0x41, 0, "Windows timestamp", exthdr_windowstimestamp },
496 { 0x42, 0, "MS-DOS file size", exthdr_filesize },
497 { 0x43, 0, "time zone", NULL },
498 { 0x44, 0, "UTF-16 filename", NULL },
499 { 0x45, 0, "UTF-16 dir name", NULL },
500 { 0x46, 0, "codepage", exthdr_codepage },
501 { 0x50, 0, "Unix perms", exthdr_unixperms },
502 { 0x51, 0, "Unix UID/GID", exthdr_unixuidgid },
503 { 0x52, 0, "Unix group name", NULL },
504 { 0x53, 0, "Unix username", NULL },
505 { 0x54, 0, "Unix timestamp", exthdr_unixtimestamp },
506 { 0x7d, 0, "capsule", NULL },
507 { 0x7e, 0, "OS/2 extended attribs", NULL },
508 { 0x7f, 0, "level 3 new attribs type-1", NULL }, // (OS/2 only)
509 { 0xff, 0, "level 3 new attribs type-2", exthdr_lev3newattribs2 }
512 static void destroy_member_data(deark *c, struct member_data *md)
514 if(!md) return;
515 ucstring_destroy(md->dirname);
516 ucstring_destroy(md->filename);
517 ucstring_destroy(md->fullfilename);
518 de_free(c, md->cmi);
519 de_free(c, md);
522 static const struct exthdr_type_info_struct *get_exthdr_type_info(u8 id)
524 size_t i;
526 for(i=0; i<DE_ARRAYCOUNT(exthdr_type_info_arr); i++) {
527 if(id == exthdr_type_info_arr[i].id) {
528 return &exthdr_type_info_arr[i];
531 return NULL;
534 static void do_read_ext_header(deark *c, lctx *d, struct member_data *md,
535 i64 pos1, i64 len, i64 dlen)
537 u8 id = 0;
538 const char *name;
539 const struct exthdr_type_info_struct *e = NULL;
541 if(dlen>=1) {
542 id = de_getbyte(pos1);
543 e = get_exthdr_type_info(id);
545 name = e ? e->name : "?";
547 de_dbg(c, "ext header at %"I64_FMT", len=%"I64_FMT" (1+%"I64_FMT"+%"I64_FMT"), id=0x%02x (%s)",
548 pos1, len, dlen-1, len-dlen, (UI)id, name);
550 if(dlen<1) return; // Invalid header, too short to even have an id field
552 de_dbg_indent(c, 1);
553 if(e && e->decoder_fn) {
554 e->decoder_fn(c, d, md, id, e, pos1+1, dlen-1);
556 else {
557 if(c->debug_level>=2) {
558 de_dbg_hexdump(c, c->infile, pos1+1, dlen-1, 256, NULL, 0x1);
561 de_dbg_indent(c, -1);
564 static const char *get_os_name(u8 id)
566 const char *name = NULL;
567 switch(id) {
568 case ' ': name="unspecified"; break;
569 case '2': name="OS/2"; break;
570 case '3': name="OS/386?"; break;
571 case '9': name="OS-9"; break;
572 case 'A': name="Amiga"; break;
573 case 'C': name="CP/M"; break;
574 case 'F': name="FLEX"; break;
575 case 'H': name="Human68K"; break;
576 case 'J': name="JVM"; break;
577 case 'K': name="OS-9/68K"; break;
578 case 'M': name="DOS"; break;
579 case 'R': name="RUNser"; break;
580 case 'T': name="TownsOS"; break;
581 case 'U': name="Unix"; break;
582 case 'W': name="Windows NT"; break;
583 case 'a': name="Atari ST?"; break;
584 case 'm': name="Macintosh"; break;
585 case 'w': name="Windows"; break;
587 return name?name:"?";
590 static void do_lev0_ext_area(deark *c, lctx *d, struct member_data *md,
591 i64 pos1, i64 len)
593 if(len<1) return;
594 md->os_id = de_getbyte(pos1);
595 de_dbg(c, "OS id: %d ('%c') (%s)", (int)md->os_id,
596 de_byte_to_printable_char(md->os_id), get_os_name(md->os_id));
598 // TODO: Finish this
599 if(md->os_id=='U') {
600 UI mode;
601 i64 uid, gid;
603 if(len<12) goto done;
605 read_unix_timestamp(c, d, md, pos1+2, DE_TIMESTAMPIDX_MODIFY, "last-modified");
607 mode = (UI)de_getu16le(pos1+6);
608 de_dbg(c, "mode: octal(%06o)", mode);
609 interpret_unix_perms(c, d, md, mode);
611 uid = de_getu16le(pos1+8);
612 de_dbg(c, "uid: %d", (int)uid);
613 gid = de_getu16le(pos1+10);
614 de_dbg(c, "gid: %d", (int)gid);
617 done: ;
620 // AFAICT, we're expected to think of the extended headers as a kind of linked
621 // list. The last field in each node is the "size of next node" (instead of
622 // "pointer to next node", as a real linked list would have). A size of 0 is
623 // like a "nil" pointer, and marks the end of the list.
624 // The "size of the first node" field (analogous to the "head" pointer) is
625 // conceptually not part of the extended headers section.
627 // Note that if we simply shift our frame of reference, this format is identical
628 // to a more typical length-prefixed format. But our code follows the
629 // linked-list model, to make it more consistent with most LHA documentation,
630 // and the various "size" fields.
632 // A return value of 0 means we failed to calculate the size of the
633 // extended headers segment.
634 static int do_read_ext_headers(deark *c, lctx *d, struct member_data *md,
635 i64 pos1, i64 len, i64 first_ext_hdr_size, i64 *tot_bytes_consumed)
637 i64 pos = pos1;
638 i64 this_ext_hdr_size, next_ext_hdr_size;
639 int retval = 0;
640 i64 size_of_size_field;
642 *tot_bytes_consumed = 0;
644 if(first_ext_hdr_size==0) {
645 return 1;
648 de_dbg(c, "ext headers section at %"I64_FMT, pos);
649 de_dbg_indent(c, 1);
651 size_of_size_field = (md->hlev==3) ? 4 : 2;
653 next_ext_hdr_size = first_ext_hdr_size;
654 while(1) {
655 this_ext_hdr_size = next_ext_hdr_size;
656 if(this_ext_hdr_size==0) {
657 retval = 1;
658 *tot_bytes_consumed = pos - pos1;
659 goto done;
661 if(this_ext_hdr_size<size_of_size_field) goto done;
662 if(pos+this_ext_hdr_size > pos1+len) goto done;
664 do_read_ext_header(c, d, md, pos, this_ext_hdr_size, this_ext_hdr_size-size_of_size_field);
666 // Each ext header ends with a "size of next header" field.
667 // We'll read it at this level, instead of in do_read_ext_header().
668 pos += this_ext_hdr_size-size_of_size_field;
669 if(size_of_size_field==2) {
670 next_ext_hdr_size = de_getu16le(pos);
672 else {
673 next_ext_hdr_size = de_getu32le(pos);
675 pos += size_of_size_field;
678 done:
679 if(retval) {
680 de_dbg(c, "size of ext headers section: %"I64_FMT, (i64)*tot_bytes_consumed);
682 else {
683 de_dbg(c, "failed to parse all extended headers");
685 de_dbg_indent(c, -1);
686 return retval;
689 static void make_fullfilename(deark *c, lctx *d, struct member_data *md)
691 if(md->fullfilename) return;
693 if(!md->filename) {
694 md->filename = ucstring_create(c);
696 md->fullfilename = ucstring_create(c);
698 if(md->hlev==0) {
699 ucstring_append_ucstring(md->fullfilename, md->filename);
701 else {
702 if(md->is_dir) {
703 ucstring_append_ucstring(md->fullfilename, md->dirname);
705 else {
706 if(ucstring_isnonempty(md->dirname)) {
707 ucstring_append_ucstring(md->fullfilename, md->dirname);
708 ucstring_append_sz(md->fullfilename, "/", DE_ENCODING_LATIN1);
710 if(ucstring_isnonempty(md->filename)) {
711 ucstring_append_ucstring(md->fullfilename, md->filename);
713 else {
714 ucstring_append_char(md->fullfilename, '_');
720 static void decompress_uncompressed(deark *c, lctx *d, struct member_data *md,
721 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
722 struct de_dfilter_results *dres)
724 fmtutil_decompress_uncompressed(c, dcmpri, dcmpro, dres, 0);
727 static void decompress_lh1(deark *c, lctx *d, struct member_data *md,
728 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
729 struct de_dfilter_results *dres)
731 fmtutil_lh1_codectype1(c, dcmpri, dcmpro, dres, NULL);
734 // Caller supplies fmt (DE_LH5X_FMT_*).
735 static void decompress_lh5x_internal(deark *c, lctx *d,
736 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
737 struct de_dfilter_results *dres, int fmt)
739 struct de_lh5x_params lzhparams;
741 de_zeromem(&lzhparams, sizeof(struct de_lh5x_params));
742 lzhparams.fmt = fmt;
743 lzhparams.zero_codes_block_behavior = DE_LH5X_ZCB_65536;
744 lzhparams.warn_about_zero_codes_block = 1;
745 lzhparams.history_fill_val = 0x20;
746 fmtutil_decompress_lh5x(c, dcmpri, dcmpro, dres, &lzhparams);
749 static int decompress_lh5x_dry_run(deark *c, lctx *d, struct member_data *md,
750 struct de_dfilter_in_params *dcmpri, int fmt)
752 int retval = 0;
753 dbuf *outf = NULL;
754 struct de_crcobj *crco = NULL;
755 u32 crc_calc;
756 int old_debug_level;
757 struct de_dfilter_out_params dcmpro;
758 struct de_dfilter_results dres;
760 // Make a "dummy" dbuf to write to, which doesn't store the data, but
761 // tracks the size and CRC.
762 outf = dbuf_create_custom_dbuf(c, 0, 0);
763 dbuf_enable_wbuffer(outf);
764 crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
765 dbuf_set_writelistener(outf, de_writelistener_for_crc, crco);
767 de_dfilter_init_objects(c, NULL, &dcmpro, &dres);
768 dcmpro.f = outf;
769 dcmpro.len_known = 1;
770 dcmpro.expected_len = md->orig_size;
772 old_debug_level = c->debug_level;
773 c->debug_level = 0; // hack
774 decompress_lh5x_internal(c, d, dcmpri, &dcmpro, &dres, fmt);
775 c->debug_level = old_debug_level;
776 dbuf_flush(outf);
778 if(dres.errcode) goto done;
779 if(outf->len != md->orig_size) goto done;
780 // Note: Another possible test would be if
781 // (dres.bytes_consumed == md->compressed_data_len).
782 crc_calc = de_crcobj_getval(crco);
783 if(crc_calc != md->crc_reported) goto done;
784 retval = 1;
786 done:
787 dbuf_close(outf);
788 de_crcobj_destroy(crco);
789 return retval;
792 // Sets d->lhark_policy.
793 // This detection is slow, so we only do it for the first lh7 member in a file,
794 // and assume all other lh7 members use the same format.
795 static void detect_lhark(deark *c, lctx *d, struct member_data *md,
796 struct de_dfilter_in_params *dcmpri)
798 int ret;
799 int ok = 0;
800 const char *fmt_name;
802 if(d->lhark_policy>=0) goto done; // shouldn't get here
803 if(d->lhark_req>=0) {
804 d->lhark_policy = d->lhark_req; // shouldn't get here
805 goto done;
808 de_dbg(c, "[detecting lh7 format]");
809 de_dbg_indent(c, 1);
811 if(md->hlev!=1 || md->os_id!=0x20) {
812 d->lhark_policy = 0;
813 ok = 1;
814 goto done;
817 ret = decompress_lh5x_dry_run(c, d, md, dcmpri, DE_LH5X_FMT_LH7);
818 if(ret) {
819 d->lhark_policy = 0;
820 ok = 1;
821 goto done;
824 ret = decompress_lh5x_dry_run(c, d, md, dcmpri, DE_LH5X_FMT_LHARK);
825 if(ret) {
826 d->lhark_policy = 1;
827 ok = 1;
828 goto done;
831 d->lhark_policy = 0;
833 done:
834 if(ok) {
835 if(d->lhark_policy>0)
836 fmt_name = "LHARK";
837 else
838 fmt_name = "standard lh7";
840 else {
841 fmt_name = "unknown, assuming standard lh7";
843 de_dbg(c, "detected lh7 format: %s", fmt_name);
844 de_dbg_indent(c, -1);
847 // Compression method will be selected based on id_raw[3] (which
848 // should be '4'...'8'), etc.
849 static void decompress_lh5x_auto(deark *c, lctx *d, struct member_data *md,
850 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
851 struct de_dfilter_results *dres)
853 int fmt;
855 switch(md->cmi->id_raw[3]) {
856 case '4': case '5':
857 fmt = DE_LH5X_FMT_LH5;
858 break;
859 case '6':
860 fmt = DE_LH5X_FMT_LH6;
861 break;
862 case '7':
863 if(d->lhark_policy<0) {
864 detect_lhark(c, d, md, dcmpri);
866 if(d->lhark_policy>0) {
867 fmt = DE_LH5X_FMT_LHARK;
869 else {
870 fmt = DE_LH5X_FMT_LH7;
872 break;
873 case '8':
874 fmt = DE_LH5X_FMT_LH7;
875 break;
876 default:
877 return;
880 decompress_lh5x_internal(c, d, dcmpri, dcmpro, dres, fmt);
883 static void decompress_lh5(deark *c, lctx *d, struct member_data *md,
884 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
885 struct de_dfilter_results *dres)
887 decompress_lh5x_internal(c, d, dcmpri, dcmpro, dres, DE_LH5X_FMT_LH5);
890 static void decompress_lz5(deark *c, lctx *d, struct member_data *md,
891 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
892 struct de_dfilter_results *dres)
894 fmtutil_decompress_lzss1(c, dcmpri, dcmpro, dres, 0x2);
897 static void decompress_pakleo(deark *c, lctx *d, struct member_data *md,
898 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
899 struct de_dfilter_results *dres)
901 struct de_lzw_params delzwp;
903 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
904 delzwp.fmt = DE_LZWFMT_PAKLEO;
905 fmtutil_decompress_lzw(c, dcmpri, dcmpro, dres, &delzwp);
908 struct cmpr_meth_array_item {
909 enum lha_basefmt_enum basefmt;
910 u8 flags;
911 u32 uniq_id;
912 const char *descr;
913 decompressor_fn decompressor;
916 // Compression methods with a decompressor or a description are usually
917 // listed here, but note that it is also possible for get_cmpr_meth_info()
918 // to handle them procedurally.
919 static const struct cmpr_meth_array_item cmpr_meth_arr[] = {
920 { BASEFMT_LHA, 0x00, CODE_lhd, "directory", NULL },
921 { BASEFMT_LHA, 0x00, CODE_lh0, "uncompressed", decompress_uncompressed },
922 { BASEFMT_LHA, 0x00, CODE_lh1, "LZ77-4K, adaptive Huffman", decompress_lh1 },
923 { BASEFMT_LHA, 0x00, CODE_lh4, "LZ77-4K, static Huffman", decompress_lh5x_auto },
924 { BASEFMT_LHA, 0x00, CODE_lh5, "LZ77-8K, static Huffman", decompress_lh5 },
925 { BASEFMT_LHA, 0x00, CODE_lh6, "LZ77-32K, static Huffman", decompress_lh5x_auto },
926 { BASEFMT_LHA, 0x00, CODE_lh7, NULL, decompress_lh5x_auto },
927 { BASEFMT_LHA, 0x00, CODE_lh8, NULL, decompress_lh5x_auto },
928 { BASEFMT_LHA, 0x00, CODE_lz4, "uncompressed (LArc)", decompress_uncompressed },
929 { BASEFMT_LHA, 0x00, CODE_lz5, "LZSS-4K (LArc)", decompress_lz5 },
930 { BASEFMT_LHA, 0x00, CODE_pm0, "uncompressed (PMArc)", decompress_uncompressed },
931 { BASEFMT_LHA, 0x00, CODE_lZ0, "uncompressed (MicroFox PUT)", decompress_uncompressed },
932 { BASEFMT_LHA, 0x00, CODE_lZ1, "MicroFox PUT lZ1", decompress_lh1 },
933 { BASEFMT_LHA, 0x00, CODE_lZ5, "MicroFox PUT lZ5", decompress_lh5 },
934 { BASEFMT_LHA, 0x00, CODE_S_LH0, "uncompressed (SAR)", decompress_uncompressed },
935 { BASEFMT_LHA, 0x00, CODE_S_LH5, "SAR LH5", decompress_lh5 },
936 { BASEFMT_SWG, 0x00, CODE_sw0, "uncompressed", decompress_uncompressed },
937 { BASEFMT_SWG, 0x00, CODE_sw1, NULL, NULL },
938 { BASEFMT_PAKLEO, 0x00, CODE_ll0, "uncompressed", decompress_uncompressed },
939 { BASEFMT_PAKLEO, 0x00, CODE_ll1, "LZW", decompress_pakleo }
942 // For basefmt==BASEFMT_LHA only
943 static const u32 other_known_cmpr_methods[] = {
944 CODE_ah0, CODE_ari, CODE_hf0,
945 CODE_lh2, CODE_lh3, CODE_lh9,
946 CODE_lha, CODE_lhb, CODE_lhc, CODE_lhe, CODE_lhx, CODE_lx1,
947 CODE_lz2, CODE_lz3, CODE_lz7, CODE_lz8, CODE_lzs,
948 CODE_pm1, CODE_pm2 };
950 // Only call this after is_possible_cmpr_meth() return nonzero.
951 // Caller allocates cmi, and initializes to zeroes.
952 static void get_cmpr_meth_info(const u8 idbuf[5], enum lha_basefmt_enum basefmt,
953 struct cmpr_meth_info *cmi)
955 size_t k;
956 const struct cmpr_meth_array_item *cmai = NULL;
958 // The first 4 bytes are unique for all known methods.
959 cmi->uniq_id = (u32)de_getu32be_direct(idbuf);
961 de_memcpy(cmi->id_raw, idbuf, 5);
963 // All "possible" methods only use printable characters.
964 de_memcpy(cmi->id_printable_sz, idbuf, 5);
965 cmi->id_printable_sz[5] = '\0';
967 for(k=0; k<DE_ARRAYCOUNT(cmpr_meth_arr); k++) {
968 if(cmpr_meth_arr[k].basefmt != basefmt) continue;
969 if(cmpr_meth_arr[k].uniq_id == cmi->uniq_id) {
970 cmai = &cmpr_meth_arr[k];
971 break;
975 if(cmai) {
976 cmi->is_recognized = 1;
977 cmi->decompressor = cmai->decompressor;
979 else if(basefmt==BASEFMT_LHA) {
980 for(k=0; k<DE_ARRAYCOUNT(other_known_cmpr_methods); k++) {
981 if(other_known_cmpr_methods[k] == cmi->uniq_id) {
982 cmi->is_recognized = 1;
983 break;
988 if(cmai && cmai->descr) {
989 de_strlcpy(cmi->descr, cmai->descr, sizeof(cmi->descr));
991 else if(cmi->is_recognized) {
992 de_strlcpy(cmi->descr, "recognized, but no info avail.", sizeof(cmi->descr));
994 else {
995 de_strlcpy(cmi->descr, "?", sizeof(cmi->descr));
999 static void do_extract_file(deark *c, lctx *d, struct member_data *md)
1001 de_finfo *fi = NULL;
1002 dbuf *outf = NULL;
1003 u32 crc_calc;
1004 int tsidx;
1005 u8 dcmpr_disabled = 0;
1006 u8 dcmpr_attempted = 0;
1007 u8 dcmpr_ok = 0;
1008 struct de_dfilter_in_params dcmpri;
1009 struct de_dfilter_out_params dcmpro;
1010 struct de_dfilter_results dres;
1012 if(!md->cmi) goto done;
1014 if(md->is_special) {
1015 de_dbg(c, "[not extracting special file]");
1016 goto done;
1018 else if(md->is_dir) {
1021 else if((!md->cmi->decompressor) || dcmpr_disabled) {
1022 de_err(c, "%s: Unsupported compression method '%s'",
1023 ucstring_getpsz_d(md->fullfilename), md->cmi->id_printable_sz);
1024 goto done;
1027 if(md->compressed_data_pos+md->compressed_data_len > c->infile->len) {
1028 de_err(c, "%s: Data goes beyond end of file", ucstring_getpsz_d(md->fullfilename));
1029 goto done;
1032 fi = de_finfo_create(c);
1034 for(tsidx=0; tsidx<DE_TIMESTAMPIDX_COUNT; tsidx++) {
1035 if(md->tsdata[tsidx].ts.is_valid) {
1036 fi->timestamp[tsidx] = md->tsdata[tsidx].ts;
1040 if(md->is_dir) {
1041 fi->is_directory = 1;
1043 else if(md->is_executable) {
1044 fi->mode_flags |= DE_MODEFLAG_EXE;
1046 else if(md->is_nonexecutable) {
1047 fi->mode_flags |= DE_MODEFLAG_NONEXE;
1050 de_finfo_set_name_from_ucstring(c, fi, md->fullfilename, DE_SNFLAG_FULLPATH);
1051 fi->original_filename_flag = 1;
1053 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
1054 dbuf_enable_wbuffer(outf);
1055 de_crcobj_reset(d->crco);
1056 dbuf_set_writelistener(outf, de_writelistener_for_crc, (void*)d->crco);
1058 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
1059 dcmpri.f = c->infile;
1060 dcmpri.pos = md->compressed_data_pos;
1061 dcmpri.len = md->compressed_data_len;
1062 dcmpro.f = outf;
1063 dcmpro.expected_len = md->orig_size;
1064 dcmpro.len_known = 1;
1066 if(md->is_dir) goto done; // For directories, we're done.
1068 dcmpr_attempted = 1;
1069 if(md->cmi->decompressor) {
1070 md->cmi->decompressor(c, d, md, &dcmpri, &dcmpro, &dres);
1072 dbuf_flush(dcmpro.f);
1074 if(dres.errcode) {
1075 de_err(c, "%s: Decompression failed: %s", ucstring_getpsz_d(md->fullfilename),
1076 de_dfilter_get_errmsg(c, &dres));
1077 goto done;
1080 crc_calc = de_crcobj_getval(d->crco);
1081 if(d->basefmt==BASEFMT_PAKLEO) {
1082 de_dbg(c, "crc (calculated): 0x%08x", (UI)crc_calc);
1084 else {
1085 de_dbg(c, "crc (calculated): 0x%04x", (UI)crc_calc);
1087 if(crc_calc != md->crc_reported) {
1088 de_err(c, "%s: CRC check failed", ucstring_getpsz_d(md->fullfilename));
1089 goto done;
1092 dcmpr_ok = 1;
1094 done:
1095 if(dcmpr_attempted && md->cmi && md->cmi->uniq_id==CODE_lh7) {
1096 if(dcmpr_ok)
1097 d->lh7_success_flag = 1;
1098 else
1099 d->lh7_failed_flag = 1;
1101 dbuf_close(outf);
1102 de_finfo_destroy(c, fi);
1105 // Simple checksum used by some header formats.
1106 // Caller supplies a crcobj to use.
1107 static u32 lha_calc_checksum(dbuf *f, i64 pos, i64 len, struct de_crcobj *crco_cksum)
1109 u32 v;
1111 de_crcobj_reset(crco_cksum);
1112 de_crcobj_addslice(crco_cksum, f, pos, len);
1113 v = de_crcobj_getval(crco_cksum);
1114 return v & 0xff;
1117 static void do_check_header_crc(deark *c, lctx *d, struct member_data *md)
1119 // LHA members don't have to have a header CRC field, though it's probably
1120 // considered best practice to have one when the checksum field doesn't
1121 // exist, or there are any extended headers.
1122 if(!md->have_hdr_crc_reported) return;
1123 de_crcobj_reset(d->crco);
1125 // Everything before the CRC field:
1126 de_crcobj_addslice(d->crco, c->infile, md->member_pos,
1127 md->hdr_crc_field_pos - md->member_pos);
1129 // The zeroed-out CRC field:
1130 de_crcobj_addzeroes(d->crco, 2);
1132 // Everything after the CRC field:
1133 de_crcobj_addslice(d->crco, c->infile, md->hdr_crc_field_pos+2,
1134 md->compressed_data_pos - (md->hdr_crc_field_pos+2));
1136 md->hdr_crc_calc = de_crcobj_getval(d->crco);
1137 de_dbg(c, "header crc (calculated): 0x%04x", (UI)md->hdr_crc_calc);
1138 if(md->hdr_crc_calc != md->hdr_crc_reported) {
1139 de_err(c, "Wrong header CRC: reported=0x%04x, calculated=0x%04x",
1140 (UI)md->hdr_crc_reported, (UI)md->hdr_crc_calc);
// Classification of the data found at a position where a member could start.
// (See lha_classify_whats_next().)
enum lha_whats_next_enum {
	// Looks like the start of a member
	LHA_WN_MEMBER,
	// First byte is 0, and at most 2 bytes remain
	LHA_WN_TRAILER,
	// Note: No longer handled differently from TRAILER
	LHA_WN_TRAILER_AND_JUNK,
	// Not recognized as a member or trailer
	LHA_WN_JUNK,
	// No bytes remain
	LHA_WN_NOTHING
};
1152 static enum lha_whats_next_enum pakleo_classify_whats_next(deark *c, lctx *d,
1153 i64 pos, i64 len)
1155 u8 b[7];
1156 if(len<=0) return LHA_WN_NOTHING;
1157 de_read(b, pos, sizeof(b));
1158 if(is_possible_cmpr_meth(&b[2])) return LHA_WN_MEMBER;
1159 return LHA_WN_JUNK;
1162 static enum lha_whats_next_enum lha_classify_whats_next(deark *c, lctx *d, i64 pos, i64 len)
1164 u8 b[21];
1165 u8 hlev;
1167 if(d->basefmt==BASEFMT_PAKLEO) {
1168 return pakleo_classify_whats_next(c, d, pos, len);
1171 if(len<=0) return LHA_WN_NOTHING;
1172 b[0] = de_getbyte(pos);
1173 if(b[0]==0 && len<=2) return LHA_WN_TRAILER;
1174 if(b[0]==0 && len<21) return LHA_WN_TRAILER_AND_JUNK;
1175 de_read(&b[1], pos+1, sizeof(b)-1);
1176 if(d->basefmt==BASEFMT_SWG) hlev = 0;
1177 else hlev = b[20];
1178 if(b[0]==0 && b[1]==0) return LHA_WN_TRAILER_AND_JUNK;
1179 if(b[0]==0 && hlev!=2) return LHA_WN_TRAILER_AND_JUNK;
1180 if(hlev>3) return LHA_WN_JUNK;
1181 if(is_possible_cmpr_meth(&b[2])) return LHA_WN_MEMBER;
1182 return LHA_WN_JUNK;
1185 static void do_swg_string_field(deark *c, lctx *d,
1186 de_ucstring *s, i64 pos, i64 fldlen, const char *name)
1188 i64 dlen = (i64)de_getbyte(pos);
1189 if(dlen>fldlen-1) dlen = fldlen-1;
1190 ucstring_empty(s);
1191 dbuf_read_to_ucstring(c->infile, pos+1, dlen, s, 0, d->input_encoding);
1192 ucstring_strip_trailing_spaces(s);
1193 de_dbg(c, "SWG %s: \"%s\"", name, ucstring_getpsz_d(s));
1196 static void do_special_swg_fields(deark *c, lctx *d, struct member_data *md, i64 pos1)
1198 i64 pos = pos1;
1199 u32 crc32;
1200 de_ucstring *s = NULL;
1202 crc32 = (u32)de_getu32le_p(&pos);
1203 de_dbg(c, "SWG crc32 (reported): 0x%08x", (UI)crc32);
1204 s = ucstring_create(c);
1205 do_swg_string_field(c, d, s, pos, 13, "stored filename");
1206 pos += 13;
1207 do_swg_string_field(c, d, s, pos, 41, "subject");
1208 pos += 41;
1209 do_swg_string_field(c, d, s, pos, 36, "contributor");
1210 pos += 36;
1211 do_swg_string_field(c, d, s, pos, 71, "search keys");
1212 ucstring_destroy(s);
1215 // This single function parses all the different header formats, using lots of
1216 // "if" statements. It is messy, but it's a no-win situation.
1217 // The alternative of four separate functions would have a lot of redundant
1218 // code, and be harder to maintain.
1220 // Caller allocates and initializes md.
1221 // If the member was successfully parsed, sets md->total_size and returns nonzero.
// Besides parsing the header, this also verifies the header checksum/CRC
// (reporting any mismatch as an error), and calls do_extract_file() for the
// member.
// If there is no member at md->member_pos, this records the trailer position
// or reports the problem, as appropriate, and returns 0.
1222 static int do_read_member(deark *c, lctx *d, struct member_data *md)
1224 int retval = 0;
1225 i64 lev0_header_size = 0;
1226 i64 lev1_base_header_size = 0;
1227 i64 lev1_skip_size = 0;
1228 i64 lev2_total_header_size = 0;
1229 i64 lev3_header_size = 0;
1230 i64 pos1 = md->member_pos;
1231 i64 pos = pos1;
1232 i64 nbytes_avail;
1233 i64 exthdr_bytes_consumed = 0;
1234 i64 fnlen = 0;
1235 UI attribs;
1236 UI hdr_checksum_reported = 0;
1237 u8 has_hdr_checksum = 0;
1238 int is_compressed;
1239 int ret;
1240 enum lha_whats_next_enum wn;
1241 u8 cmpr_meth_raw[5];
1242 int saved_indent_level;
1244 de_dbg_indent_save(c, &saved_indent_level);
// First, decide whether there is a member here at all.
1246 nbytes_avail = c->infile->len - pos1;
1247 wn = lha_classify_whats_next(c, d, pos1, nbytes_avail);
1248 if(wn!=LHA_WN_MEMBER) {
// If even the first member is unrecognized, this isn't the expected file format.
1249 if(d->member_count==0) {
1250 de_err(c, "Not a%s %s file",
1251 ((d->basefmt==BASEFMT_LHA || d->basefmt==BASEFMT_SWG)?"n":""),
1252 d->basefmt_name);
1254 else if(wn==LHA_WN_TRAILER || wn==LHA_WN_TRAILER_AND_JUNK) {
1255 d->trailer_found = 1;
1256 d->trailer_pos = pos1;
1257 de_dbg(c, "trailer at %"I64_FMT, d->trailer_pos);
1259 else if(wn==LHA_WN_JUNK) {
1260 de_warn(c, "%"I64_FMT" bytes of non-%s data found at end of file (offset %"I64_FMT")",
1261 nbytes_avail, d->basefmt_name, pos1);
1263 goto done;
1266 de_dbg(c, "member at %"I64_FMT, pos1);
1267 de_dbg_indent(c, 1);
1269 // Look ahead to figure out the header format version.
1270 // This byte was originally the high byte of the "MS-DOS file attribute" field,
1271 // which happened to always be zero.
1272 // In later LHA versions, it is overloaded to identify the header format
1273 // version (called "header level" in LHA jargon).
1274 if(d->basefmt==BASEFMT_SWG) {
1275 md->hlev = 0; // SWG is most similar to header level 0
1277 else if(d->basefmt==BASEFMT_PAKLEO) {
1278 md->hlev = 0; // hlev field is present, but we only support one format
1280 else {
1281 md->hlev = de_getbyte(pos1+20);
1283 de_dbg(c, "header level: %d", (int)md->hlev);
1284 if(md->hlev>3) {
1285 goto done; // Shouldn't be possible; checked in lha_classify_whats_next().
1288 if(d->member_count==0) {
1289 d->hlev_of_first_member = md->hlev;
// The first field(s) of the header depend on the format and header level.
// Levels 0 and 1 start with a 1-byte size and a 1-byte checksum; levels 2
// and 3 start with a 2-byte field.
1292 if(d->basefmt==BASEFMT_PAKLEO) {
1293 pos += 2; // What is this field?
1295 else if(md->hlev==0) {
1296 lev0_header_size = (i64)de_getbyte_p(&pos);
1297 de_dbg(c, "header size: (2+)%d", (int)lev0_header_size);
1298 hdr_checksum_reported = (UI)de_getbyte_p(&pos);
1299 has_hdr_checksum = 1;
1300 md->hdr_checksum_calc = lha_calc_checksum(c->infile, pos, lev0_header_size,
1301 d->crco_cksum);
1303 else if(md->hlev==1) {
1304 lev1_base_header_size = (i64)de_getbyte_p(&pos);
1305 de_dbg(c, "base header size: %d", (int)lev1_base_header_size);
1306 hdr_checksum_reported = (UI)de_getbyte_p(&pos);
1307 has_hdr_checksum = 1;
1308 md->hdr_checksum_calc = lha_calc_checksum(c->infile, pos, lev1_base_header_size,
1309 d->crco_cksum);
1311 else if(md->hlev==2) {
1312 lev2_total_header_size = de_getu16le_p(&pos);
1313 de_dbg(c, "total header size: %d", (int)lev2_total_header_size);
1315 else if(md->hlev==3) {
1316 i64 lev3_word_size;
1317 lev3_word_size = de_getu16le_p(&pos);
1318 de_dbg(c, "word size: %d", (int)lev3_word_size);
1319 if(lev3_word_size!=4) {
1320 de_err(c, "Unsupported word size: %d", (int)lev3_word_size);
1321 goto done;
// A wrong checksum is reported, but parsing continues regardless.
1325 if(has_hdr_checksum) {
1326 de_dbg(c, "header checksum (reported): 0x%02x", hdr_checksum_reported);
1327 de_dbg(c, "header checksum (calculated): 0x%02x", (UI)md->hdr_checksum_calc);
1328 if(md->hdr_checksum_calc != hdr_checksum_reported) {
1329 de_err(c, "Wrong header checksum: reported=0x%02x, calculated=0x%02x",
1330 hdr_checksum_reported, md->hdr_checksum_calc);
// The 5-byte compression method ID.
1334 de_read(cmpr_meth_raw, pos, 5);
1335 md->cmi = de_malloc(c, sizeof(struct cmpr_meth_info));
1336 get_cmpr_meth_info(cmpr_meth_raw, d->basefmt, md->cmi);
1337 de_dbg(c, "cmpr method: '%s' (%s)", md->cmi->id_printable_sz, md->cmi->descr);
1338 pos+=5;
// is_compressed is only used for the "member data" debug message below.
1340 if(md->cmi->uniq_id == CODE_lhd) {
1341 is_compressed = 0;
1342 md->is_dir = 1;
1344 else if(md->cmi->decompressor == decompress_uncompressed) {
1345 is_compressed = 0;
1347 else {
1348 is_compressed = 1;
// This 4-byte field is the "skip size" for level 1, and the compressed size
// otherwise. Where possible, md->total_size is calculated now. (For level 3
// and PAKLEO, that happens later.)
1351 if(md->hlev==1) {
1352 // lev1_skip_size is the distance from the third byte of the extended
1353 // header section, to the end of the compressed data.
1354 lev1_skip_size = de_getu32le_p(&pos);
1355 de_dbg(c, "skip size: %u", (UI)lev1_skip_size);
1356 md->total_size = 2 + lev1_base_header_size + lev1_skip_size;
1358 else {
1359 md->compressed_data_len = de_getu32le(pos);
1360 de_dbg(c, "compressed size: %"I64_FMT, md->compressed_data_len);
1361 pos += 4;
1363 if(md->hlev==0 && d->basefmt!=BASEFMT_PAKLEO) {
1364 md->total_size = 2 + lev0_header_size + md->compressed_data_len;
1366 else if(md->hlev==2) {
1367 md->total_size = lev2_total_header_size + md->compressed_data_len;
1371 md->orig_size = de_getu32le(pos);
1372 de_dbg(c, "original size: %u", (UI)md->orig_size);
1373 pos += 4;
1375 if(md->hlev==0 || md->hlev==1) {
1376 read_msdos_modtime(c, d, md, pos, "last-modified");
1377 pos += 4; // modification time/date (MS-DOS)
1379 else if(md->hlev==2 || md->hlev==3) {
1380 read_unix_timestamp(c, d, md, pos, DE_TIMESTAMPIDX_MODIFY, "last-modified");
1381 pos += 4; // Unix time
1384 if(md->hlev==0) {
1385 de_ucstring *attr_descr;
1387 // Normally, the high byte can only be 0 here, because it's
1388 // also the header level.
1389 attribs = (UI)de_getu16le_p(&pos);
1391 attr_descr = ucstring_create(c);
1392 de_describe_dos_attribs(c, attribs, attr_descr, 0);
1393 de_dbg(c, "attribs: 0x%04x (%s)", attribs, ucstring_getpsz_d(attr_descr));
1394 ucstring_destroy(attr_descr);
1396 else {
1397 attribs = (UI)de_getbyte_p(&pos);
1398 de_dbg(c, "obsolete attribs low byte: 0x%02x", attribs);
1399 pos++; // header level, already handled
// The SWG-specific fields occupy 165 bytes: a 4-byte CRC, plus string fields
// of 13+41+36+71 bytes.
1402 if(d->basefmt==BASEFMT_SWG) {
1403 do_special_swg_fields(c, d, md, pos);
1404 pos += 165;
// PAKLEO has a 32-bit CRC here, instead of a 16-bit CRC after the filename.
1407 if(d->basefmt==BASEFMT_PAKLEO) {
1408 md->crc_reported = (u32)de_getu32le_p(&pos);
1409 de_dbg(c, "crc32 (reported): 0x%08x", (UI)md->crc_reported);
// Only levels 0 and 1 have the filename in the base header.
1412 if(md->hlev<=1) {
1413 fnlen = de_getbyte(pos++);
1414 de_dbg(c, "filename len: %d", (int)fnlen);
1415 if(md->hlev==0) {
1416 read_filename_hlev0(c, d, md, pos, fnlen);
1418 else {
1419 read_filename_hlev1_or_exthdr(c, d, md, pos, fnlen);
1421 pos += fnlen;
1424 if(d->basefmt!=BASEFMT_PAKLEO) {
1425 md->crc_reported = (u32)de_getu16le_p(&pos);
1426 de_dbg(c, "crc16 (reported): 0x%04x", (UI)md->crc_reported);
1429 if(md->hlev==1 || md->hlev==2 || md->hlev==3) {
1430 md->os_id = de_getbyte_p(&pos);
1431 de_dbg(c, "OS id: %u ('%c') (%s)", (UI)md->os_id,
1432 de_byte_to_printable_char(md->os_id), get_os_name(md->os_id));
1435 if(md->hlev==3) {
1436 lev3_header_size = de_getu32le_p(&pos);
1437 md->total_size = lev3_header_size + md->compressed_data_len;
// The rest of the header is format-specific. Each branch sets
// md->compressed_data_pos, and processes any extended header data.
1440 if(d->basefmt==BASEFMT_PAKLEO) {
1441 md->compressed_data_pos = pos;
1442 md->total_size = md->compressed_data_pos + md->compressed_data_len - md->member_pos;
1444 else if(md->hlev==0) {
// Whatever remains of the declared header size, after the fields parsed above.
1445 i64 ext_headers_size = (2+lev0_header_size) - (pos-pos1);
1446 md->compressed_data_pos = pos1 + 2 + lev0_header_size;
1447 if(ext_headers_size>0) {
1448 de_dbg(c, "extended header area at %"I64_FMT", len=%"I64_FMT, pos, ext_headers_size);
1449 de_dbg_indent(c, 1);
1450 do_lev0_ext_area(c, d, md, pos, ext_headers_size);
1451 de_dbg_indent(c, -1);
1454 else if(md->hlev==1) {
1455 i64 first_ext_hdr_size;
1457 // The last two bytes of the base header are the size of the first ext. header.
1458 pos = pos1 + 2 + lev1_base_header_size - 2;
1459 // TODO: sanitize pos?
1460 first_ext_hdr_size = de_getu16le_p(&pos);
1461 de_dbg(c, "first ext hdr size: %"I64_FMT, first_ext_hdr_size);
1463 ret = do_read_ext_headers(c, d, md, pos, lev1_skip_size, first_ext_hdr_size,
1464 &exthdr_bytes_consumed);
1466 if(!ret) {
1467 de_err(c, "Error parsing extended headers at %"I64_FMT". Cannot extract this file.",
1468 pos);
// md->total_size has already been set, so we still report success. That way,
// the caller can go on to the next member.
1469 retval = 1;
1470 goto done;
// For level 1, the extended headers are counted as part of the "skip size",
// so their size has to be subtracted to get the compressed data size.
1473 pos += exthdr_bytes_consumed;
1474 md->compressed_data_pos = pos;
1475 md->compressed_data_len = lev1_skip_size - exthdr_bytes_consumed;
1477 else if(md->hlev==2) {
1478 i64 first_ext_hdr_size;
1480 if(md->os_id=='K') {
1481 // So that some lhasa test files will work.
1482 // TODO: The extended headers section is (usually?) self-terminating, so we
1483 // should be able to parse it and figure out if this bug is present. That
1484 // would be better than just guessing.
1485 lev2_total_header_size += 2;
1486 md->total_size = lev2_total_header_size + md->compressed_data_len;
1487 de_dbg(c, "attempting bug workaround: changing total header size to %"I64_FMT,
1488 lev2_total_header_size);
1491 md->compressed_data_pos = pos1+lev2_total_header_size;
1493 first_ext_hdr_size = de_getu16le_p(&pos);
1494 de_dbg(c, "first ext hdr size: %"I64_FMT, first_ext_hdr_size);
// Note that, unlike with level 1, the return value is ignored here.
1496 do_read_ext_headers(c, d, md, pos, pos1+lev2_total_header_size-pos,
1497 first_ext_hdr_size, &exthdr_bytes_consumed);
1499 else if(md->hlev==3) {
1500 i64 first_ext_hdr_size;
1502 md->compressed_data_pos = pos1+lev3_header_size;
1504 first_ext_hdr_size = de_getu32le_p(&pos);
1505 de_dbg(c, "first ext hdr size: %"I64_FMT, first_ext_hdr_size);
1507 do_read_ext_headers(c, d, md, pos, pos1+lev3_header_size-pos,
1508 first_ext_hdr_size, &exthdr_bytes_consumed);
// This has to be done after md->compressed_data_pos is set, because the header
// CRC calculation uses it.
1511 do_check_header_crc(c, d, md);
1513 de_dbg(c, "member data (%scompressed) at %"I64_FMT", len=%"I64_FMT,
1514 is_compressed?"":"un",
1515 md->compressed_data_pos, md->compressed_data_len);
1517 make_fullfilename(c, d, md);
1519 de_dbg_indent(c, 1);
1520 do_extract_file(c, d, md);
1521 de_dbg_indent(c, -1);
1523 retval = 1;
1524 done:
1525 de_dbg_indent_restore(c, saved_indent_level);
1526 return retval;
1529 static void do_swg_footer(deark *c, lctx *d, i64 pos1)
1531 i64 pos = pos1;
1532 i64 n;
1533 de_ucstring *s = NULL;
1535 de_dbg(c, "SWG footer at %"I64_FMT, pos1);
1536 de_dbg_indent(c, 1);
1537 s = ucstring_create(c);
1538 do_swg_string_field(c, d, s, pos, 61, "message");
1539 pos += 61;
1540 do_swg_string_field(c, d, s, pos, 66, "title");
1541 pos += 66;
1542 n = de_getu16le_p(&pos);
1543 de_dbg(c, "SWG number of items: %d", (int)n);
1544 de_dbg_indent(c, -1);
1545 ucstring_destroy(s);
1548 static void do_lha_footer(deark *c, lctx *d)
1550 i64 extra_bytes_pos, extra_bytes_len;
1552 if(!d->trailer_found) goto done;
1553 extra_bytes_pos = d->trailer_pos+1;
1554 extra_bytes_len = c->infile->len - extra_bytes_pos;
1555 if(extra_bytes_len<=1) goto done;
1557 if(d->basefmt==BASEFMT_SWG && extra_bytes_len==129) {
1558 do_swg_footer(c, d, extra_bytes_pos);
1559 goto done;
1562 de_info(c, "Note: %"I64_FMT" extra bytes at end of file (offset %"I64_FMT")",
1563 extra_bytes_len, extra_bytes_pos);
1564 done:
1568 static lctx *lha_create_lctx(deark *c)
1570 lctx *d;
1572 d = de_malloc(c, sizeof(lctx));
1573 return d;
1576 static void lha_destroy_lctx(deark *c, lctx *d)
1578 if(!d) return;
1579 de_crcobj_destroy(d->crco);
1580 de_crcobj_destroy(d->crco_cksum);
1581 de_free(c, d);
1584 static void do_run_lha_internal(deark *c, lctx *d, de_module_params *mparams)
1586 i64 pos;
1587 struct member_data *md = NULL;
1589 if(!d->basefmt_name) {
1590 d->basefmt_name = "LHA";
1592 d->lhark_req = de_get_ext_option_bool(c, "lha:lhark", -1);
1593 d->lhark_policy = d->lhark_req;
1595 // It's not really safe to guess CP437, because Japanese-encoded (CP932?)
1596 // filenames are common.
1597 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_ASCII);
1599 d->hlev_of_first_member = 0xff;
1600 if(d->basefmt==BASEFMT_PAKLEO) {
1601 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC32_PL);
1603 else {
1604 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
1606 d->crco_cksum = de_crcobj_create(c, DE_CRCOBJ_SUM_BYTES);
1608 pos = 0;
1609 if(d->basefmt==BASEFMT_PAKLEO) pos += 37;
1611 while(1) {
1612 if(pos >= c->infile->len) break;
1614 md = de_malloc(c, sizeof(struct member_data));
1615 md->encoding = d->input_encoding;
1616 md->member_pos = pos;
1617 if(!do_read_member(c, d, md)) goto done;
1618 if(md->total_size<1) goto done;
1620 d->member_count++;
1621 pos += md->total_size;
1623 destroy_member_data(c, md);
1624 md = NULL;
1627 done:
1628 do_lha_footer(c, d);
1629 destroy_member_data(c, md);
1632 static void de_run_lha(deark *c, de_module_params *mparams)
1634 lctx *d;
1636 d = lha_create_lctx(c);
1637 d->basefmt = BASEFMT_LHA;
1638 do_run_lha_internal(c, d, mparams);
1639 lha_destroy_lctx(c, d);
1642 static int is_swg_sig(const u8 *b)
1644 return b[0]=='-' && b[1]=='s' && b[2]=='w' &&
1645 (b[3]=='0' || b[3]=='1') && b[4]=='-';
1648 static int de_identify_lha(deark *c)
1650 int has_ext = 0;
1651 u8 b[22];
1652 struct cmpr_meth_info cmi;
1654 de_read(b, 0, sizeof(b));
1655 if(b[20]>3) return 0; // header level
1657 if(!is_possible_cmpr_meth(&b[2])) return 0;
1658 if(is_swg_sig(&b[2])) return 0; // Handled by the swg module
1660 if(b[20]==0) {
1661 if(b[0]<22) return 0;
1662 if(22 + (int)b[21] + 2 > 2 + (int)b[0]) return 0;
1664 else if(b[20]==1) {
1665 if(b[0]<25) return 0;
1666 if(22 + (int)b[21] + 5 > 2 + (int)b[0]) return 0;
1668 else if(b[20]==2) {
1669 i64 hsize = de_getu16le_direct(&b[0]);
1670 if(hsize < 26) return 0;
1672 else if(b[20]==3) {
1673 if((b[0]!=4 && b[0]!=8) || b[1]!=0) return 0;
1676 de_zeromem(&cmi, sizeof(struct cmpr_meth_info));
1677 get_cmpr_meth_info(&b[2], BASEFMT_LHA, &cmi);
1678 if(!cmi.is_recognized) {
1679 return 0;
1682 if(de_input_file_has_ext(c, "lzh") ||
1683 de_input_file_has_ext(c, "lha") ||
1684 ((b[4]=='z') && de_input_file_has_ext(c, "lzs")))
1686 has_ext = 1;
1689 if(has_ext) return 100;
1690 return 80; // Must be less than car_lha
1693 static void de_help_lha(deark *c)
1695 de_msg(c, "-opt lha:lhark=<0|1> : LHARK mode (for 'lh7' compression)");
1698 void de_module_lha(deark *c, struct deark_module_info *mi)
1700 mi->id = "lha";
1701 mi->desc = "LHA/LZH/PMA archive";
1702 mi->run_fn = de_run_lha;
1703 mi->identify_fn = de_identify_lha;
1704 mi->help_fn = de_help_lha;
1707 /////////////////////// SWG / SWAG
1709 // This module works almost just like lha, except that all members are assumed
1710 // to use the SWG header format. (For lha, the SWG header format is never used.)
1712 static void de_run_swg(deark *c, de_module_params *mparams)
1714 lctx *d;
1716 d = lha_create_lctx(c);
1717 d->basefmt = BASEFMT_SWG;
1718 d->basefmt_name = "SWG";
1719 de_declare_fmt(c, "SWAG packet");
1720 do_run_lha_internal(c, d, mparams);
1721 lha_destroy_lctx(c, d);
1724 static int de_identify_swg(deark *c)
1726 u8 b[5];
1728 de_read(b, 2, sizeof(b));
1729 if(is_swg_sig(b)) {
1730 if(de_input_file_has_ext(c, "swg")) return 100;
1731 return 90;
1733 return 0;
1736 void de_module_swg(deark *c, struct deark_module_info *mi)
1738 mi->id = "swg";
1739 mi->desc = "SWAG packet";
1740 mi->run_fn = de_run_swg;
1741 mi->identify_fn = de_identify_swg;
1742 mi->flags |= DE_MODFLAG_WARNPARSEONLY;
1745 /////////////////////// PAKLEO
1747 static void de_run_pakleo(deark *c, de_module_params *mparams)
1749 lctx *d;
1751 d = lha_create_lctx(c);
1752 d->basefmt = BASEFMT_PAKLEO;
1753 d->basefmt_name = "PAKLEO";
1754 de_declare_fmt(c, "PAKLEO");
1755 do_run_lha_internal(c, d, mparams);
1756 lha_destroy_lctx(c, d);
1759 static int de_identify_pakleo(deark *c)
1761 if(dbuf_memcmp(c->infile, 0, "LEOLZW", 6)) return 0;
1762 if(dbuf_memcmp(c->infile, 39, "-l", 2)) return 0;
1763 return 100;
1766 void de_module_pakleo(deark *c, struct deark_module_info *mi)
1768 mi->id = "pakleo";
1769 mi->desc = "PAKLEO archive";
1770 mi->run_fn = de_run_pakleo;
1771 mi->identify_fn = de_identify_pakleo;
1774 /////////////////////// CAR (MylesHi!)
1776 struct car_member_data {
1777 i64 member_pos;
1778 i64 total_size;
1779 u32 hdr_checksum_calc;
1782 struct car_ctx {
1783 dbuf *hdr_tmp;
1784 dbuf *lha_outf;
1785 struct de_crcobj *crco_cksum;
1788 static int looks_like_car_member(deark *c, i64 pos)
1790 u8 b[16];
1792 de_read(b, pos, 16);
1793 if(b[2]!='-' || b[3]!='l'|| b[4]!='h' || b[6]!='-') return 0;
1794 if(b[5]!='0' && b[5]!='5') return 0;
1795 if((int)b[0] != (int)b[15] + 25) return 0;
1796 if(dbuf_memcmp(c->infile, pos + (i64)b[15] + 24, (const u8*)"\x20\x00\x00", 3)) return 0;
1797 return 1;
1800 static int do_car_member(deark *c, struct car_ctx *d, struct car_member_data *md)
1802 i64 lev1_base_header_size;
1803 i64 fnlen;
1804 i64 hdr_endpos;
1805 i64 compressed_data_len;
1806 i64 pos1 = md->member_pos;
1807 int retval = 0;
1808 int saved_indent_level;
1810 de_dbg_indent_save(c, &saved_indent_level);
1811 de_dbg(c, "member at %"I64_FMT, pos1);
1812 de_dbg_indent(c, 1);
1814 // Figure out where everything is...
1815 lev1_base_header_size = (i64)de_getbyte(pos1);
1816 de_dbg(c, "base header size: %d", (int)lev1_base_header_size);
1817 hdr_endpos = pos1 + 2 + lev1_base_header_size;
1818 fnlen = lev1_base_header_size - 25;
1819 de_dbg(c, "implied filename len: %d", (int)fnlen);
1820 if(fnlen<0) goto done;
1822 compressed_data_len = de_getu32le(pos1 + 7);
1823 de_dbg(c, "compressed size: %"I64_FMT, compressed_data_len);
1824 if(hdr_endpos + compressed_data_len > c->infile->len) goto done;
1826 // Convert to an LHA level-1 header
1827 dbuf_empty(d->hdr_tmp);
1829 // Fields through uncmpr_size are the same (we'll patch the checksum later)
1830 dbuf_copy(c->infile, pos1, 15, d->hdr_tmp);
1832 dbuf_copy(c->infile, hdr_endpos-7, 4, d->hdr_tmp); // timestamp
1834 // attribute (low byte)
1835 dbuf_copy(c->infile, hdr_endpos-9, 1, d->hdr_tmp);
1836 dbuf_writebyte(d->hdr_tmp, 0x01); // level identifier
1838 // Fields starting with filename length, through crc
1839 dbuf_copy(c->infile, pos1+15, 1+fnlen+2, d->hdr_tmp);
1841 dbuf_writebyte(d->hdr_tmp, 77); // OS ID = 'M' = MS-DOS
1843 // Recalculate checksum
1844 md->hdr_checksum_calc = lha_calc_checksum(d->hdr_tmp, 2, lev1_base_header_size, d->crco_cksum);
1845 de_dbg(c, "header checksum (calculated): 0x%02x", (UI)md->hdr_checksum_calc);
1846 dbuf_writebyte_at(d->hdr_tmp, 1, (u8)md->hdr_checksum_calc);
1847 dbuf_truncate(d->hdr_tmp, 2+lev1_base_header_size);
1849 // Write everything out
1850 dbuf_copy(d->hdr_tmp, 0, d->hdr_tmp->len, d->lha_outf);
1851 de_dbg(c, "member data at %"I64_FMT", len=%"I64_FMT, hdr_endpos, compressed_data_len);
1852 dbuf_copy(c->infile, hdr_endpos, compressed_data_len, d->lha_outf);
1853 md->total_size = (hdr_endpos-md->member_pos) + compressed_data_len;
1854 retval = 1;
1856 done:
1857 de_dbg_indent_restore(c, saved_indent_level);
1858 return retval;
1861 static void de_run_car_lha(deark *c, de_module_params *mparams)
1863 struct car_ctx *d = NULL;
1864 struct car_member_data *md = NULL;
1865 int ok = 0;
1866 i64 pos = 0;
1868 d = de_malloc(c, sizeof(struct car_ctx));
1870 if(!looks_like_car_member(c, 0)) {
1871 de_err(c, "Not a CAR file");
1872 goto done;
1875 d->crco_cksum = de_crcobj_create(c, DE_CRCOBJ_SUM_BYTES);
1876 d->lha_outf = dbuf_create_output_file(c, "lha", NULL, 0);
1877 d->hdr_tmp = dbuf_create_membuf(c, 0, 0);
1879 md = de_malloc(c, sizeof(struct car_member_data));
1880 while(1) {
1881 if(de_getbyte(pos)==0) {
1882 de_dbg(c, "trailer at %"I64_FMT, pos);
1883 dbuf_writebyte(d->lha_outf, 0);
1884 ok = 1;
1885 break;
1887 if(pos+27 > c->infile->len) goto done;
1888 if(!looks_like_car_member(c, pos)) goto done;
1890 de_zeromem(md, sizeof(struct car_member_data));
1891 md->member_pos = pos;
1892 if(!do_car_member(c, d, md)) goto done;
1893 pos += md->total_size;
1896 done:
1897 de_free(c, md);
1898 if(d) {
1899 if(d->lha_outf) {
1900 de_crcobj_destroy(d->crco_cksum);
1901 dbuf_close(d->lha_outf);
1902 if(!ok) {
1903 de_err(c, "Conversion to LHA format failed");
1906 dbuf_close(d->hdr_tmp);
1907 de_free(c, d);
1911 static int de_identify_car_lha(deark *c)
1913 if(!de_input_file_has_ext(c, "car")) return 0;
1914 if(looks_like_car_member(c, 0)) {
1915 return 95;
1917 return 0;
1920 void de_module_car_lha(deark *c, struct deark_module_info *mi)
1922 mi->id = "car_lha";
1923 mi->desc = "CAR (MylesHi!) LHA-like archive";
1924 mi->run_fn = de_run_car_lha;
1925 mi->identify_fn = de_identify_car_lha;
1928 /////////////////////// ARX
1930 struct arx_member_data {
1931 i64 member_pos;
1932 i64 total_size;
1933 i64 hdr_endpos;
1934 i64 compressed_data_len;
1935 i64 unc_data_len;
1936 int is_uncompressed;
1937 u32 crc_calc;
1938 u32 hdr_checksum_calc;
1941 struct arx_ctx {
1942 dbuf *hdr_tmp;
1943 dbuf *lha_outf;
1944 struct de_crcobj *crco;
1945 struct de_crcobj *crco_cksum;
1948 static int looks_like_arx_member(deark *c, i64 pos)
1950 u8 b[22];
1952 de_read(b, pos, sizeof(b));
1953 if(b[2]!='-' || b[3]!='l'|| b[4]!='h' || b[6]!='-') return 0;
1954 if(b[21]!=0) return 0;
1955 return 1;
1958 // Decompress the file, discarding the output, just to figure out the CRC.
1959 static void arx_recalc_lh1(deark *c, struct arx_ctx *d, struct arx_member_data *md)
1961 dbuf *outf = NULL;
1962 struct de_dfilter_in_params dcmpri;
1963 struct de_dfilter_out_params dcmpro;
1964 struct de_dfilter_results dres;
1966 outf = dbuf_create_custom_dbuf(c, md->unc_data_len, 0);
1967 dbuf_set_writelistener(outf, de_writelistener_for_crc, (void*)d->crco);
1969 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
1970 dcmpri.f = c->infile;
1971 dcmpri.pos = md->hdr_endpos;
1972 dcmpri.len = md->compressed_data_len;
1973 dcmpro.f = outf;
1974 dcmpro.expected_len = md->unc_data_len;
1975 dcmpro.len_known = 1;
1977 fmtutil_lh1_codectype1(c, &dcmpri, &dcmpro, &dres, NULL);
1979 dbuf_close(outf);
1982 static void arx_recalc_crc(deark *c, struct arx_ctx *d, struct arx_member_data *md)
1984 de_crcobj_reset(d->crco);
1985 if(md->is_uncompressed) {
1986 de_crcobj_addslice(d->crco, c->infile, md->hdr_endpos, md->compressed_data_len);
1988 else {
1989 arx_recalc_lh1(c, d, md);
1991 md->crc_calc = de_crcobj_getval(d->crco);
1994 static int do_arx_member(deark *c, struct arx_ctx *d, struct arx_member_data *md)
1996 i64 lev0_header_size;
1997 i64 pos1 = md->member_pos;
1998 u8 extra_crc_byte;
1999 int retval = 0;
2000 int saved_indent_level;
2002 de_dbg_indent_save(c, &saved_indent_level);
2003 de_dbg(c, "member at %"I64_FMT, pos1);
2004 de_dbg_indent(c, 1);
2006 lev0_header_size = (i64)de_getbyte(pos1);
2007 de_dbg(c, "header size: %d", (int)lev0_header_size);
2008 if(lev0_header_size<22) goto done;
2009 md->hdr_endpos = pos1 + 2 + lev0_header_size;
2011 md->compressed_data_len = de_getu32le(pos1+8);
2012 de_dbg(c, "compressed size: %"I64_FMT, md->compressed_data_len);
2014 md->unc_data_len = de_getu32le(pos1+12);
2015 de_dbg(c, "uncmpr. size: %"I64_FMT, md->unc_data_len);
2017 if(md->compressed_data_len==0) {
2018 md->is_uncompressed = 1;
2019 md->compressed_data_len = md->unc_data_len;
2022 if(md->hdr_endpos + md->compressed_data_len > c->infile->len) goto done;
2024 // Just the first byte of the CRC is present, and apparently not even
2025 // that for non-compressed files.
2026 extra_crc_byte = de_getbyte(md->hdr_endpos-1);
2027 de_dbg(c, "crc (reported): 0x??%02x", (UI)extra_crc_byte);
2029 // Find the correct CRC of the file data.
2030 arx_recalc_crc(c, d, md);
2031 de_dbg(c, "crc (calculated): 0x%04x", (UI)md->crc_calc);
2032 if(!md->is_uncompressed) {
2033 if((u8)(md->crc_calc & 0xff) != extra_crc_byte) {
2034 de_warn(c, "CRC mismatch. Conversion to LHA may have failed.");
2038 // Convert to an LHA header
2039 dbuf_empty(d->hdr_tmp);
2041 // Fields through cmpr meth. (We'll patch the checksum, and
2042 // compression method if necessary, later.)
2043 dbuf_copy(c->infile, pos1, 7, d->hdr_tmp);
2045 dbuf_writeu32le(d->hdr_tmp, md->compressed_data_len);
2046 dbuf_writeu32le(d->hdr_tmp, md->unc_data_len);
2048 /// This part of the header can be copied as-is.
2049 dbuf_copy(c->infile, pos1+8+8, lev0_header_size-1-6-8, d->hdr_tmp);
2051 // CRC
2052 dbuf_writebyte(d->hdr_tmp, (u8)(md->crc_calc & 0xff));
2053 dbuf_writebyte(d->hdr_tmp, (u8)((md->crc_calc & 0xff00)>>8));
2055 if(md->is_uncompressed) {
2056 dbuf_writebyte_at(d->hdr_tmp, 5, '0'); // lh1 -> lh0
2059 // Recalculate header checksum
2060 md->hdr_checksum_calc = lha_calc_checksum(d->hdr_tmp, 2, lev0_header_size,
2061 d->crco_cksum);
2062 de_dbg(c, "header checksum (calculated): 0x%02x", (UI)md->hdr_checksum_calc);
2063 dbuf_writebyte_at(d->hdr_tmp, 1, (u8)md->hdr_checksum_calc);
2064 dbuf_truncate(d->hdr_tmp, 2+lev0_header_size);
2066 // Write everything out
2067 dbuf_copy(d->hdr_tmp, 0, d->hdr_tmp->len, d->lha_outf);
2068 de_dbg(c, "member data at %"I64_FMT", len=%"I64_FMT, md->hdr_endpos, md->compressed_data_len);
2069 dbuf_copy(c->infile, md->hdr_endpos, md->compressed_data_len, d->lha_outf);
2070 md->total_size = 2 + lev0_header_size + md->compressed_data_len;
2071 retval = 1;
2073 done:
2074 de_dbg_indent_restore(c, saved_indent_level);
2075 return retval;
2078 static void de_run_arx(deark *c, de_module_params *mparams)
2080 struct arx_ctx *d = NULL;
2081 struct arx_member_data *md = NULL;
2082 int ok = 0;
2083 i64 pos = 0;
2085 d = de_malloc(c, sizeof(struct arx_ctx));
2087 if(!looks_like_arx_member(c, 0)) {
2088 de_err(c, "Not an ARX file");
2089 goto done;
2092 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
2093 d->crco_cksum = de_crcobj_create(c, DE_CRCOBJ_SUM_BYTES);
2094 d->lha_outf = dbuf_create_output_file(c, "lha", NULL, 0);
2095 d->hdr_tmp = dbuf_create_membuf(c, 0, 0);
2097 md = de_malloc(c, sizeof(struct arx_member_data));
2098 while(1) {
2099 if(de_getbyte(pos)==0) {
2100 de_dbg(c, "trailer at %"I64_FMT, pos);
2101 dbuf_writebyte(d->lha_outf, 0);
2102 ok = 1;
2103 break;
2105 if(pos+27 > c->infile->len) goto done;
2106 if(!looks_like_arx_member(c, pos)) goto done;
2108 de_zeromem(md, sizeof(struct arx_member_data));
2109 md->member_pos = pos;
2110 if(!do_arx_member(c, d, md)) goto done;
2111 pos += md->total_size;
2114 done:
2115 de_free(c, md);
2116 if(d) {
2117 if(d->lha_outf) {
2118 dbuf_close(d->lha_outf);
2119 if(!ok) {
2120 de_err(c, "Conversion to LHA format failed");
2123 dbuf_close(d->hdr_tmp);
2124 de_crcobj_destroy(d->crco);
2125 de_crcobj_destroy(d->crco_cksum);
2126 de_free(c, d);
2130 static int de_identify_arx(deark *c)
2132 if(dbuf_memcmp(c->infile, 2, "-lh1-", 5)) return 0;
2133 if(de_getbyte(20)!=0x20 || de_getbyte(21)!=0x00) return 0;
2134 if(de_input_file_has_ext(c, "arx")) return 100;
2135 return 70;
2138 void de_module_arx(deark *c, struct deark_module_info *mi)
2140 mi->id = "arx";
2141 mi->desc = "ARX LHA-like archive";
2142 mi->run_fn = de_run_arx;
2143 mi->identify_fn = de_identify_arx;
2147 /////////////////////// ar (Haruhiko Okumura) version "ar001"
2149 static void do_check_ar001_header_crc(deark *c, lctx *d, struct member_data *md, i64 basic_hdr_size)
2151 //if(!md->have_hdr_crc_reported) return;
2152 de_crcobj_reset(d->crco);
2154 // Everything before the CRC field:
2155 de_crcobj_addslice(d->crco, c->infile, md->member_pos+2, basic_hdr_size);
2157 md->hdr_crc_calc = de_crcobj_getval(d->crco);
2158 de_dbg(c, "header crc (calculated): 0x%04x", (UI)md->hdr_crc_calc);
2159 if(md->hdr_crc_calc != md->hdr_crc_reported) {
2160 de_err(c, "Wrong header CRC: reported=0x%04x, calculated=0x%04x",
2161 (UI)md->hdr_crc_reported, (UI)md->hdr_crc_calc);
2165 // Caller allocates and initializes md.
2166 // If the member was successfully parsed, sets md->total_size and returns nonzero.
2167 static int do_read_ar001_member(deark *c, lctx *d, struct member_data *md)
2169 int saved_indent_level;
2170 int retval = 0;
2171 i64 pos1 = md->member_pos;
2172 i64 pos = pos1;
2173 i64 basic_hdr_size;
2174 i64 fnlen;
2175 i64 first_ext_hdr_size;
2176 UI cmpr_method;
2177 u8 cmpr_meth_raw[5];
2179 de_dbg_indent_save(c, &saved_indent_level);
2180 de_dbg(c, "member at %"I64_FMT, pos1);
2181 de_dbg_indent(c, 1);
2183 md->hlev = 0; // (hack)
2184 basic_hdr_size = de_getu16le_p(&pos);
2185 if(basic_hdr_size==0) {
2186 de_dbg(c, "end of archive");
2187 goto done;
2189 de_dbg(c, "basic header size: %u", (UI)basic_hdr_size);
2190 if(basic_hdr_size<18) goto done;
2191 fnlen = basic_hdr_size-18;
2193 cmpr_method = (UI)de_getu16le_p(&pos);
2195 de_zeromem(cmpr_meth_raw, 5);
2196 switch(cmpr_method) {
2197 case 0: de_memcpy(cmpr_meth_raw, (const void*)"-lh0-", 5); break; // (hack)
2198 case 1: de_memcpy(cmpr_meth_raw, (const void*)"-lh4-", 5); break; // ...
2200 md->cmi = de_malloc(c, sizeof(struct cmpr_meth_info));
2201 get_cmpr_meth_info(cmpr_meth_raw, BASEFMT_LHA, md->cmi);
2202 de_dbg(c, "cmpr method: %u (%s)", cmpr_method, md->cmi->descr);
2204 pos += 1; // file type
2206 // timestamp: This is the time the file was added to the archive, not its
2207 // last-modified timestamp, so it's not very useful.
2208 pos += 5;
2210 md->compressed_data_len = de_getu32le_p(&pos);
2211 de_dbg(c, "compressed size: %"I64_FMT, md->compressed_data_len);
2213 md->orig_size = de_getu32le_p(&pos);
2214 de_dbg(c, "original size: %"I64_FMT, md->orig_size);
2216 md->crc_reported = (u32)de_getu16le_p(&pos);
2217 de_dbg(c, "crc16 (reported): 0x%04x", (UI)md->crc_reported);
2219 read_filename_hlev0(c, d, md, pos, fnlen);
2220 pos += fnlen;
2221 make_fullfilename(c, d, md);
2223 md->hdr_crc_reported = (UI)de_getu16le_p(&pos);
2224 de_dbg(c, "basic header crc (reported): 0x%04x", (UI)md->hdr_crc_reported);
2226 do_check_ar001_header_crc(c, d, md, basic_hdr_size);
2228 first_ext_hdr_size = de_getu16le_p(&pos);
2229 de_dbg(c, "first ext header size: %"I64_FMT, first_ext_hdr_size);
2230 if(first_ext_hdr_size) {
2231 // The ar001 software never uses this feature, so I'm not going to try to
2232 // support it.
2233 de_err(c, "Files with extended headers aren't supported");
2234 goto done;
2237 md->total_size = pos + md->compressed_data_len - pos1;
2238 retval = 1;
2240 md->compressed_data_pos = pos;
2242 de_dbg(c, "member data at %"I64_FMT", len=%"I64_FMT,
2243 md->compressed_data_pos, md->compressed_data_len);
2245 if(!md->cmi->decompressor) {
2246 de_err(c, "%s: Unsupported compression method: %u",
2247 ucstring_getpsz_d(md->fullfilename), cmpr_method);
2248 goto done;
2251 de_dbg_indent(c, 1);
2252 do_extract_file(c, d, md);
2254 done:
2255 de_dbg_indent_restore(c, saved_indent_level);
2256 return retval;
2259 static void de_run_ar001(deark *c, de_module_params *mparams)
2261 lctx *d = NULL;
2262 i64 pos;
2263 struct member_data *md = NULL;
2265 d = lha_create_lctx(c);
2266 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_ASCII);
2268 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_IBMSDLC);
2270 pos = 0;
2271 while(1) {
2272 if(pos >= c->infile->len) break;
2274 md = de_malloc(c, sizeof(struct member_data));
2275 md->encoding = d->input_encoding;
2276 md->member_pos = pos;
2277 if(!do_read_ar001_member(c, d, md)) goto done;
2278 if(md->total_size<1) goto done;
2280 d->member_count++;
2281 pos += md->total_size;
2283 destroy_member_data(c, md);
2284 md = NULL;
2287 done:
2288 destroy_member_data(c, md);
2289 lha_destroy_lctx(c, d);
2292 static int slice_is_printable_ascii(dbuf *f, i64 pos, i64 len)
2294 i64 i;
2296 for(i=0; i<len; i++) {
2297 u8 b;
2299 b = dbuf_getbyte(f, pos+i);
2300 if(b<32 || b>126) return 0;
2302 return 1;
2305 static int de_identify_ar001(deark *c)
2307 i64 n;
2308 i64 bhlen;
2309 i64 nlen;
2310 u32 bhcrc_r, bhcrc_c;
2311 u8 b;
2312 int conf = 0;
2313 struct de_crcobj *crco = NULL;
2315 bhlen = de_getu16le(0); // basic header size
2316 if(bhlen<(18+1) || bhlen>(18+1024)) goto done;
2317 if(c->infile->len < bhlen+8) goto done;
2318 nlen = bhlen-18;
2319 n = de_getu16le(2); // cmpr method
2320 if(n>1) goto done;
2321 b = de_getbyte(4); // file type
2322 if(b!=0) goto done; // 1 (text) is also defined, but not used by ar001
2323 n = de_getu16le(2+bhlen+2); // first ext hdr len
2324 if(n!=0) goto done;
2325 if(!slice_is_printable_ascii(c->infile, 20, nlen)) goto done;
2326 bhcrc_r = (u32)de_getu16le(2+bhlen);
2327 crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_IBMSDLC);
2328 de_crcobj_addslice(crco, c->infile, 2, bhlen);
2329 bhcrc_c = de_crcobj_getval(crco);
2330 if(bhcrc_c == bhcrc_r) conf = 91;
2332 done:
2333 if(crco) de_crcobj_destroy(crco);
2334 return conf;
2337 void de_module_ar001(deark *c, struct deark_module_info *mi)
2339 mi->id = "ar001";
2340 mi->desc = "ar001 archive (Okumura)";
2341 mi->run_fn = de_run_ar001;
2342 mi->identify_fn = de_identify_ar001;
2345 // **************************************************************************
2346 // LHarc & LArc SFX - COM format
2347 // **************************************************************************
2348 // Note that EXE SFX format is handled by the exe module.
2350 struct lhasfx_context {
2351 u8 errflag;
2352 u8 need_errmsg;
2353 int sfx_container_is_larc;
2354 i64 payload_offs;
2357 static int looks_like_lharc_sfx_com(deark *c, int *is_larc)
2359 u64 v;
2360 i64 pos = 0;
2361 u8 b;
2363 b = de_getbyte_p(&pos);
2364 if(b!=0xeb) return 0;
2365 b = de_getbyte_p(&pos);
2366 pos += (i64)b;
2367 // I don't know how good this test is, but I don't trust the text signature.
2368 // It's not formatted consistently. And because the source code was released,
2369 // who knows what weirdness is out there?
2370 if(b==0x60 || b==0x6c) {
2371 v = dbuf_getu64be(c->infile, pos);
2372 if(v==0xfcbc0001bb0601e8ULL) {
2373 return 1;
2376 else if(b==0x1c) {
2377 v = dbuf_getu64be(c->infile, pos);
2378 if(v==0xfc8cc8030602018eULL) {
2379 *is_larc = 1;
2380 return 1;
2384 return 0;
2387 // Probe for LHarc (v1.x) or LArc data
2388 static int is_lharc_data_at(deark *c, i64 pos, i64 *pfoundpos)
2390 u8 b[5];
2392 if(pos+21 > c->infile->len) return 0;
2393 de_read(b, pos+2, sizeof(b));
2394 if(b[0]!='-' || b[1]!='l' || b[4]!='-') return 0;
2395 if(b[2]!='h' && b[2]!='z') return 0;
2396 *pfoundpos = pos;
2397 return 1;
2400 static void lhasfx_find_payload(deark *c, struct lhasfx_context *d)
2402 if(d->sfx_container_is_larc) {
2403 if(is_lharc_data_at(c, 594, &d->payload_offs)) {
2404 goto done;
2407 else {
2408 if(is_lharc_data_at(c, 1260, &d->payload_offs) || // LHarc 1.12
2409 is_lharc_data_at(c, 1263, &d->payload_offs) || // LHarc 1.13-1.14
2410 is_lharc_data_at(c, 1290, &d->payload_offs)) // LHarc 1.00
2412 goto done;
2416 done:
2417 if(d->payload_offs==0) {
2418 d->errflag = 1;
2419 d->need_errmsg = 1;
2423 static void de_run_lharc_sfx_com(deark *c, de_module_params *mparams)
2425 struct lhasfx_context *d = NULL;
2426 int ret;
2428 d = de_malloc(c, sizeof(struct lhasfx_context));
2430 ret = looks_like_lharc_sfx_com(c, &d->sfx_container_is_larc);
2431 if(!ret) {
2432 d->errflag = 1;
2433 d->need_errmsg = 1;
2434 goto done;
2437 de_declare_fmtf(c, "%s self-extracting archive (COM)",
2438 (d->sfx_container_is_larc ? "LArc":"LHarc"));
2440 lhasfx_find_payload(c, d);
2441 if(d->errflag) goto done;
2443 de_dbg(c, "payload found at: %"I64_FMT, d->payload_offs);
2445 dbuf_create_file_from_slice(c->infile, d->payload_offs,
2446 c->infile->len-d->payload_offs,
2447 (d->sfx_container_is_larc ? "lzs" : "lha"), NULL, 0);
2449 done:
2450 if(d->errflag && d->need_errmsg) {
2451 de_err(c, "Not a known LHarc/LArc SFX format");
2453 de_free(c, d);
2456 static int de_identify_lharc_sfx_com(deark *c)
2458 int is_larc = 0;
2459 int ret;
2461 if(c->infile->len>65280) return 0;
2462 ret = looks_like_lharc_sfx_com(c, &is_larc);
2463 if(ret) return 70;
2464 return 0;
2467 void de_module_lharc_sfx_com(deark *c, struct deark_module_info *mi)
2469 mi->id = "lharc_sfx_com";
2470 mi->desc = "LHarc/LArc self-extracting archive (COM)";
2471 mi->run_fn = de_run_lharc_sfx_com;
2472 mi->identify_fn = de_identify_lharc_sfx_com;