1 // This file is part of Deark.
2 // Copyright (C) 2017 Jason Summers
3 // See the file COPYING for terms of use.
5 // LHA/LZH compressed archive format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_lha
);
11 DE_DECLARE_MODULE(de_module_car_lha
);
12 DE_DECLARE_MODULE(de_module_arx
);
14 #define MAX_SUBDIR_LEVEL 32
16 #define CODE_S_LH0 0x204c4830 // SAR
17 #define CODE_S_LH5 0x204c4835 // SAR
18 #define CODE_ah0 0x2d616830U // MAR
19 #define CODE_ari 0x2d617269U // MAR
20 #define CODE_hf0 0x2d686630U // MAR
21 #define CODE_lZ0 0x2d6c5a30U // PUT
22 #define CODE_lZ1 0x2d6c5a31U // PUT
23 #define CODE_lZ5 0x2d6c5a35U // PUT
24 #define CODE_lh0 0x2d6c6830U
25 #define CODE_lh1 0x2d6c6831U
26 #define CODE_lh2 0x2d6c6832U
27 #define CODE_lh3 0x2d6c6833U
28 #define CODE_lh4 0x2d6c6834U
29 #define CODE_lh5 0x2d6c6835U
30 #define CODE_lh6 0x2d6c6836U
31 #define CODE_lh7 0x2d6c6837U // standard, or LHARK
32 #define CODE_lh8 0x2d6c6838U
33 #define CODE_lh9 0x2d6c6839U
34 #define CODE_lha 0x2d6c6861U
35 #define CODE_lhb 0x2d6c6862U
36 #define CODE_lhc 0x2d6c6863U
37 #define CODE_lhd 0x2d6c6864U
38 #define CODE_lhe 0x2d6c6865U
39 #define CODE_lhx 0x2d6c6878U
40 #define CODE_lx1 0x2d6c7831U
41 #define CODE_lz2 0x2d6c7a32U
42 #define CODE_lz3 0x2d6c7a33U
43 #define CODE_lz4 0x2d6c7a34U
44 #define CODE_lz5 0x2d6c7a35U
45 #define CODE_lz7 0x2d6c7a37U
46 #define CODE_lz8 0x2d6c7a38U
47 #define CODE_lzs 0x2d6c7a73U
48 #define CODE_pc1 0x2d706331U
49 #define CODE_pm0 0x2d706d30U
50 #define CODE_pm1 0x2d706d31U
51 #define CODE_pm2 0x2d706d32U
53 #define TIMESTAMPIDX_INVALID (-1)
54 struct timestamp_data
{
55 struct de_timestamp ts
; // The best timestamp of this type found so far
59 struct cmpr_meth_info
;
62 u8 hlev
; // header level
66 struct cmpr_meth_info
*cmi
;
74 u8 have_hdr_crc_reported
;
77 i64 hdr_crc_field_pos
;
81 i64 compressed_data_pos
; // relative to beginning of file
82 i64 compressed_data_len
;
84 de_ucstring
*filename
;
85 de_ucstring
*fullfilename
;
86 struct timestamp_data tsdata
[DE_TIMESTAMPIDX_COUNT
];
89 typedef struct localctx_struct
{
90 de_encoding input_encoding
;
91 u8 hlev_of_first_member
;
97 struct de_crcobj
*crco
;
100 typedef void (*decompressor_fn
)(deark
*c
, lctx
*d
, struct member_data
*md
,
101 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
102 struct de_dfilter_results
*dres
);
104 struct cmpr_meth_info
{
107 decompressor_fn decompressor
;
109 char id_printable_sz
[6];
113 struct exthdr_type_info_struct
;
115 typedef void (*exthdr_decoder_fn
)(deark
*c
, lctx
*d
, struct member_data
*md
,
116 u8 id
, const struct exthdr_type_info_struct
*e
,
119 struct exthdr_type_info_struct
{
123 exthdr_decoder_fn decoder_fn
;
// Returns nonzero if x is an ASCII decimal digit ('0'..'9'), otherwise 0.
// The test is a plain byte-range comparison.
static int lha_isdigit(u8 x)
{
	return (x>='0' && x<='9');
}
// Returns nonzero if x is an ASCII letter ('A'..'Z' or 'a'..'z'),
// otherwise 0. The test is a plain byte-range comparison.
static int lha_isalpha(u8 x)
{
	return ((x>='A' && x<='Z') || (x>='a' && x<='z'));
}
136 static int lha_isalnum(u8 x
)
138 return (lha_isdigit(x
) || lha_isalpha(x
));
141 static int is_possible_cmpr_meth(const u8 m
[5])
143 if(m
[0]!=m
[4]) return 0;
144 if(m
[0]==' ' && m
[1]=='L' && m
[2]=='H' && lha_isdigit(m
[3])) return 1;
145 if(m
[0]!='-') return 0;
146 if(!lha_isalpha(m
[1]) ||
147 !lha_isalnum(m
[2]) ||
155 static void apply_timestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
156 int tsidx
, const struct de_timestamp
*ts
, int quality
)
158 if(!ts
->is_valid
) return;
159 if(tsidx
<0 || tsidx
>=DE_TIMESTAMPIDX_COUNT
) return;
160 if(quality
< md
->tsdata
[tsidx
].quality
) return;
161 md
->tsdata
[tsidx
].ts
= *ts
;
162 md
->tsdata
[tsidx
].quality
= quality
;
165 static void read_msdos_modtime(deark
*c
, lctx
*d
, struct member_data
*md
,
166 i64 pos
, const char *name
)
168 i64 mod_time_raw
, mod_date_raw
;
169 char timestamp_buf
[64];
170 struct de_timestamp tmp_timestamp
;
172 mod_time_raw
= de_getu16le(pos
);
173 mod_date_raw
= de_getu16le(pos
+2);
174 if(mod_time_raw
==0 && mod_date_raw
==0) {
175 de_dbg(c
, "%s: (not set)", name
);
178 de_dos_datetime_to_timestamp(&tmp_timestamp
, mod_date_raw
, mod_time_raw
);
179 tmp_timestamp
.tzcode
= DE_TZCODE_LOCAL
;
180 de_timestamp_to_string(&tmp_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
181 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
182 apply_timestamp(c
, d
, md
, DE_TIMESTAMPIDX_MODIFY
, &tmp_timestamp
, 10);
185 static void read_windows_FILETIME(deark
*c
, lctx
*d
, struct member_data
*md
,
186 i64 pos
, int tsidx
, const char *name
)
189 char timestamp_buf
[64];
190 struct de_timestamp tmp_timestamp
;
192 t_FILETIME
= de_geti64le(pos
);
193 de_FILETIME_to_timestamp(t_FILETIME
, &tmp_timestamp
, 0x1);
194 if(t_FILETIME
<=0) tmp_timestamp
.is_valid
= 0;
195 de_timestamp_to_string(&tmp_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
196 de_dbg(c
, "%s: %"I64_FMT
" (%s)", name
, t_FILETIME
, timestamp_buf
);
197 apply_timestamp(c
, d
, md
, tsidx
, &tmp_timestamp
, 90);
200 static void read_unix_timestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
201 i64 pos
, int tsidx
, const char *name
)
204 char timestamp_buf
[64];
205 struct de_timestamp tmp_timestamp
;
207 t
= de_geti32le(pos
);
208 de_unix_time_to_timestamp(t
, &tmp_timestamp
, 0x1);
209 de_timestamp_to_string(&tmp_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
210 de_dbg(c
, "%s: %d (%s)", name
, (int)t
, timestamp_buf
);
211 apply_timestamp(c
, d
, md
, tsidx
, &tmp_timestamp
, 50);
214 static void rp_add_component(deark
*c
, lctx
*d
, struct member_data
*md
,
215 dbuf
*f
, i64 pos
, i64 len
, struct de_strarray
*sa
, de_ucstring
*tmpstr
)
218 ucstring_empty(tmpstr
);
219 dbuf_read_to_ucstring(f
, pos
, len
, tmpstr
, 0, md
->encoding
);
220 de_strarray_push(sa
, tmpstr
);
223 static void read_path_to_strarray(deark
*c
, lctx
*d
, struct member_data
*md
,
224 dbuf
*inf
, i64 pos
, i64 len
, struct de_strarray
*sa
, int is_exthdr_dirname
)
226 dbuf
*tmpdbuf
= NULL
;
227 de_ucstring
*tmpstr
= NULL
;
228 i64 component_startpos
;
232 tmpstr
= ucstring_create(c
);
234 tmpdbuf
= dbuf_create_membuf(c
, len
, 0);
235 dbuf_copy(inf
, pos
, len
, tmpdbuf
);
237 component_startpos
= 0;
240 for(i
=0; i
<len
; i
++) {
243 ch
= dbuf_getbyte(tmpdbuf
, i
);
244 if(ch
==0x00) break; // Tolerate NUL termination
245 if((is_exthdr_dirname
&& ch
==0xff) ||
246 (!is_exthdr_dirname
&& (ch
=='\\' || ch
=='/')))
248 component_len
= i
- component_startpos
;
249 rp_add_component(c
, d
, md
, tmpdbuf
, component_startpos
, component_len
, sa
, tmpstr
);
250 component_startpos
= i
+1;
257 rp_add_component(c
, d
, md
, tmpdbuf
, component_startpos
, component_len
, sa
, tmpstr
);
260 ucstring_destroy(tmpstr
);
263 static void read_filename_hlev0(deark
*c
, lctx
*d
, struct member_data
*md
,
266 struct de_strarray
*sa
= NULL
;
269 ucstring_empty(md
->filename
);
272 md
->filename
= ucstring_create(c
);
275 sa
= de_strarray_create(c
, MAX_SUBDIR_LEVEL
+2);
276 read_path_to_strarray(c
, d
, md
, c
->infile
, pos
, len
, sa
, 0);
278 de_strarray_make_path(sa
, md
->filename
, DE_MPFLAG_NOTRAILINGSLASH
);
279 de_dbg(c
, "filename (parsed): \"%s\"", ucstring_getpsz_d(md
->filename
));
281 de_strarray_destroy(sa
);
284 static void read_filename_hlev1_or_exthdr(deark
*c
, lctx
*d
, struct member_data
*md
,
290 ucstring_empty(md
->filename
);
293 md
->filename
= ucstring_create(c
);
296 // Some files seem to assume NUL termination is allowed.
297 dbuf_read_to_ucstring(c
->infile
, pos
, len
,
298 md
->filename
, DE_CONVFLAG_STOP_AT_NUL
, md
->encoding
);
299 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(md
->filename
));
301 // I don't think slashes are allowed
302 for(i
=0; i
<md
->filename
->len
; i
++) {
303 if(md
->filename
->str
[i
]=='/') {
304 md
->filename
->str
[i
]='_';
309 static void exthdr_common(deark
*c
, lctx
*d
, struct member_data
*md
,
310 u8 id
, const struct exthdr_type_info_struct
*e
,
314 md
->hdr_crc_reported
= (u32
)de_getu16le(pos
);
315 md
->have_hdr_crc_reported
= 1;
316 md
->hdr_crc_field_pos
= pos
;
317 de_dbg(c
, "header crc (reported): 0x%04x", (unsigned int)md
->hdr_crc_reported
);
318 // TODO: Additional information
321 static void exthdr_filename(deark
*c
, lctx
*d
, struct member_data
*md
,
322 u8 id
, const struct exthdr_type_info_struct
*e
,
325 read_filename_hlev1_or_exthdr(c
, d
, md
, pos
, dlen
);
328 static void exthdr_dirname(deark
*c
, lctx
*d
, struct member_data
*md
,
329 u8 id
, const struct exthdr_type_info_struct
*e
,
332 struct de_strarray
*dirname_sa
= NULL
;
335 ucstring_empty(md
->dirname
);
338 md
->dirname
= ucstring_create(c
);
341 dirname_sa
= de_strarray_create(c
, MAX_SUBDIR_LEVEL
+2);
342 // 0xff is used as the path separator. Don't know what happens if a directory
343 // name contains an actual 0xff byte.
344 read_path_to_strarray(c
, d
, md
, c
->infile
, pos
, dlen
, dirname_sa
, 1);
345 de_strarray_make_path(dirname_sa
, md
->dirname
, DE_MPFLAG_NOTRAILINGSLASH
);
346 de_dbg(c
, "%s (parsed): \"%s\"", e
->name
, ucstring_getpsz_d(md
->dirname
));
348 de_strarray_destroy(dirname_sa
);
351 static void exthdr_msdosattribs(deark
*c
, lctx
*d
, struct member_data
*md
,
352 u8 id
, const struct exthdr_type_info_struct
*e
,
356 de_ucstring
*descr
= NULL
;
358 if(dlen
<2) goto done
;
359 attribs
= (u32
)de_getu16le(pos
);
360 descr
= ucstring_create(c
);
361 de_describe_dos_attribs(c
, (UI
)attribs
, descr
, 0);
362 de_dbg(c
, "%s: 0x%04x (%s)", e
->name
, (UI
)attribs
, ucstring_getpsz_d(descr
));
364 ucstring_destroy(descr
);
367 static void exthdr_filesize(deark
*c
, lctx
*d
, struct member_data
*md
,
368 u8 id
, const struct exthdr_type_info_struct
*e
,
371 // TODO: Support this
372 de_warn(c
, "Unsupported \"file size\" extended header found. This may prevent "
373 "the rest of the file from being processed correctly.");
376 static void exthdr_windowstimestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
377 u8 id
, const struct exthdr_type_info_struct
*e
,
381 read_windows_FILETIME(c
, d
, md
, pos
, DE_TIMESTAMPIDX_CREATE
, "create time");
382 read_windows_FILETIME(c
, d
, md
, pos
+8, DE_TIMESTAMPIDX_MODIFY
, "mod time ");
383 read_windows_FILETIME(c
, d
, md
, pos
+16, DE_TIMESTAMPIDX_ACCESS
, "access time");
386 static void interpret_unix_perms(deark
*c
, lctx
*d
, struct member_data
*md
, unsigned int mode
)
388 if(mode
& 0100000) { // regular file
389 if(mode
& 0111) { // executable
390 md
->is_executable
= 1;
393 md
->is_nonexecutable
= 1;
397 if((mode
& 0170000) == 0120000) {
398 md
->is_special
= 1; // symlink
402 static void exthdr_unixperms(deark
*c
, lctx
*d
, struct member_data
*md
,
403 u8 id
, const struct exthdr_type_info_struct
*e
,
409 mode
= (unsigned int)de_getu16le(pos
);
410 de_dbg(c
, "mode: octal(%06o)", mode
);
411 interpret_unix_perms(c
, d
, md
, mode
);
414 static void exthdr_unixuidgid(deark
*c
, lctx
*d
, struct member_data
*md
,
415 u8 id
, const struct exthdr_type_info_struct
*e
,
421 // It's strange that the GID comes first, while the UID comes first in the
422 // level-0 "extended area".
423 gid
= de_getu16le(pos
);
424 de_dbg(c
, "gid: %d", (int)gid
);
425 uid
= de_getu16le(pos
+2);
426 de_dbg(c
, "uid: %d", (int)uid
);
429 static void exthdr_unixtimestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
430 u8 id
, const struct exthdr_type_info_struct
*e
,
434 read_unix_timestamp(c
, d
, md
, pos
, DE_TIMESTAMPIDX_MODIFY
, "last-modified");
437 static void exthdr_lev3newattribs2(deark
*c
, lctx
*d
, struct member_data
*md
,
438 u8 id
, const struct exthdr_type_info_struct
*e
,
446 // [Documented as "creation time", but this is a Unix-style header, so I
447 // wonder if someone mistranslated "ctime" (=change time).]
448 read_unix_timestamp(c
, d
, md
, pos
+12, TIMESTAMPIDX_INVALID
, "create(?) time");
450 read_unix_timestamp(c
, d
, md
, pos
+16, DE_TIMESTAMPIDX_ACCESS
, "access time ");
453 static void exthdr_codepage(deark
*c
, lctx
*d
, struct member_data
*md
,
454 u8 id
, const struct exthdr_type_info_struct
*e
,
458 de_encoding n_encoding
;
462 n_codepage
= (int)de_geti32le(pos
);
463 n_encoding
= de_windows_codepage_to_encoding(c
, n_codepage
, descr
, sizeof(descr
), 0);
464 de_dbg(c
, "codepage: %d (%s)", n_codepage
, descr
);
465 if(n_encoding
!= DE_ENCODING_UNKNOWN
) {
466 md
->encoding
= n_encoding
;
470 static const struct exthdr_type_info_struct exthdr_type_info_arr
[] = {
471 { 0x00, 0, "common", exthdr_common
},
472 { 0x01, 0, "filename", exthdr_filename
},
473 { 0x02, 0, "dir name", exthdr_dirname
},
474 { 0x39, 0, "multi-disc", NULL
},
475 { 0x3f, 0, "comment", NULL
},
476 { 0x40, 0, "MS-DOS file attribs", exthdr_msdosattribs
},
477 { 0x41, 0, "Windows timestamp", exthdr_windowstimestamp
},
478 { 0x42, 0, "MS-DOS file size", exthdr_filesize
},
479 { 0x43, 0, "time zone", NULL
},
480 { 0x44, 0, "UTF-16 filename", NULL
},
481 { 0x45, 0, "UTF-16 dir name", NULL
},
482 { 0x46, 0, "codepage", exthdr_codepage
},
483 { 0x50, 0, "Unix perms", exthdr_unixperms
},
484 { 0x51, 0, "Unix UID/GID", exthdr_unixuidgid
},
485 { 0x52, 0, "Unix group name", NULL
},
486 { 0x53, 0, "Unix username", NULL
},
487 { 0x54, 0, "Unix timestamp", exthdr_unixtimestamp
},
488 { 0x7d, 0, "capsule", NULL
},
489 { 0x7e, 0, "OS/2 extended attribs", NULL
},
490 { 0x7f, 0, "level 3 new attribs type-1", NULL
}, // (OS/2 only)
491 { 0xff, 0, "level 3 new attribs type-2", exthdr_lev3newattribs2
}
494 static void destroy_member_data(deark
*c
, struct member_data
*md
)
497 ucstring_destroy(md
->dirname
);
498 ucstring_destroy(md
->filename
);
499 ucstring_destroy(md
->fullfilename
);
504 static const struct exthdr_type_info_struct
*get_exthdr_type_info(u8 id
)
508 for(i
=0; i
<DE_ARRAYCOUNT(exthdr_type_info_arr
); i
++) {
509 if(id
== exthdr_type_info_arr
[i
].id
) {
510 return &exthdr_type_info_arr
[i
];
516 static void do_read_ext_header(deark
*c
, lctx
*d
, struct member_data
*md
,
517 i64 pos1
, i64 len
, i64 dlen
)
521 const struct exthdr_type_info_struct
*e
= NULL
;
524 id
= de_getbyte(pos1
);
525 e
= get_exthdr_type_info(id
);
527 name
= e
? e
->name
: "?";
529 de_dbg(c
, "ext header at %d, len=%d (1+%d+%d), id=0x%02x (%s)", (int)pos1
, (int)len
,
530 (int)(dlen
-1), (int)(len
-dlen
), (unsigned int)id
, name
);
532 if(dlen
<1) return; // Invalid header, too short to even have an id field
534 if(e
&& e
->decoder_fn
) {
536 e
->decoder_fn(c
, d
, md
, id
, e
, pos1
+1, dlen
-1);
537 de_dbg_indent(c
, -1);
540 if(c
->debug_level
>=2) {
541 de_dbg_hexdump(c
, c
->infile
, pos1
+1, dlen
-1, 256, NULL
, 0x1);
// Maps an LHA "OS id" byte to a human-readable name for debug output.
// Returns "?" for ids that are not in the list; never returns NULL.
// Names ending in '?' are the original author's uncertain identifications.
static const char *get_os_name(u8 id)
{
	const char *name = NULL;

	switch(id) {
	case ' ': name="unspecified"; break;
	case '2': name="OS/2"; break;
	case '3': name="OS/386?"; break;
	case '9': name="OS-9"; break;
	case 'A': name="Amiga"; break;
	case 'C': name="CP/M"; break;
	case 'F': name="FLEX"; break;
	case 'H': name="Human68K"; break;
	case 'J': name="JVM"; break;
	case 'K': name="OS-9/68K"; break;
	case 'M': name="DOS"; break;
	case 'R': name="RUNser"; break;
	case 'T': name="TownsOS"; break;
	case 'U': name="Unix"; break;
	case 'W': name="Windows NT"; break;
	case 'a': name="Atari ST?"; break;
	case 'm': name="Macintosh"; break;
	case 'w': name="Windows"; break;
	default: break; // unknown id: fall through to "?"
	}
	return name?name:"?";
}
572 static void do_lev0_ext_area(deark
*c
, lctx
*d
, struct member_data
*md
,
576 md
->os_id
= de_getbyte(pos1
);
577 de_dbg(c
, "OS id: %d ('%c') (%s)", (int)md
->os_id
,
578 de_byte_to_printable_char(md
->os_id
), get_os_name(md
->os_id
));
585 if(len
<12) goto done
;
587 read_unix_timestamp(c
, d
, md
, pos1
+2, DE_TIMESTAMPIDX_MODIFY
, "last-modified");
589 mode
= (unsigned int)de_getu16le(pos1
+6);
590 de_dbg(c
, "mode: octal(%06o)", mode
);
591 interpret_unix_perms(c
, d
, md
, mode
);
593 uid
= de_getu16le(pos1
+8);
594 de_dbg(c
, "uid: %d", (int)uid
);
595 gid
= de_getu16le(pos1
+10);
596 de_dbg(c
, "gid: %d", (int)gid
);
602 // AFAICT, we're expected to think of the extended headers as a kind of linked
603 // list. The last field in each node is the "size of next node" (instead of
604 // "pointer to next node", as a real linked list would have). A size of 0 is
605 // like a "nil" pointer, and marks the end of the list.
606 // The "size of the first node" field (analogous to the "head" pointer) is
607 // conceptually not part of the extended headers section.
609 // Note that if we simply shift our frame of reference, this format is identical
610 // to a more typical length-prefixed format. But our code follows the
611 // linked-list model, to make it more consistent with most LHA documentation,
612 // and the various "size" fields.
614 // A return value of 0 means we failed to calculate the size of the
615 // extended headers segment.
616 static int do_read_ext_headers(deark
*c
, lctx
*d
, struct member_data
*md
,
617 i64 pos1
, i64 len
, i64 first_ext_hdr_size
, i64
*tot_bytes_consumed
)
620 i64 this_ext_hdr_size
, next_ext_hdr_size
;
622 i64 size_of_size_field
;
624 *tot_bytes_consumed
= 0;
626 if(first_ext_hdr_size
==0) {
630 de_dbg(c
, "ext headers section at %d", (int)pos
);
633 size_of_size_field
= (md
->hlev
==3) ? 4 : 2;
635 next_ext_hdr_size
= first_ext_hdr_size
;
637 this_ext_hdr_size
= next_ext_hdr_size
;
638 if(this_ext_hdr_size
==0) {
640 *tot_bytes_consumed
= pos
- pos1
;
643 if(this_ext_hdr_size
<size_of_size_field
) goto done
;
644 if(pos
+this_ext_hdr_size
> pos1
+len
) goto done
;
646 do_read_ext_header(c
, d
, md
, pos
, this_ext_hdr_size
, this_ext_hdr_size
-size_of_size_field
);
648 // Each ext header ends with a "size of next header" field.
649 // We'll read it at this level, instead of in do_read_ext_header().
650 pos
+= this_ext_hdr_size
-size_of_size_field
;
651 if(size_of_size_field
==2) {
652 next_ext_hdr_size
= de_getu16le(pos
);
655 next_ext_hdr_size
= de_getu32le(pos
);
657 pos
+= size_of_size_field
;
662 de_dbg(c
, "size of ext headers section: %d", (int)*tot_bytes_consumed
);
665 de_dbg(c
, "failed to parse all extended headers");
667 de_dbg_indent(c
, -1);
671 static void make_fullfilename(deark
*c
, lctx
*d
, struct member_data
*md
)
673 if(md
->fullfilename
) return;
676 md
->filename
= ucstring_create(c
);
678 md
->fullfilename
= ucstring_create(c
);
681 ucstring_append_ucstring(md
->fullfilename
, md
->filename
);
685 ucstring_append_ucstring(md
->fullfilename
, md
->dirname
);
688 if(ucstring_isnonempty(md
->dirname
)) {
689 ucstring_append_ucstring(md
->fullfilename
, md
->dirname
);
690 ucstring_append_sz(md
->fullfilename
, "/", DE_ENCODING_LATIN1
);
692 if(ucstring_isnonempty(md
->filename
)) {
693 ucstring_append_ucstring(md
->fullfilename
, md
->filename
);
696 ucstring_append_char(md
->fullfilename
, '_');
702 static void decompress_uncompressed(deark
*c
, lctx
*d
, struct member_data
*md
,
703 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
704 struct de_dfilter_results
*dres
)
706 fmtutil_decompress_uncompressed(c
, dcmpri
, dcmpro
, dres
, 0);
709 static void decompress_lhark_lh7(deark
*c
, lctx
*d
, struct member_data
*md
,
710 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
711 struct de_dfilter_results
*dres
)
713 struct de_lzh_params lzhparams
;
715 de_zeromem(&lzhparams
, sizeof(struct de_lzh_params
));
716 lzhparams
.fmt
= DE_LZH_FMT_LHARK
;
717 lzhparams
.zero_codes_block_behavior
= DE_LZH_ZCB_65536
;
718 lzhparams
.warn_about_zero_codes_block
= 1;
719 fmtutil_decompress_lzh(c
, dcmpri
, dcmpro
, dres
, &lzhparams
);
722 // Compression method will be selected based on id_raw[3], which
723 // should be '4'...'8'.
724 static void decompress_lh5x(deark
*c
, lctx
*d
, struct member_data
*md
,
725 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
726 struct de_dfilter_results
*dres
)
728 struct de_lzh_params lzhparams
;
730 if(d
->lhark_fmt
&& md
->cmi
->id_raw
[3]=='7') {
731 decompress_lhark_lh7(c
, d
, md
, dcmpri
, dcmpro
, dres
);
735 de_zeromem(&lzhparams
, sizeof(struct de_lzh_params
));
736 lzhparams
.fmt
= DE_LZH_FMT_LH5LIKE
;
737 lzhparams
.subfmt
= md
->cmi
->id_raw
[3];
738 lzhparams
.zero_codes_block_behavior
= DE_LZH_ZCB_65536
;
739 lzhparams
.warn_about_zero_codes_block
= 1;
740 fmtutil_decompress_lzh(c
, dcmpri
, dcmpro
, dres
, &lzhparams
);
743 static void decompress_lh5(deark
*c
, lctx
*d
, struct member_data
*md
,
744 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
745 struct de_dfilter_results
*dres
)
747 struct de_lzh_params lzhparams
;
749 de_zeromem(&lzhparams
, sizeof(struct de_lzh_params
));
750 lzhparams
.fmt
= DE_LZH_FMT_LH5LIKE
;
751 lzhparams
.subfmt
= '5';
752 lzhparams
.zero_codes_block_behavior
= DE_LZH_ZCB_65536
;
753 lzhparams
.warn_about_zero_codes_block
= 1;
754 fmtutil_decompress_lzh(c
, dcmpri
, dcmpro
, dres
, &lzhparams
);
757 static void decompress_lz5(deark
*c
, lctx
*d
, struct member_data
*md
,
758 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
759 struct de_dfilter_results
*dres
)
761 fmtutil_decompress_szdd(c
, dcmpri
, dcmpro
, dres
, 0x1);
764 struct cmpr_meth_array_item
{
768 decompressor_fn decompressor
;
771 // Compression methods with a decompressor or a description are usually
772 // listed here, but note that it is also possible for get_cmpr_meth_info()
773 // to handle them procedurally.
774 static const struct cmpr_meth_array_item cmpr_meth_arr
[] = {
775 { 0x00, CODE_lhd
, "directory", NULL
},
776 { 0x00, CODE_lh0
, "uncompressed", decompress_uncompressed
},
777 { 0x00, CODE_lh1
, "LZ77-4K, adaptive Huffman", NULL
},
778 { 0x00, CODE_lh4
, NULL
, decompress_lh5x
},
779 { 0x00, CODE_lh5
, "LZ77-8K, static Huffman", decompress_lh5
},
780 { 0x00, CODE_lh6
, "LZ77-32K, static Huffman", decompress_lh5x
},
781 { 0x00, CODE_lh7
, NULL
, decompress_lh5x
},
782 { 0x00, CODE_lh8
, NULL
, decompress_lh5x
},
783 { 0x00, CODE_lz4
, "uncompressed (LArc)", decompress_uncompressed
},
784 { 0x00, CODE_lz5
, "LZSS-4K (LArc)", decompress_lz5
},
785 { 0x00, CODE_pm0
, "uncompressed (PMArc)", decompress_uncompressed
},
786 { 0x00, CODE_lZ0
, "uncompressed (MicroFox PUT)", decompress_uncompressed
},
787 { 0x00, CODE_lZ1
, "MicroFox PUT lZ1", NULL
},
788 { 0x00, CODE_lZ5
, "MicroFox PUT lZ5", decompress_lh5
},
789 { 0x00, CODE_S_LH0
, "uncompressed (SAR)", decompress_uncompressed
},
790 { 0x00, CODE_S_LH5
, "SAR LH5", decompress_lh5
}
793 static const u32 other_known_cmpr_methods
[] = {
794 CODE_ah0
, CODE_ari
, CODE_hf0
,
795 CODE_lh2
, CODE_lh3
, CODE_lh4
, CODE_lh7
, CODE_lh8
, CODE_lh9
,
796 CODE_lha
, CODE_lhb
, CODE_lhc
, CODE_lhe
, CODE_lhx
, CODE_lx1
,
797 CODE_lz2
, CODE_lz3
, CODE_lz7
, CODE_lz8
, CODE_lzs
,
798 CODE_pc1
, CODE_pm1
, CODE_pm2
};
800 // Only call this after is_possible_cmpr_meth() returns nonzero.
801 // Caller allocates cmi, and initializes to zeroes.
802 static void get_cmpr_meth_info(const u8 idbuf
[5], struct cmpr_meth_info
*cmi
)
805 const struct cmpr_meth_array_item
*cmai
= NULL
;
807 // The first 4 bytes are unique for all known methods.
808 cmi
->uniq_id
= (u32
)de_getu32be_direct(idbuf
);
810 de_memcpy(cmi
->id_raw
, idbuf
, 5);
812 // All "possible" methods only use printable characters.
813 de_memcpy(cmi
->id_printable_sz
, idbuf
, 5);
814 cmi
->id_printable_sz
[5] = '\0';
816 for(k
=0; k
<DE_ARRAYCOUNT(cmpr_meth_arr
); k
++) {
817 if(cmpr_meth_arr
[k
].uniq_id
== cmi
->uniq_id
) {
818 cmai
= &cmpr_meth_arr
[k
];
824 cmi
->is_recognized
= 1;
825 cmi
->decompressor
= cmai
->decompressor
;
828 for(k
=0; k
<DE_ARRAYCOUNT(other_known_cmpr_methods
); k
++) {
829 if(other_known_cmpr_methods
[k
] == cmi
->uniq_id
) {
830 cmi
->is_recognized
= 1;
836 if(cmai
&& cmai
->descr
) {
837 de_strlcpy(cmi
->descr
, cmai
->descr
, sizeof(cmi
->descr
));
839 else if(cmi
->is_recognized
) {
840 de_strlcpy(cmi
->descr
, "recognized, but no info avail.", sizeof(cmi
->descr
));
843 de_strlcpy(cmi
->descr
, "?", sizeof(cmi
->descr
));
847 static void our_writelistener_cb(dbuf
*f
, void *userdata
, const u8
*buf
, i64 buf_len
)
849 struct de_crcobj
*crco
= (struct de_crcobj
*)userdata
;
850 de_crcobj_addbuf(crco
, buf
, buf_len
);
853 static void do_extract_file(deark
*c
, lctx
*d
, struct member_data
*md
)
859 u8 dcmpr_attempted
= 0;
861 struct de_dfilter_in_params dcmpri
;
862 struct de_dfilter_out_params dcmpro
;
863 struct de_dfilter_results dres
;
865 if(!md
->cmi
) goto done
;
867 de_dbg(c
, "[not extracting special file]");
870 else if(md
->is_dir
) {
873 else if(!(md
->cmi
->decompressor
)) {
874 if(!d
->unsupp_warned
) {
875 de_info(c
, "Note: LHA support is incomplete. Some common "
876 "compression methods are not supported.");
877 d
->unsupp_warned
= 1;
879 de_err(c
, "%s: Unsupported compression method '%s'",
880 ucstring_getpsz_d(md
->fullfilename
), md
->cmi
->id_printable_sz
);
884 fi
= de_finfo_create(c
);
886 for(tsidx
=0; tsidx
<DE_TIMESTAMPIDX_COUNT
; tsidx
++) {
887 if(md
->tsdata
[tsidx
].ts
.is_valid
) {
888 fi
->timestamp
[tsidx
] = md
->tsdata
[tsidx
].ts
;
893 fi
->is_directory
= 1;
895 else if(md
->is_executable
) {
896 fi
->mode_flags
|= DE_MODEFLAG_EXE
;
898 else if(md
->is_nonexecutable
) {
899 fi
->mode_flags
|= DE_MODEFLAG_NONEXE
;
902 de_finfo_set_name_from_ucstring(c
, fi
, md
->fullfilename
, DE_SNFLAG_FULLPATH
);
903 fi
->original_filename_flag
= 1;
905 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
906 de_crcobj_reset(d
->crco
);
907 dbuf_set_writelistener(outf
, our_writelistener_cb
, (void*)d
->crco
);
909 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
910 dcmpri
.f
= c
->infile
;
911 dcmpri
.pos
= md
->compressed_data_pos
;
912 dcmpri
.len
= md
->compressed_data_len
;
914 dcmpro
.expected_len
= md
->orig_size
;
915 dcmpro
.len_known
= 1;
917 if(md
->is_dir
) goto done
; // For directories, we're done.
920 if(md
->cmi
->decompressor
) {
921 md
->cmi
->decompressor(c
, d
, md
, &dcmpri
, &dcmpro
, &dres
);
925 de_err(c
, "%s: Decompression failed: %s", ucstring_getpsz_d(md
->fullfilename
),
926 de_dfilter_get_errmsg(c
, &dres
));
930 crc_calc
= de_crcobj_getval(d
->crco
);
931 de_dbg(c
, "crc (calculated): 0x%04x", (unsigned int)crc_calc
);
932 if(crc_calc
!= md
->crc16
) {
933 de_err(c
, "%s: CRC check failed", ucstring_getpsz_d(md
->fullfilename
));
940 if(dcmpr_attempted
&& md
->cmi
&& md
->cmi
->uniq_id
==CODE_lh7
) {
942 d
->lh7_success_flag
= 1;
944 d
->lh7_failed_flag
= 1;
947 de_finfo_destroy(c
, fi
);
950 static int cksum_cbfn(struct de_bufferedreadctx
*brctx
, const u8
*buf
, i64 buf_len
)
952 UI
*pcksum
= (UI
*)brctx
->userdata
;
955 for(i
=0; i
<buf_len
; i
++) {
956 *pcksum
= (*pcksum
+ buf
[i
]) & 0xff;
961 static void do_check_header_crc(deark
*c
, lctx
*d
, struct member_data
*md
)
963 // LHA members don't have to have a header CRC field, though it's probably
964 // considered best practice to have one when the checksum field doesn't
965 // exist, or there are any extended headers.
966 if(!md
->have_hdr_crc_reported
) return;
967 de_crcobj_reset(d
->crco
);
969 // Everything before the CRC field:
970 de_crcobj_addslice(d
->crco
, c
->infile
, md
->member_pos
,
971 md
->hdr_crc_field_pos
- md
->member_pos
);
973 // The zeroed-out CRC field:
974 de_crcobj_addzeroes(d
->crco
, 2);
976 // Everything after the CRC field:
977 de_crcobj_addslice(d
->crco
, c
->infile
, md
->hdr_crc_field_pos
+2,
978 md
->compressed_data_pos
- (md
->hdr_crc_field_pos
+2));
980 md
->hdr_crc_calc
= de_crcobj_getval(d
->crco
);
981 de_dbg(c
, "header crc (calculated): 0x%04x", (UI
)md
->hdr_crc_calc
);
982 if(md
->hdr_crc_calc
!= md
->hdr_crc_reported
) {
983 de_err(c
, "Wrong header CRC: reported=0x%04x, calculated=0x%04x",
984 (UI
)md
->hdr_crc_reported
, (UI
)md
->hdr_crc_calc
);
988 enum lha_whats_next_enum
{
991 LHA_WN_TRAILER_AND_JUNK
,
996 static enum lha_whats_next_enum
lha_classify_whats_next(deark
*c
, lctx
*d
, i64 pos
, i64 len
)
1000 if(len
<=0) return LHA_WN_NOTHING
;
1001 b
[0] = de_getbyte(pos
);
1002 if(b
[0]==0 && len
<=2) return LHA_WN_TRAILER
;
1003 if(b
[0]==0 && len
<21) return LHA_WN_TRAILER_AND_JUNK
;
1004 de_read(&b
[1], pos
+1, sizeof(b
)-1);
1005 if(b
[0]==0 && b
[1]==0) return LHA_WN_TRAILER_AND_JUNK
;
1006 if(b
[0]==0 && b
[20]!=2) return LHA_WN_TRAILER_AND_JUNK
;
1007 if(b
[20]>3) return LHA_WN_JUNK
;
1008 if(is_possible_cmpr_meth(&b
[2])) return LHA_WN_MEMBER
;
1012 // This single function parses all the different header formats, using lots of
1013 // "if" statements. It is messy, but it's a no-win situation.
1014 // The alternative of four separate functions would have a lot of redundant
1015 // code, and be harder to maintain.
1017 // Caller allocates and initializes md.
1018 // If the member was successfully parsed, sets md->total_size and returns nonzero.
1019 static int do_read_member(deark
*c
, lctx
*d
, struct member_data
*md
)
1022 i64 lev0_header_size
= 0;
1023 i64 lev1_base_header_size
= 0;
1024 i64 lev1_skip_size
= 0;
1025 i64 lev2_total_header_size
= 0;
1026 i64 lev3_header_size
= 0;
1027 i64 pos1
= md
->member_pos
;
1030 i64 exthdr_bytes_consumed
= 0;
1033 UI hdr_checksum_reported
= 0;
1034 u8 has_hdr_checksum
= 0;
1037 enum lha_whats_next_enum wn
;
1038 u8 cmpr_meth_raw
[5];
1039 int saved_indent_level
;
1041 de_dbg_indent_save(c
, &saved_indent_level
);
1043 nbytes_avail
= c
->infile
->len
- pos1
;
1044 wn
= lha_classify_whats_next(c
, d
, pos1
, nbytes_avail
);
1045 if(wn
!=LHA_WN_MEMBER
) {
1046 if(d
->member_count
==0) {
1047 de_err(c
, "Not an LHA file");
1049 else if(wn
==LHA_WN_TRAILER
|| wn
==LHA_WN_TRAILER_AND_JUNK
) {
1050 de_dbg(c
, "trailer at %"I64_FMT
, pos1
);
1051 if(wn
==LHA_WN_TRAILER_AND_JUNK
) {
1052 de_info(c
, "Note: %"I64_FMT
" extra bytes at end of file (offset %"I64_FMT
")",
1053 nbytes_avail
-1, pos1
+1);
1056 else if(wn
==LHA_WN_JUNK
) {
1057 de_warn(c
, "%"I64_FMT
" bytes of non-LHA data found at end of file (offset %"I64_FMT
")",
1058 nbytes_avail
, pos1
);
1063 de_dbg(c
, "member at %"I64_FMT
, pos1
);
1064 de_dbg_indent(c
, 1);
1066 // Look ahead to figure out the header format version.
1067 // This byte was originally the high byte of the "MS-DOS file attribute" field,
1068 // which happened to always be zero.
1069 // In later LHA versions, it is overloaded to identify the header format
1070 // version (called "header level" in LHA jargon).
1071 md
->hlev
= de_getbyte(pos1
+20);
1072 de_dbg(c
, "header level: %d", (int)md
->hlev
);
1074 goto done
; // Shouldn't be possible; checked in lha_classify_whats_next().
1077 if(d
->member_count
==0) {
1078 d
->hlev_of_first_member
= md
->hlev
;
1082 lev0_header_size
= (i64
)de_getbyte_p(&pos
);
1083 de_dbg(c
, "header size: (2+)%d", (int)lev0_header_size
);
1084 hdr_checksum_reported
= (UI
)de_getbyte_p(&pos
);
1085 has_hdr_checksum
= 1;
1086 dbuf_buffered_read(c
->infile
, pos
, lev0_header_size
, cksum_cbfn
, (void*)&md
->hdr_checksum_calc
);
1088 else if(md
->hlev
==1) {
1089 lev1_base_header_size
= (i64
)de_getbyte_p(&pos
);
1090 de_dbg(c
, "base header size: %d", (int)lev1_base_header_size
);
1091 hdr_checksum_reported
= (UI
)de_getbyte_p(&pos
);
1092 has_hdr_checksum
= 1;
1093 dbuf_buffered_read(c
->infile
, pos
, lev1_base_header_size
, cksum_cbfn
, (void*)&md
->hdr_checksum_calc
);
1095 else if(md
->hlev
==2) {
1096 lev2_total_header_size
= de_getu16le_p(&pos
);
1097 de_dbg(c
, "total header size: %d", (int)lev2_total_header_size
);
1099 else if(md
->hlev
==3) {
1101 lev3_word_size
= de_getu16le_p(&pos
);
1102 de_dbg(c
, "word size: %d", (int)lev3_word_size
);
1103 if(lev3_word_size
!=4) {
1104 de_err(c
, "Unsupported word size: %d", (int)lev3_word_size
);
1109 if(has_hdr_checksum
) {
1110 de_dbg(c
, "header checksum (reported): 0x%02x", hdr_checksum_reported
);
1111 de_dbg(c
, "header checksum (calculated): 0x%02x", md
->hdr_checksum_calc
);
1112 if(md
->hdr_checksum_calc
!= hdr_checksum_reported
) {
1113 de_err(c
, "Wrong header checksum: reported=0x%02x, calculated=0x%02x",
1114 hdr_checksum_reported
, md
->hdr_checksum_calc
);
1118 de_read(cmpr_meth_raw
, pos
, 5);
1119 md
->cmi
= de_malloc(c
, sizeof(struct cmpr_meth_info
));
1120 get_cmpr_meth_info(cmpr_meth_raw
, md
->cmi
);
1121 de_dbg(c
, "cmpr method: '%s' (%s)", md
->cmi
->id_printable_sz
, md
->cmi
->descr
);
1124 if(md
->cmi
->uniq_id
== CODE_lhd
) {
1128 else if(md
->cmi
->decompressor
== decompress_uncompressed
) {
1136 // lev1_skip_size is the distance from the third byte of the extended
1137 // header section, to the end of the compressed data.
1138 lev1_skip_size
= de_getu32le_p(&pos
);
1139 de_dbg(c
, "skip size: %u", (unsigned int)lev1_skip_size
);
1140 md
->total_size
= 2 + lev1_base_header_size
+ lev1_skip_size
;
1143 md
->compressed_data_len
= de_getu32le(pos
);
1144 de_dbg(c
, "compressed size: %"I64_FMT
, md
->compressed_data_len
);
1148 md
->total_size
= 2 + lev0_header_size
+ md
->compressed_data_len
;
1150 else if(md
->hlev
==2) {
1151 md
->total_size
= lev2_total_header_size
+ md
->compressed_data_len
;
1155 md
->orig_size
= de_getu32le(pos
);
1156 de_dbg(c
, "original size: %u", (unsigned int)md
->orig_size
);
1159 if(md
->hlev
==0 || md
->hlev
==1) {
1160 read_msdos_modtime(c
, d
, md
, pos
, "last-modified");
1161 pos
+= 4; // modification time/date (MS-DOS)
1163 else if(md
->hlev
==2 || md
->hlev
==3) {
1164 read_unix_timestamp(c
, d
, md
, pos
, DE_TIMESTAMPIDX_MODIFY
, "last-modified");
1165 pos
+= 4; // Unix time
1168 attribs
= (UI
)de_getbyte_p(&pos
);
1170 de_ucstring
*attr_descr
;
1172 // This is a 2-byte field, but the high byte must be 0 here because it's
1173 // also the header level.
1174 attr_descr
= ucstring_create(c
);
1175 de_describe_dos_attribs(c
, attribs
, attr_descr
, 0);
1176 de_dbg(c
, "attribs: 0x%04x (%s)", attribs
, ucstring_getpsz_d(attr_descr
));
1177 ucstring_destroy(attr_descr
);
1180 de_dbg(c
, "obsolete attribs low byte: 0x%02x", attribs
);
1182 pos
++; // header level or high byte of attribs, already handled
1185 fnlen
= de_getbyte(pos
++);
1186 de_dbg(c
, "filename len: %d", (int)fnlen
);
1188 read_filename_hlev0(c
, d
, md
, pos
, fnlen
);
1191 read_filename_hlev1_or_exthdr(c
, d
, md
, pos
, fnlen
);
1196 md
->crc16
= (u32
)de_getu16le_p(&pos
);
1197 de_dbg(c
, "crc16 (reported): 0x%04x", (unsigned int)md
->crc16
);
1199 if(md
->hlev
==1 || md
->hlev
==2 || md
->hlev
==3) {
1200 md
->os_id
= de_getbyte_p(&pos
);
1201 de_dbg(c
, "OS id: %d ('%c') (%s)", (int)md
->os_id
,
1202 de_byte_to_printable_char(md
->os_id
), get_os_name(md
->os_id
));
1206 lev3_header_size
= de_getu32le_p(&pos
);
1207 md
->total_size
= lev3_header_size
+ md
->compressed_data_len
;
1211 i64 ext_headers_size
= (2+lev0_header_size
) - (pos
-pos1
);
1212 md
->compressed_data_pos
= pos1
+ 2 + lev0_header_size
;
1213 if(ext_headers_size
>0) {
1214 de_dbg(c
, "extended header area at %d, len=%d", (int)pos
, (int)ext_headers_size
);
1215 de_dbg_indent(c
, 1);
1216 do_lev0_ext_area(c
, d
, md
, pos
, ext_headers_size
);
1217 de_dbg_indent(c
, -1);
1220 else if(md
->hlev
==1) {
1221 i64 first_ext_hdr_size
;
1223 // The last two bytes of the base header are the size of the first ext. header.
1224 pos
= pos1
+ 2 + lev1_base_header_size
- 2;
1225 // TODO: sanitize pos?
1226 first_ext_hdr_size
= de_getu16le_p(&pos
);
1227 de_dbg(c
, "first ext hdr size: %d", (int)first_ext_hdr_size
);
1229 ret
= do_read_ext_headers(c
, d
, md
, pos
, lev1_skip_size
, first_ext_hdr_size
,
1230 &exthdr_bytes_consumed
);
1233 de_err(c
, "Error parsing extended headers at %d. Cannot extract this file.",
1239 pos
+= exthdr_bytes_consumed
;
1240 md
->compressed_data_pos
= pos
;
1241 md
->compressed_data_len
= lev1_skip_size
- exthdr_bytes_consumed
;
1243 else if(md
->hlev
==2) {
1244 i64 first_ext_hdr_size
;
1246 if(md
->os_id
=='K') {
1247 // So that some lhasa test files will work.
1248 // TODO: The extended headers section is (usually?) self-terminating, so we
1249 // should be able to parse it and figure out if this bug is present. That
1250 // would be better than just guessing.
1251 lev2_total_header_size
+= 2;
1252 md
->total_size
= lev2_total_header_size
+ md
->compressed_data_len
;
1253 de_dbg(c
, "attempting bug workaround: changing total header size to %d",
1254 (int)lev2_total_header_size
);
1257 md
->compressed_data_pos
= pos1
+lev2_total_header_size
;
1259 first_ext_hdr_size
= de_getu16le_p(&pos
);
1260 de_dbg(c
, "first ext hdr size: %d", (int)first_ext_hdr_size
);
1262 do_read_ext_headers(c
, d
, md
, pos
, pos1
+lev2_total_header_size
-pos
,
1263 first_ext_hdr_size
, &exthdr_bytes_consumed
);
1265 else if(md
->hlev
==3) {
1266 i64 first_ext_hdr_size
;
1268 md
->compressed_data_pos
= pos1
+lev3_header_size
;
1270 first_ext_hdr_size
= de_getu32le_p(&pos
);
1271 de_dbg(c
, "first ext hdr size: %d", (int)first_ext_hdr_size
);
1273 do_read_ext_headers(c
, d
, md
, pos
, pos1
+lev3_header_size
-pos
,
1274 first_ext_hdr_size
, &exthdr_bytes_consumed
);
1277 do_check_header_crc(c
, d
, md
);
1279 de_dbg(c
, "member data (%scompressed) at %"I64_FMT
", len=%"I64_FMT
,
1280 is_compressed
?"":"un",
1281 md
->compressed_data_pos
, md
->compressed_data_len
);
1283 make_fullfilename(c
, d
, md
);
1285 de_dbg_indent(c
, 1);
1286 do_extract_file(c
, d
, md
);
1287 de_dbg_indent(c
, -1);
1291 de_dbg_indent_restore(c
, saved_indent_level
);
// Module entry point for LHA archives.
// Walks the input member by member: a member_data record is allocated for the
// current position, do_read_member() processes it, and the position is then
// advanced by the member's total_size. Processing stops at end of file, when
// do_read_member() returns 0, or when total_size is less than 1.
1295 static void de_run_lha(deark
*c
, de_module_params
*mparams
)
1299 struct member_data
*md
= NULL
;
1301 d
= de_malloc(c
, sizeof(lctx
));
// LHARK mode is opt-in via "-opt lha:lhark" (the default passed here is 0).
1303 d
->lhark_fmt
= (u8
)de_get_ext_option_bool(c
, "lha:lhark", 0);
1305 // It's not really safe to guess CP437, because Japanese-encoded (CP932?)
1306 // filenames are common.
1307 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_ASCII
);
// Placeholder value; do_read_member() replaces it with the header level of
// the first member it reads.
1309 d
->hlev_of_first_member
= 0xff;
1310 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
// Nothing left to read once pos reaches the end of the input file.
1314 if(pos
>= c
->infile
->len
) break;
1316 md
= de_malloc(c
, sizeof(struct member_data
));
1317 md
->encoding
= d
->input_encoding
;
1318 md
->member_pos
= pos
;
1319 if(!do_read_member(c
, d
, md
)) goto done
;
// A size below 1 cannot move pos forward, so stop instead of continuing.
1320 if(md
->total_size
<1) goto done
;
1323 pos
+= md
->total_size
;
1325 destroy_member_data(c
, md
);
1330 destroy_member_data(c
, md
);
// Suggest LHARK mode only if it was not requested, the first member used
// header level 1, and 'lh7' decompression failed without ever succeeding.
1332 if(!d
->lhark_fmt
&& d
->hlev_of_first_member
==1 && d
->lh7_failed_flag
&&
1333 !d
->lh7_success_flag
)
1335 de_info(c
, "Note: 'lh7' decompression failed. Maybe this file uses "
1336 "LHARK compression. Try \"-opt lha:lhark\".");
1339 de_crcobj_destroy(d
->crco
);
// Format detection for LHA.
// Reads the start of the file into b and returns 0 (not LHA) when byte 20
// (the header level) is greater than 3, when the bytes at offset 2 cannot be
// a compression method ID, or when one of the size checks below fails.
// Otherwise the result is 100 if has_ext is set, else 80.
1344 static int de_identify_lha(deark
*c
)
1348 struct cmpr_meth_info cmi
;
1350 de_read(b
, 0, sizeof(b
));
1351 if(b
[20]>3) return 0; // header level
1353 if(!is_possible_cmpr_meth(&b
[2])) return 0;
// NOTE(review): the checks below are presumably selected by header level
// (0, 1, 2, 3 in that order) -- confirm against the selecting conditions.
// b[0] is the header size byte and b[21] the filename length byte: the
// header must be big enough for the fixed fields, the filename, and the
// fields that follow the filename.
1356 if(b
[0]<22) return 0;
1357 if(22 + (int)b
[21] + 2 > 2 + (int)b
[0]) return 0;
1360 if(b
[0]<25) return 0;
1361 if(22 + (int)b
[21] + 5 > 2 + (int)b
[0]) return 0;
// Here the first two bytes are read as a 16-bit little-endian size.
1364 i64 hsize
= de_getu16le_direct(&b
[0]);
1365 if(hsize
< 26) return 0;
// Here the first two bytes must be the 16-bit value 4 or 8.
1368 if((b
[0]!=4 && b
[0]!=8) || b
[1]!=0) return 0;
1371 de_zeromem(&cmi
, sizeof(struct cmpr_meth_info
));
1372 get_cmpr_meth_info(&b
[2], &cmi
);
1373 if(!cmi
.is_recognized
) {
1377 if(de_input_file_has_ext(c
, "lzh") ||
1378 de_input_file_has_ext(c
, "lha"))
1383 if(has_ext
) return 100;
1384 return 80; // Must be less than car_lha
// Help callback for the LHA module: prints the one module-specific option.
1387 static void de_help_lha(deark
*c
)
1389 de_msg(c
, "-opt lha:lhark : Enable LHARK mode (for 'lh7' compression)");
// Module registration for LHA: fills in the description and the
// run / identify / help callbacks in the module info record.
1392 void de_module_lha(deark
*c
, struct deark_module_info
*mi
)
1395 mi
->desc
= "LHA/LZH/PMA archive";
1396 mi
->run_fn
= de_run_lha
;
1397 mi
->identify_fn
= de_identify_lha
;
1398 mi
->help_fn
= de_help_lha
;
1401 /////////////////////// CAR (MylesHi!)
// Per-member state used while converting one CAR member.
1403 struct car_member_data
{
// Checksum recomputed over the rebuilt LHA header by do_car_member().
1406 UI hdr_checksum_calc
;
// Heuristic test for a CAR member header at pos.
// Returns 0 as soon as one of these checks fails:
//  - bytes 2..6 must read "-lh0-" or "-lh5-";
//  - byte 0 (header size) must equal byte 15 (filename length) plus 25;
//  - the three bytes at pos + b[15] + 24 must be 0x20 0x00 0x00.
1414 static int looks_like_car_member(deark
*c
, i64 pos
)
1418 de_read(b
, pos
, 16);
1419 if(b
[2]!='-' || b
[3]!='l'|| b
[4]!='h' || b
[6]!='-') return 0;
1420 if(b
[5]!='0' && b
[5]!='5') return 0;
1421 if((int)b
[0] != (int)b
[15] + 25) return 0;
1422 if(dbuf_memcmp(c
->infile
, pos
+ (i64
)b
[15] + 24, (const u8
*)"\x20\x00\x00", 3)) return 0;
// Converts the CAR member at md->member_pos into an LHA level-1 member and
// appends it to d->lha_outf.
// The replacement header is assembled in the scratch buffer d->hdr_tmp from
// pieces of the CAR header, its checksum byte is recomputed, and the member's
// compressed data is then copied through unchanged. On the path shown,
// md->total_size is set to the size of the CAR header plus the compressed
// data. The caller treats a zero return value as failure.
1426 static int do_car_member(deark
*c
, struct car_ctx
*d
, struct car_member_data
*md
)
1428 i64 lev1_base_header_size
;
1431 i64 compressed_data_len
;
1432 i64 pos1
= md
->member_pos
;
1434 int saved_indent_level
;
1436 de_dbg_indent_save(c
, &saved_indent_level
);
1437 de_dbg(c
, "member at %"I64_FMT
, pos1
);
1438 de_dbg_indent(c
, 1);
1440 // Figure out where everything is...
1441 lev1_base_header_size
= (i64
)de_getbyte(pos1
);
1442 de_dbg(c
, "base header size: %d", (int)lev1_base_header_size
);
1443 hdr_endpos
= pos1
+ 2 + lev1_base_header_size
;
// The filename length is derived from the header size rather than read.
1444 fnlen
= lev1_base_header_size
- 25;
1445 de_dbg(c
, "implied filename len: %d", (int)fnlen
);
1446 if(fnlen
<0) goto done
;
1448 compressed_data_len
= de_getu32le(pos1
+ 7);
1449 de_dbg(c
, "compressed size: %"I64_FMT
, compressed_data_len
);
// Give up if the member's data would extend past the end of the input file.
1450 if(hdr_endpos
+ compressed_data_len
> c
->infile
->len
) goto done
;
1452 // Convert to an LHA level-1 header
1453 dbuf_empty(d
->hdr_tmp
);
1455 // Fields through uncmpr_size are the same (we'll patch the checksum later)
1456 dbuf_copy(c
->infile
, pos1
, 15, d
->hdr_tmp
);
// In the CAR header the timestamp starts 7 bytes before the end of the
// header, and the attribute byte sits 9 bytes before the end.
1458 dbuf_copy(c
->infile
, hdr_endpos
-7, 4, d
->hdr_tmp
); // timestamp
1460 // attribute (low byte)
1461 dbuf_copy(c
->infile
, hdr_endpos
-9, 1, d
->hdr_tmp
);
1462 dbuf_writebyte(d
->hdr_tmp
, 0x01); // level identifier
1464 // Fields starting with filename length, through crc
1465 dbuf_copy(c
->infile
, pos1
+15, 1+fnlen
+2, d
->hdr_tmp
);
1467 dbuf_writebyte(d
->hdr_tmp
, 77); // OS ID = 'M' = MS-DOS
1469 // Recalculate checksum
// The checksum is taken over lev1_base_header_size bytes starting at
// offset 2 of the new header, then stored in the header's second byte.
1470 dbuf_buffered_read(d
->hdr_tmp
, 2, lev1_base_header_size
, cksum_cbfn
,
1471 (void*)&md
->hdr_checksum_calc
);
1472 de_dbg(c
, "header checksum (calculated): 0x%02x", md
->hdr_checksum_calc
);
1473 dbuf_writebyte_at(d
->hdr_tmp
, 1, (u8
)md
->hdr_checksum_calc
);
// NOTE(review): the pieces written above add up to lev1_base_header_size
// bytes, two fewer than the length requested here, so this call presumably
// extends the buffer by two bytes -- confirm that dbuf_truncate() zero-pads.
1474 dbuf_truncate(d
->hdr_tmp
, 2+lev1_base_header_size
);
1476 // Write everything out
1477 dbuf_copy(d
->hdr_tmp
, 0, d
->hdr_tmp
->len
, d
->lha_outf
);
1478 de_dbg(c
, "member data at %"I64_FMT
", len=%"I64_FMT
, hdr_endpos
, compressed_data_len
);
1479 dbuf_copy(c
->infile
, hdr_endpos
, compressed_data_len
, d
->lha_outf
);
// Size of this member in the CAR file; the caller advances by this amount.
1480 md
->total_size
= (hdr_endpos
-md
->member_pos
) + compressed_data_len
;
1484 de_dbg_indent_restore(c
, saved_indent_level
);
// Module entry point for CAR: rewrites the archive as LHA data.
// After checking that offset 0 looks like a CAR member, it creates one output
// file (d->lha_outf) and a scratch buffer (d->hdr_tmp), then converts members
// one at a time with do_car_member(), advancing by each member's total_size.
1488 static void de_run_car_lha(deark
*c
, de_module_params
*mparams
)
1490 struct car_ctx
*d
= NULL
;
1491 struct car_member_data
*md
= NULL
;
1495 d
= de_malloc(c
, sizeof(struct car_ctx
));
1497 if(!looks_like_car_member(c
, 0)) {
1498 de_err(c
, "Not a CAR file");
1502 d
->lha_outf
= dbuf_create_output_file(c
, "lha", NULL
, 0);
1503 d
->hdr_tmp
= dbuf_create_membuf(c
, 0, 0);
// A single member record is allocated once and re-zeroed for each member.
1505 md
= de_malloc(c
, sizeof(struct car_member_data
));
// A zero byte where a member would begin is handled as the trailer: it is
// logged and a single zero byte is written to the output.
1507 if(de_getbyte(pos
)==0) {
1508 de_dbg(c
, "trailer at %"I64_FMT
, pos
);
1509 dbuf_writebyte(d
->lha_outf
, 0);
// Bail out if fewer than 27 bytes remain, or if the data at pos does not
// look like a member header.
1513 if(pos
+27 > c
->infile
->len
) goto done
;
1514 if(!looks_like_car_member(c
, pos
)) goto done
;
1516 de_zeromem(md
, sizeof(struct car_member_data
));
1517 md
->member_pos
= pos
;
1518 if(!do_car_member(c
, d
, md
)) goto done
;
1519 pos
+= md
->total_size
;
1526 dbuf_close(d
->lha_outf
);
1528 de_err(c
, "Conversion to LHA format failed");
1531 dbuf_close(d
->hdr_tmp
);
// Format detection for CAR: the input file must have the "car" extension
// (otherwise 0 is returned), and the data at offset 0 is then tested with
// looks_like_car_member().
1536 static int de_identify_car_lha(deark
*c
)
1538 if(!de_input_file_has_ext(c
, "car")) return 0;
1539 if(looks_like_car_member(c
, 0)) {
// Module registration for CAR: fills in the description and the
// run / identify callbacks in the module info record.
1545 void de_module_car_lha(deark
*c
, struct deark_module_info
*mi
)
1548 mi
->desc
= "CAR (MylesHi!) LHA-like archive";
1549 mi
->run_fn
= de_run_car_lha
;
1550 mi
->identify_fn
= de_identify_car_lha
;
1553 /////////////////////// ARX
// Per-member state used while converting one ARX member; do_arx_member()
// reads member_pos from it and stores total_size into it.
1555 struct arx_member_data
{
// Heuristic test for an ARX member header at pos.
// Returns 0 unless bytes 2..4 are "-lh", byte 6 is '-', and byte 21 is zero.
// Byte 5 (the method digit) is not examined by the checks shown here.
1565 static int looks_like_arx_member(deark
*c
, i64 pos
)
1569 de_read(b
, pos
, sizeof(b
));
1570 if(b
[2]!='-' || b
[3]!='l'|| b
[4]!='h' || b
[6]!='-') return 0;
1571 if(b
[21]!=0) return 0;
// Converts the ARX member at md->member_pos into an LHA member and appends it
// to d->lha_outf.
// The replacement header is assembled in the scratch buffer d->hdr_tmp: the
// first 7 bytes are copied, the two size fields are rewritten, the remainder
// of the ARX header is copied, and a zero byte is appended in place of the
// CRC byte that ARX does not store. The header checksum is then recomputed
// and the member data copied through unchanged. On the path shown,
// md->total_size is set to 2 + header size + data length. The caller treats
// a zero return value as failure.
1575 static int do_arx_member(deark
*c
, struct arx_ctx
*d
, struct arx_member_data
*md
)
1577 i64 lev0_header_size
;
1579 i64 compressed_data_len
;
1581 i64 pos1
= md
->member_pos
;
1582 UI hdr_checksum_calc
= 0;
1583 int is_uncompressed
= 0;
1585 int saved_indent_level
;
1587 de_dbg_indent_save(c
, &saved_indent_level
);
1588 de_dbg(c
, "member at %"I64_FMT
, pos1
);
1589 de_dbg_indent(c
, 1);
1591 lev0_header_size
= (i64
)de_getbyte(pos1
);
1592 de_dbg(c
, "header size: %d", (int)lev0_header_size
);
1593 if(lev0_header_size
<22) goto done
;
1594 hdr_endpos
= pos1
+ 2 + lev0_header_size
;
// In ARX the size fields are at offsets 8 and 12 of the member.
1596 compressed_data_len
= de_getu32le(pos1
+8);
1597 de_dbg(c
, "compressed size: %"I64_FMT
, compressed_data_len
);
1599 unc_data_len
= de_getu32le(pos1
+12);
1600 de_dbg(c
, "uncmpr. size: %"I64_FMT
, unc_data_len
);
// A compressed size of 0 is treated as "stored uncompressed": the data
// length is then taken from the uncompressed size.
1602 if(compressed_data_len
==0) {
1603 is_uncompressed
= 1;
1604 compressed_data_len
= unc_data_len
;
// Give up if the member's data would extend past the end of the input file.
1607 if(hdr_endpos
+ compressed_data_len
> c
->infile
->len
) goto done
;
1609 // Convert to an LHA header
1610 dbuf_empty(d
->hdr_tmp
);
1612 // Fields through cmpr meth. (We'll patch the checksum, and
1613 // compression method if necessary, later.)
1614 dbuf_copy(c
->infile
, pos1
, 7, d
->hdr_tmp
);
1616 dbuf_writeu32le(d
->hdr_tmp
, compressed_data_len
);
1617 dbuf_writeu32le(d
->hdr_tmp
, unc_data_len
);
1619 /// Rest of the header can be copied as-is.
1620 dbuf_copy(c
->infile
, pos1
+8+8, lev0_header_size
-6-8, d
->hdr_tmp
);
1622 // No source for the low byte of CRC. ARX doesn't save it.
1623 // (The extra byte after the compression method is not it.)
1624 // Until we support lh1 decompression, we have no way to recalculate it.
1625 // TODO: We could recalculate it for uncompressed files.
1626 dbuf_writebyte(d
->hdr_tmp
, 00);
1628 if(is_uncompressed
) {
1629 dbuf_writebyte_at(d
->hdr_tmp
, 5, '0'); // lh1 -> lh0
1632 // Recalculate checksum
// The checksum is taken over lev0_header_size bytes starting at offset 2
// of the new header, then stored in the header's second byte.
1633 dbuf_buffered_read(d
->hdr_tmp
, 2, lev0_header_size
, cksum_cbfn
,
1634 (void*)&hdr_checksum_calc
);
1635 de_dbg(c
, "header checksum (calculated): 0x%02x", hdr_checksum_calc
);
1636 dbuf_writebyte_at(d
->hdr_tmp
, 1, (u8
)hdr_checksum_calc
);
// The pieces written above already add up to 2+lev0_header_size bytes.
1637 dbuf_truncate(d
->hdr_tmp
, 2+lev0_header_size
);
1639 // Write everything out
1640 dbuf_copy(d
->hdr_tmp
, 0, d
->hdr_tmp
->len
, d
->lha_outf
);
1641 de_dbg(c
, "member data at %"I64_FMT
", len=%"I64_FMT
, hdr_endpos
, compressed_data_len
);
1642 dbuf_copy(c
->infile
, hdr_endpos
, compressed_data_len
, d
->lha_outf
);
// Size of this member in the ARX file; the caller advances by this amount.
1643 md
->total_size
= 2 + lev0_header_size
+ compressed_data_len
;
1647 de_dbg_indent_restore(c
, saved_indent_level
);
// Module entry point for ARX: rewrites the archive as LHA data.
// After checking that offset 0 looks like an ARX member, it creates one
// output file (d->lha_outf) and a scratch buffer (d->hdr_tmp), then converts
// members one at a time with do_arx_member(), advancing by each member's
// total_size. The CRC fields of the result are not correct, and the user is
// told so.
1651 static void de_run_arx(deark
*c
, de_module_params
*mparams
)
1653 struct arx_ctx
*d
= NULL
;
1654 struct arx_member_data
*md
= NULL
;
1658 d
= de_malloc(c
, sizeof(struct arx_ctx
));
1660 if(!looks_like_arx_member(c
, 0)) {
1661 de_err(c
, "Not an ARX file");
1665 d
->lha_outf
= dbuf_create_output_file(c
, "lha", NULL
, 0);
1666 d
->hdr_tmp
= dbuf_create_membuf(c
, 0, 0);
// A single member record is allocated once and re-zeroed for each member.
1668 md
= de_malloc(c
, sizeof(struct arx_member_data
));
// A zero byte where a member would begin is handled as the trailer: it is
// logged and a single zero byte is written to the output.
1670 if(de_getbyte(pos
)==0) {
1671 de_dbg(c
, "trailer at %"I64_FMT
, pos
);
1672 dbuf_writebyte(d
->lha_outf
, 0);
// Bail out if fewer than 27 bytes remain, or if the data at pos does not
// look like a member header.
1676 if(pos
+27 > c
->infile
->len
) goto done
;
1677 if(!looks_like_arx_member(c
, pos
)) goto done
;
1679 de_zeromem(md
, sizeof(struct arx_member_data
));
1680 md
->member_pos
= pos
;
1681 if(!do_arx_member(c
, d
, md
)) goto done
;
1682 pos
+= md
->total_size
;
1689 dbuf_close(d
->lha_outf
);
1691 de_info(c
, "Note: Conversion from ARX to LHA is not fully implemented. "
1692 "The CRC fields will be incorrect.");
1695 de_err(c
, "Conversion to LHA format failed");
1698 dbuf_close(d
->hdr_tmp
);
// Format detection for ARX.
// Returns 0 unless the five bytes at offset 2 are "-lh1-" and bytes 20 and 21
// are 0x20 and 0x00. When those checks pass and the input file has the "arx"
// extension, the result is 100.
1703 static int de_identify_arx(deark
*c
)
1705 if(dbuf_memcmp(c
->infile
, 2, "-lh1-", 5)) return 0;
1706 if(de_getbyte(20)!=0x20 || de_getbyte(21)!=0x00) return 0;
1707 if(de_input_file_has_ext(c
, "arx")) return 100;
// Module registration for ARX: fills in the description and the
// run / identify callbacks in the module info record.
1711 void de_module_arx(deark
*c
, struct deark_module_info
*mi
)
1714 mi
->desc
= "ARX LHA-like archive";
1715 mi
->run_fn
= de_run_arx
;
1716 mi
->identify_fn
= de_identify_arx
;