1 // This file is part of Deark.
2 // Copyright (C) 2017 Jason Summers
3 // See the file COPYING for terms of use.
5 // LHA/LZH compressed archive format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_lha
);
11 DE_DECLARE_MODULE(de_module_swg
);
12 DE_DECLARE_MODULE(de_module_car_lha
);
13 DE_DECLARE_MODULE(de_module_arx
);
// Used to size the string arrays that hold the components of a parsed path
// (see the de_strarray_create() calls in this file).
#define MAX_SUBDIR_LEVEL 32

// Compression method identifiers. Each value is the first four bytes of the
// 5-byte method field, taken as a big-endian integer (e.g. "-lh5" is
// 0x2d6c6835). All of them are unsigned constants, so they compare cleanly
// against a u32 ID.
#define CODE_S_LH0 0x204c4830U // SAR
#define CODE_S_LH5 0x204c4835U // SAR
#define CODE_ah0 0x2d616830U // MAR
#define CODE_ari 0x2d617269U // MAR
#define CODE_hf0 0x2d686630U // MAR
#define CODE_lZ0 0x2d6c5a30U // PUT
#define CODE_lZ1 0x2d6c5a31U // PUT
#define CODE_lZ5 0x2d6c5a35U // PUT
#define CODE_lh0 0x2d6c6830U
#define CODE_lh1 0x2d6c6831U
#define CODE_lh2 0x2d6c6832U
#define CODE_lh3 0x2d6c6833U
#define CODE_lh4 0x2d6c6834U
#define CODE_lh5 0x2d6c6835U
#define CODE_lh6 0x2d6c6836U
#define CODE_lh7 0x2d6c6837U // standard, or LHARK
#define CODE_lh8 0x2d6c6838U
#define CODE_lh9 0x2d6c6839U
#define CODE_lha 0x2d6c6861U
#define CODE_lhb 0x2d6c6862U
#define CODE_lhc 0x2d6c6863U
#define CODE_lhd 0x2d6c6864U
#define CODE_lhe 0x2d6c6865U
#define CODE_lhx 0x2d6c6878U
#define CODE_lx1 0x2d6c7831U
#define CODE_lz2 0x2d6c7a32U
#define CODE_lz3 0x2d6c7a33U
#define CODE_lz4 0x2d6c7a34U
#define CODE_lz5 0x2d6c7a35U
#define CODE_lz7 0x2d6c7a37U
#define CODE_lz8 0x2d6c7a38U
#define CODE_lzs 0x2d6c7a73U
#define CODE_pc1 0x2d706331U
#define CODE_pm0 0x2d706d30U
#define CODE_pm1 0x2d706d31U
#define CODE_pm2 0x2d706d32U
#define CODE_sw0 0x2d737730U
#define CODE_sw1 0x2d737731U

// A timestamp index that is outside the valid range. apply_timestamp()
// ignores it, so a timestamp read with this index is reported but not stored.
#define TIMESTAMPIDX_INVALID (-1)
57 struct timestamp_data
{
58 struct de_timestamp ts
; // The best timestamp of this type found so far
62 struct cmpr_meth_info
;
65 u8 hlev
; // header level
69 struct cmpr_meth_info
*cmi
;
77 u8 have_hdr_crc_reported
;
80 i64 hdr_crc_field_pos
;
84 i64 compressed_data_pos
; // relative to beginning of file
85 i64 compressed_data_len
;
87 de_ucstring
*filename
;
88 de_ucstring
*fullfilename
;
89 struct timestamp_data tsdata
[DE_TIMESTAMPIDX_COUNT
];
92 typedef struct localctx_struct
{
93 de_encoding input_encoding
;
94 u8 hlev_of_first_member
;
102 struct de_crcobj
*crco
;
105 typedef void (*decompressor_fn
)(deark
*c
, lctx
*d
, struct member_data
*md
,
106 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
107 struct de_dfilter_results
*dres
);
109 struct cmpr_meth_info
{
112 decompressor_fn decompressor
;
114 char id_printable_sz
[6];
118 struct exthdr_type_info_struct
;
120 typedef void (*exthdr_decoder_fn
)(deark
*c
, lctx
*d
, struct member_data
*md
,
121 u8 id
, const struct exthdr_type_info_struct
*e
,
124 struct exthdr_type_info_struct
{
128 exthdr_decoder_fn decoder_fn
;
131 static int lha_isdigit(u8 x
)
133 return (x
>='0' && x
<='9');
136 static int lha_isalpha(u8 x
)
138 return ((x
>='A' && x
<='Z') || (x
>='a' && x
<='z'));
141 static int lha_isalnum(u8 x
)
143 return (lha_isdigit(x
) || lha_isalpha(x
));
146 static int is_possible_cmpr_meth(const u8 m
[5])
148 if(m
[0]!=m
[4]) return 0;
149 if(m
[0]==' ' && m
[1]=='L' && m
[2]=='H' && lha_isdigit(m
[3])) return 1;
150 if(m
[0]!='-') return 0;
151 if(!lha_isalpha(m
[1]) ||
152 !lha_isalnum(m
[2]) ||
160 static void apply_timestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
161 int tsidx
, const struct de_timestamp
*ts
, int quality
)
163 if(!ts
->is_valid
) return;
164 if(tsidx
<0 || tsidx
>=DE_TIMESTAMPIDX_COUNT
) return;
165 if(quality
< md
->tsdata
[tsidx
].quality
) return;
166 md
->tsdata
[tsidx
].ts
= *ts
;
167 md
->tsdata
[tsidx
].quality
= quality
;
170 static void read_msdos_modtime(deark
*c
, lctx
*d
, struct member_data
*md
,
171 i64 pos
, const char *name
)
173 i64 mod_time_raw
, mod_date_raw
;
174 char timestamp_buf
[64];
175 struct de_timestamp tmp_timestamp
;
177 mod_time_raw
= de_getu16le(pos
);
178 mod_date_raw
= de_getu16le(pos
+2);
179 if(mod_time_raw
==0 && mod_date_raw
==0) {
180 de_dbg(c
, "%s: (not set)", name
);
183 de_dos_datetime_to_timestamp(&tmp_timestamp
, mod_date_raw
, mod_time_raw
);
184 tmp_timestamp
.tzcode
= DE_TZCODE_LOCAL
;
185 de_timestamp_to_string(&tmp_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
186 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
187 apply_timestamp(c
, d
, md
, DE_TIMESTAMPIDX_MODIFY
, &tmp_timestamp
, 10);
190 static void read_windows_FILETIME(deark
*c
, lctx
*d
, struct member_data
*md
,
191 i64 pos
, int tsidx
, const char *name
)
194 char timestamp_buf
[64];
195 struct de_timestamp tmp_timestamp
;
197 t_FILETIME
= de_geti64le(pos
);
198 de_FILETIME_to_timestamp(t_FILETIME
, &tmp_timestamp
, 0x1);
199 if(t_FILETIME
<=0) tmp_timestamp
.is_valid
= 0;
200 de_timestamp_to_string(&tmp_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
201 de_dbg(c
, "%s: %"I64_FMT
" (%s)", name
, t_FILETIME
, timestamp_buf
);
202 apply_timestamp(c
, d
, md
, tsidx
, &tmp_timestamp
, 90);
205 static void read_unix_timestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
206 i64 pos
, int tsidx
, const char *name
)
209 char timestamp_buf
[64];
210 struct de_timestamp tmp_timestamp
;
212 t
= de_geti32le(pos
);
213 de_unix_time_to_timestamp(t
, &tmp_timestamp
, 0x1);
214 de_timestamp_to_string(&tmp_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
215 de_dbg(c
, "%s: %d (%s)", name
, (int)t
, timestamp_buf
);
216 apply_timestamp(c
, d
, md
, tsidx
, &tmp_timestamp
, 50);
219 static void rp_add_component(deark
*c
, lctx
*d
, struct member_data
*md
,
220 dbuf
*f
, i64 pos
, i64 len
, struct de_strarray
*sa
, de_ucstring
*tmpstr
)
223 ucstring_empty(tmpstr
);
224 dbuf_read_to_ucstring(f
, pos
, len
, tmpstr
, 0, md
->encoding
);
225 de_strarray_push(sa
, tmpstr
);
228 static void read_path_to_strarray(deark
*c
, lctx
*d
, struct member_data
*md
,
229 dbuf
*inf
, i64 pos
, i64 len
, struct de_strarray
*sa
, int is_exthdr_dirname
)
231 dbuf
*tmpdbuf
= NULL
;
232 de_ucstring
*tmpstr
= NULL
;
233 i64 component_startpos
;
237 tmpstr
= ucstring_create(c
);
239 tmpdbuf
= dbuf_create_membuf(c
, len
, 0);
240 dbuf_copy(inf
, pos
, len
, tmpdbuf
);
242 component_startpos
= 0;
245 for(i
=0; i
<len
; i
++) {
248 ch
= dbuf_getbyte(tmpdbuf
, i
);
249 if(ch
==0x00) break; // Tolerate NUL termination
250 if((is_exthdr_dirname
&& ch
==0xff) ||
251 (!is_exthdr_dirname
&& (ch
=='\\' || ch
=='/')))
253 component_len
= i
- component_startpos
;
254 rp_add_component(c
, d
, md
, tmpdbuf
, component_startpos
, component_len
, sa
, tmpstr
);
255 component_startpos
= i
+1;
262 rp_add_component(c
, d
, md
, tmpdbuf
, component_startpos
, component_len
, sa
, tmpstr
);
265 ucstring_destroy(tmpstr
);
268 static void read_filename_hlev0(deark
*c
, lctx
*d
, struct member_data
*md
,
271 struct de_strarray
*sa
= NULL
;
274 ucstring_empty(md
->filename
);
277 md
->filename
= ucstring_create(c
);
280 sa
= de_strarray_create(c
, MAX_SUBDIR_LEVEL
+2);
281 read_path_to_strarray(c
, d
, md
, c
->infile
, pos
, len
, sa
, 0);
283 de_strarray_make_path(sa
, md
->filename
, DE_MPFLAG_NOTRAILINGSLASH
);
284 de_dbg(c
, "filename (parsed): \"%s\"", ucstring_getpsz_d(md
->filename
));
286 de_strarray_destroy(sa
);
289 static void read_filename_hlev1_or_exthdr(deark
*c
, lctx
*d
, struct member_data
*md
,
295 ucstring_empty(md
->filename
);
298 md
->filename
= ucstring_create(c
);
301 // Some files seem to assume NUL termination is allowed.
302 dbuf_read_to_ucstring(c
->infile
, pos
, len
,
303 md
->filename
, DE_CONVFLAG_STOP_AT_NUL
, md
->encoding
);
304 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(md
->filename
));
306 // I don't think slashes are allowed
307 for(i
=0; i
<md
->filename
->len
; i
++) {
308 if(md
->filename
->str
[i
]=='/') {
309 md
->filename
->str
[i
]='_';
314 static void exthdr_common(deark
*c
, lctx
*d
, struct member_data
*md
,
315 u8 id
, const struct exthdr_type_info_struct
*e
,
319 md
->hdr_crc_reported
= (u32
)de_getu16le(pos
);
320 md
->have_hdr_crc_reported
= 1;
321 md
->hdr_crc_field_pos
= pos
;
322 de_dbg(c
, "header crc (reported): 0x%04x", (unsigned int)md
->hdr_crc_reported
);
323 // TODO: Additional information
326 static void exthdr_filename(deark
*c
, lctx
*d
, struct member_data
*md
,
327 u8 id
, const struct exthdr_type_info_struct
*e
,
330 read_filename_hlev1_or_exthdr(c
, d
, md
, pos
, dlen
);
333 static void exthdr_dirname(deark
*c
, lctx
*d
, struct member_data
*md
,
334 u8 id
, const struct exthdr_type_info_struct
*e
,
337 struct de_strarray
*dirname_sa
= NULL
;
340 ucstring_empty(md
->dirname
);
343 md
->dirname
= ucstring_create(c
);
346 dirname_sa
= de_strarray_create(c
, MAX_SUBDIR_LEVEL
+2);
347 // 0xff is used as the path separator. Don't know what happens if a directory
348 // name contains an actual 0xff byte.
349 read_path_to_strarray(c
, d
, md
, c
->infile
, pos
, dlen
, dirname_sa
, 1);
350 de_strarray_make_path(dirname_sa
, md
->dirname
, DE_MPFLAG_NOTRAILINGSLASH
);
351 de_dbg(c
, "%s (parsed): \"%s\"", e
->name
, ucstring_getpsz_d(md
->dirname
));
353 de_strarray_destroy(dirname_sa
);
356 static void exthdr_msdosattribs(deark
*c
, lctx
*d
, struct member_data
*md
,
357 u8 id
, const struct exthdr_type_info_struct
*e
,
361 de_ucstring
*descr
= NULL
;
363 if(dlen
<2) goto done
;
364 attribs
= (u32
)de_getu16le(pos
);
365 descr
= ucstring_create(c
);
366 de_describe_dos_attribs(c
, (UI
)attribs
, descr
, 0);
367 de_dbg(c
, "%s: 0x%04x (%s)", e
->name
, (UI
)attribs
, ucstring_getpsz_d(descr
));
369 ucstring_destroy(descr
);
372 static void exthdr_filesize(deark
*c
, lctx
*d
, struct member_data
*md
,
373 u8 id
, const struct exthdr_type_info_struct
*e
,
376 // TODO: Support this
377 de_warn(c
, "Unsupported \"file size\" extended header found. This may prevent "
378 "the rest of the file from being processed correctly.");
381 static void exthdr_windowstimestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
382 u8 id
, const struct exthdr_type_info_struct
*e
,
386 read_windows_FILETIME(c
, d
, md
, pos
, DE_TIMESTAMPIDX_CREATE
, "create time");
387 read_windows_FILETIME(c
, d
, md
, pos
+8, DE_TIMESTAMPIDX_MODIFY
, "mod time ");
388 read_windows_FILETIME(c
, d
, md
, pos
+16, DE_TIMESTAMPIDX_ACCESS
, "access time");
391 static void interpret_unix_perms(deark
*c
, lctx
*d
, struct member_data
*md
, unsigned int mode
)
393 if(mode
& 0100000) { // regular file
394 if(mode
& 0111) { // executable
395 md
->is_executable
= 1;
398 md
->is_nonexecutable
= 1;
402 if((mode
& 0170000) == 0120000) {
403 md
->is_special
= 1; // symlink
407 static void exthdr_unixperms(deark
*c
, lctx
*d
, struct member_data
*md
,
408 u8 id
, const struct exthdr_type_info_struct
*e
,
414 mode
= (unsigned int)de_getu16le(pos
);
415 de_dbg(c
, "mode: octal(%06o)", mode
);
416 interpret_unix_perms(c
, d
, md
, mode
);
419 static void exthdr_unixuidgid(deark
*c
, lctx
*d
, struct member_data
*md
,
420 u8 id
, const struct exthdr_type_info_struct
*e
,
426 // It's strange that the GID comes first, while the UID comes first in the
427 // level-0 "extended area".
428 gid
= de_getu16le(pos
);
429 de_dbg(c
, "gid: %d", (int)gid
);
430 uid
= de_getu16le(pos
+2);
431 de_dbg(c
, "uid: %d", (int)uid
);
434 static void exthdr_unixtimestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
435 u8 id
, const struct exthdr_type_info_struct
*e
,
439 read_unix_timestamp(c
, d
, md
, pos
, DE_TIMESTAMPIDX_MODIFY
, "last-modified");
442 static void exthdr_lev3newattribs2(deark
*c
, lctx
*d
, struct member_data
*md
,
443 u8 id
, const struct exthdr_type_info_struct
*e
,
451 // [Documented as "creation time", but this is a Unix-style header, so I
452 // wonder if someone mistranslated "ctime" (=change time).]
453 read_unix_timestamp(c
, d
, md
, pos
+12, TIMESTAMPIDX_INVALID
, "create(?) time");
455 read_unix_timestamp(c
, d
, md
, pos
+16, DE_TIMESTAMPIDX_ACCESS
, "access time ");
458 static void exthdr_codepage(deark
*c
, lctx
*d
, struct member_data
*md
,
459 u8 id
, const struct exthdr_type_info_struct
*e
,
463 de_encoding n_encoding
;
467 n_codepage
= (int)de_geti32le(pos
);
468 n_encoding
= de_windows_codepage_to_encoding(c
, n_codepage
, descr
, sizeof(descr
), 0);
469 de_dbg(c
, "codepage: %d (%s)", n_codepage
, descr
);
470 if(n_encoding
!= DE_ENCODING_UNKNOWN
) {
471 md
->encoding
= n_encoding
;
475 static const struct exthdr_type_info_struct exthdr_type_info_arr
[] = {
476 { 0x00, 0, "common", exthdr_common
},
477 { 0x01, 0, "filename", exthdr_filename
},
478 { 0x02, 0, "dir name", exthdr_dirname
},
479 { 0x39, 0, "multi-disc", NULL
},
480 { 0x3f, 0, "comment", NULL
},
481 { 0x40, 0, "MS-DOS file attribs", exthdr_msdosattribs
},
482 { 0x41, 0, "Windows timestamp", exthdr_windowstimestamp
},
483 { 0x42, 0, "MS-DOS file size", exthdr_filesize
},
484 { 0x43, 0, "time zone", NULL
},
485 { 0x44, 0, "UTF-16 filename", NULL
},
486 { 0x45, 0, "UTF-16 dir name", NULL
},
487 { 0x46, 0, "codepage", exthdr_codepage
},
488 { 0x50, 0, "Unix perms", exthdr_unixperms
},
489 { 0x51, 0, "Unix UID/GID", exthdr_unixuidgid
},
490 { 0x52, 0, "Unix group name", NULL
},
491 { 0x53, 0, "Unix username", NULL
},
492 { 0x54, 0, "Unix timestamp", exthdr_unixtimestamp
},
493 { 0x7d, 0, "capsule", NULL
},
494 { 0x7e, 0, "OS/2 extended attribs", NULL
},
495 { 0x7f, 0, "level 3 new attribs type-1", NULL
}, // (OS/2 only)
496 { 0xff, 0, "level 3 new attribs type-2", exthdr_lev3newattribs2
}
499 static void destroy_member_data(deark
*c
, struct member_data
*md
)
502 ucstring_destroy(md
->dirname
);
503 ucstring_destroy(md
->filename
);
504 ucstring_destroy(md
->fullfilename
);
509 static const struct exthdr_type_info_struct
*get_exthdr_type_info(u8 id
)
513 for(i
=0; i
<DE_ARRAYCOUNT(exthdr_type_info_arr
); i
++) {
514 if(id
== exthdr_type_info_arr
[i
].id
) {
515 return &exthdr_type_info_arr
[i
];
521 static void do_read_ext_header(deark
*c
, lctx
*d
, struct member_data
*md
,
522 i64 pos1
, i64 len
, i64 dlen
)
526 const struct exthdr_type_info_struct
*e
= NULL
;
529 id
= de_getbyte(pos1
);
530 e
= get_exthdr_type_info(id
);
532 name
= e
? e
->name
: "?";
534 de_dbg(c
, "ext header at %d, len=%d (1+%d+%d), id=0x%02x (%s)", (int)pos1
, (int)len
,
535 (int)(dlen
-1), (int)(len
-dlen
), (unsigned int)id
, name
);
537 if(dlen
<1) return; // Invalid header, too short to even have an id field
539 if(e
&& e
->decoder_fn
) {
541 e
->decoder_fn(c
, d
, md
, id
, e
, pos1
+1, dlen
-1);
542 de_dbg_indent(c
, -1);
545 if(c
->debug_level
>=2) {
546 de_dbg_hexdump(c
, c
->infile
, pos1
+1, dlen
-1, 256, NULL
, 0x1);
551 static const char *get_os_name(u8 id
)
553 const char *name
= NULL
;
555 case ' ': name
="unspecified"; break;
556 case '2': name
="OS/2"; break;
557 case '3': name
="OS/386?"; break;
558 case '9': name
="OS-9"; break;
559 case 'A': name
="Amiga"; break;
560 case 'C': name
="CP/M"; break;
561 case 'F': name
="FLEX"; break;
562 case 'H': name
="Human68K"; break;
563 case 'J': name
="JVM"; break;
564 case 'K': name
="OS-9/68K"; break;
565 case 'M': name
="DOS"; break;
566 case 'R': name
="RUNser"; break;
567 case 'T': name
="TownsOS"; break;
568 case 'U': name
="Unix"; break;
569 case 'W': name
="Windows NT"; break;
570 case 'a': name
="Atari ST?"; break;
571 case 'm': name
="Macintosh"; break;
572 case 'w': name
="Windows"; break;
574 return name
?name
:"?";
577 static void do_lev0_ext_area(deark
*c
, lctx
*d
, struct member_data
*md
,
581 md
->os_id
= de_getbyte(pos1
);
582 de_dbg(c
, "OS id: %d ('%c') (%s)", (int)md
->os_id
,
583 de_byte_to_printable_char(md
->os_id
), get_os_name(md
->os_id
));
590 if(len
<12) goto done
;
592 read_unix_timestamp(c
, d
, md
, pos1
+2, DE_TIMESTAMPIDX_MODIFY
, "last-modified");
594 mode
= (unsigned int)de_getu16le(pos1
+6);
595 de_dbg(c
, "mode: octal(%06o)", mode
);
596 interpret_unix_perms(c
, d
, md
, mode
);
598 uid
= de_getu16le(pos1
+8);
599 de_dbg(c
, "uid: %d", (int)uid
);
600 gid
= de_getu16le(pos1
+10);
601 de_dbg(c
, "gid: %d", (int)gid
);
607 // AFAICT, we're expected to think of the extended headers as a kind of linked
608 // list. The last field in each node is the "size of next node" (instead of
609 // "pointer to next node", as a real linked list would have). A size of 0 is
610 // like a "nil" pointer, and marks the end of the list.
611 // The "size of the first node" field (analogous to the "head" pointer) is
612 // conceptually not part of the extended headers section.
614 // Note that if we simply shift our frame of reference, this format is identical
615 // to a more typical length-prefixed format. But our code follows the
616 // linked-list model, to make it more consistent with most LHA documentation,
617 // and the various "size" fields.
619 // A return value of 0 means we failed to calculate the size of the
620 // extended headers segment.
621 static int do_read_ext_headers(deark
*c
, lctx
*d
, struct member_data
*md
,
622 i64 pos1
, i64 len
, i64 first_ext_hdr_size
, i64
*tot_bytes_consumed
)
625 i64 this_ext_hdr_size
, next_ext_hdr_size
;
627 i64 size_of_size_field
;
629 *tot_bytes_consumed
= 0;
631 if(first_ext_hdr_size
==0) {
635 de_dbg(c
, "ext headers section at %d", (int)pos
);
638 size_of_size_field
= (md
->hlev
==3) ? 4 : 2;
640 next_ext_hdr_size
= first_ext_hdr_size
;
642 this_ext_hdr_size
= next_ext_hdr_size
;
643 if(this_ext_hdr_size
==0) {
645 *tot_bytes_consumed
= pos
- pos1
;
648 if(this_ext_hdr_size
<size_of_size_field
) goto done
;
649 if(pos
+this_ext_hdr_size
> pos1
+len
) goto done
;
651 do_read_ext_header(c
, d
, md
, pos
, this_ext_hdr_size
, this_ext_hdr_size
-size_of_size_field
);
653 // Each ext header ends with a "size of next header" field.
654 // We'll read it at this level, instead of in do_read_ext_header().
655 pos
+= this_ext_hdr_size
-size_of_size_field
;
656 if(size_of_size_field
==2) {
657 next_ext_hdr_size
= de_getu16le(pos
);
660 next_ext_hdr_size
= de_getu32le(pos
);
662 pos
+= size_of_size_field
;
667 de_dbg(c
, "size of ext headers section: %d", (int)*tot_bytes_consumed
);
670 de_dbg(c
, "failed to parse all extended headers");
672 de_dbg_indent(c
, -1);
676 static void make_fullfilename(deark
*c
, lctx
*d
, struct member_data
*md
)
678 if(md
->fullfilename
) return;
681 md
->filename
= ucstring_create(c
);
683 md
->fullfilename
= ucstring_create(c
);
686 ucstring_append_ucstring(md
->fullfilename
, md
->filename
);
690 ucstring_append_ucstring(md
->fullfilename
, md
->dirname
);
693 if(ucstring_isnonempty(md
->dirname
)) {
694 ucstring_append_ucstring(md
->fullfilename
, md
->dirname
);
695 ucstring_append_sz(md
->fullfilename
, "/", DE_ENCODING_LATIN1
);
697 if(ucstring_isnonempty(md
->filename
)) {
698 ucstring_append_ucstring(md
->fullfilename
, md
->filename
);
701 ucstring_append_char(md
->fullfilename
, '_');
707 static void decompress_uncompressed(deark
*c
, lctx
*d
, struct member_data
*md
,
708 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
709 struct de_dfilter_results
*dres
)
711 fmtutil_decompress_uncompressed(c
, dcmpri
, dcmpro
, dres
, 0);
714 static void decompress_lh1(deark
*c
, lctx
*d
, struct member_data
*md
,
715 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
716 struct de_dfilter_results
*dres
)
718 fmtutil_lh1_codectype1(c
, dcmpri
, dcmpro
, dres
, NULL
);
721 // Caller supplies fmt (DE_LH5X_FMT_*).
722 static void decompress_lh5x_internal(deark
*c
, lctx
*d
,
723 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
724 struct de_dfilter_results
*dres
, int fmt
)
726 struct de_lh5x_params lzhparams
;
728 de_zeromem(&lzhparams
, sizeof(struct de_lh5x_params
));
730 lzhparams
.zero_codes_block_behavior
= DE_LH5X_ZCB_65536
;
731 lzhparams
.warn_about_zero_codes_block
= 1;
732 lzhparams
.history_fill_val
= 0x20;
733 fmtutil_decompress_lh5x(c
, dcmpri
, dcmpro
, dres
, &lzhparams
);
736 // Compression method will be selected based on id_raw[3] (which
737 // should be '4'...'8'), etc.
738 static void decompress_lh5x_auto(deark
*c
, lctx
*d
, struct member_data
*md
,
739 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
740 struct de_dfilter_results
*dres
)
744 switch(md
->cmi
->id_raw
[3]) {
746 fmt
= DE_LH5X_FMT_LH5
;
749 fmt
= DE_LH5X_FMT_LH6
;
753 fmt
= DE_LH5X_FMT_LHARK
;
756 fmt
= DE_LH5X_FMT_LH7
;
760 fmt
= DE_LH5X_FMT_LH7
;
766 decompress_lh5x_internal(c
, d
, dcmpri
, dcmpro
, dres
, fmt
);
769 static void decompress_lh5(deark
*c
, lctx
*d
, struct member_data
*md
,
770 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
771 struct de_dfilter_results
*dres
)
773 decompress_lh5x_internal(c
, d
, dcmpri
, dcmpro
, dres
, DE_LH5X_FMT_LH5
);
776 static void decompress_lz5(deark
*c
, lctx
*d
, struct member_data
*md
,
777 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
778 struct de_dfilter_results
*dres
)
780 fmtutil_decompress_szdd(c
, dcmpri
, dcmpro
, dres
, 0x1);
783 struct cmpr_meth_array_item
{
787 decompressor_fn decompressor
;
790 // Compression methods with a decompressor or a description are usually
791 // listed here, but note that it is also possible for get_cmpr_meth_info()
792 // to handle them procedurally.
793 static const struct cmpr_meth_array_item cmpr_meth_arr
[] = {
794 { 0x00, CODE_lhd
, "directory", NULL
},
795 { 0x00, CODE_lh0
, "uncompressed", decompress_uncompressed
},
796 { 0x00, CODE_lh1
, "LZ77-4K, adaptive Huffman", decompress_lh1
},
797 { 0x00, CODE_lh4
, NULL
, decompress_lh5x_auto
},
798 { 0x00, CODE_lh5
, "LZ77-8K, static Huffman", decompress_lh5
},
799 { 0x00, CODE_lh6
, "LZ77-32K, static Huffman", decompress_lh5x_auto
},
800 { 0x00, CODE_lh7
, NULL
, decompress_lh5x_auto
},
801 { 0x00, CODE_lh8
, NULL
, decompress_lh5x_auto
},
802 { 0x00, CODE_lz4
, "uncompressed (LArc)", decompress_uncompressed
},
803 { 0x00, CODE_lz5
, "LZSS-4K (LArc)", decompress_lz5
},
804 { 0x00, CODE_pm0
, "uncompressed (PMArc)", decompress_uncompressed
},
805 { 0x00, CODE_lZ0
, "uncompressed (MicroFox PUT)", decompress_uncompressed
},
806 { 0x00, CODE_lZ1
, "MicroFox PUT lZ1", decompress_lh1
},
807 { 0x00, CODE_lZ5
, "MicroFox PUT lZ5", decompress_lh5
},
808 { 0x00, CODE_S_LH0
, "uncompressed (SAR)", decompress_uncompressed
},
809 { 0x00, CODE_S_LH5
, "SAR LH5", decompress_lh5
},
810 { 0x00, CODE_sw0
, NULL
, decompress_uncompressed
},
811 { 0x00, CODE_sw1
, NULL
, NULL
}
814 static const u32 other_known_cmpr_methods
[] = {
815 CODE_ah0
, CODE_ari
, CODE_hf0
,
816 CODE_lh2
, CODE_lh3
, CODE_lh4
, CODE_lh7
, CODE_lh8
, CODE_lh9
,
817 CODE_lha
, CODE_lhb
, CODE_lhc
, CODE_lhe
, CODE_lhx
, CODE_lx1
,
818 CODE_lz2
, CODE_lz3
, CODE_lz7
, CODE_lz8
, CODE_lzs
,
819 CODE_pc1
, CODE_pm1
, CODE_pm2
};
821 // Only call this after is_possible_cmpr_meth() return nonzero.
822 // Caller allocates cmi, and initializes to zeroes.
823 static void get_cmpr_meth_info(const u8 idbuf
[5], struct cmpr_meth_info
*cmi
)
826 const struct cmpr_meth_array_item
*cmai
= NULL
;
828 // The first 4 bytes are unique for all known methods.
829 cmi
->uniq_id
= (u32
)de_getu32be_direct(idbuf
);
831 de_memcpy(cmi
->id_raw
, idbuf
, 5);
833 // All "possible" methods only use printable characters.
834 de_memcpy(cmi
->id_printable_sz
, idbuf
, 5);
835 cmi
->id_printable_sz
[5] = '\0';
837 for(k
=0; k
<DE_ARRAYCOUNT(cmpr_meth_arr
); k
++) {
838 if(cmpr_meth_arr
[k
].uniq_id
== cmi
->uniq_id
) {
839 cmai
= &cmpr_meth_arr
[k
];
845 cmi
->is_recognized
= 1;
846 cmi
->decompressor
= cmai
->decompressor
;
849 for(k
=0; k
<DE_ARRAYCOUNT(other_known_cmpr_methods
); k
++) {
850 if(other_known_cmpr_methods
[k
] == cmi
->uniq_id
) {
851 cmi
->is_recognized
= 1;
857 if(cmai
&& cmai
->descr
) {
858 de_strlcpy(cmi
->descr
, cmai
->descr
, sizeof(cmi
->descr
));
860 else if(cmi
->is_recognized
) {
861 de_strlcpy(cmi
->descr
, "recognized, but no info avail.", sizeof(cmi
->descr
));
864 de_strlcpy(cmi
->descr
, "?", sizeof(cmi
->descr
));
868 static void our_writelistener_cb(dbuf
*f
, void *userdata
, const u8
*buf
, i64 buf_len
)
870 struct de_crcobj
*crco
= (struct de_crcobj
*)userdata
;
871 de_crcobj_addbuf(crco
, buf
, buf_len
);
874 static void do_extract_file(deark
*c
, lctx
*d
, struct member_data
*md
)
880 u8 dcmpr_disabled
= 0;
881 u8 dcmpr_attempted
= 0;
883 struct de_dfilter_in_params dcmpri
;
884 struct de_dfilter_out_params dcmpro
;
885 struct de_dfilter_results dres
;
887 if(!md
->cmi
) goto done
;
890 de_dbg(c
, "[not extracting special file]");
893 else if(md
->is_dir
) {
896 else if((!md
->cmi
->decompressor
) || dcmpr_disabled
) {
897 de_err(c
, "%s: Unsupported compression method '%s'",
898 ucstring_getpsz_d(md
->fullfilename
), md
->cmi
->id_printable_sz
);
902 fi
= de_finfo_create(c
);
904 for(tsidx
=0; tsidx
<DE_TIMESTAMPIDX_COUNT
; tsidx
++) {
905 if(md
->tsdata
[tsidx
].ts
.is_valid
) {
906 fi
->timestamp
[tsidx
] = md
->tsdata
[tsidx
].ts
;
911 fi
->is_directory
= 1;
913 else if(md
->is_executable
) {
914 fi
->mode_flags
|= DE_MODEFLAG_EXE
;
916 else if(md
->is_nonexecutable
) {
917 fi
->mode_flags
|= DE_MODEFLAG_NONEXE
;
920 de_finfo_set_name_from_ucstring(c
, fi
, md
->fullfilename
, DE_SNFLAG_FULLPATH
);
921 fi
->original_filename_flag
= 1;
923 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
924 de_crcobj_reset(d
->crco
);
925 dbuf_set_writelistener(outf
, our_writelistener_cb
, (void*)d
->crco
);
927 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
928 dcmpri
.f
= c
->infile
;
929 dcmpri
.pos
= md
->compressed_data_pos
;
930 dcmpri
.len
= md
->compressed_data_len
;
932 dcmpro
.expected_len
= md
->orig_size
;
933 dcmpro
.len_known
= 1;
935 if(md
->is_dir
) goto done
; // For directories, we're done.
938 if(md
->cmi
->decompressor
) {
939 md
->cmi
->decompressor(c
, d
, md
, &dcmpri
, &dcmpro
, &dres
);
943 de_err(c
, "%s: Decompression failed: %s", ucstring_getpsz_d(md
->fullfilename
),
944 de_dfilter_get_errmsg(c
, &dres
));
948 crc_calc
= de_crcobj_getval(d
->crco
);
949 de_dbg(c
, "crc (calculated): 0x%04x", (unsigned int)crc_calc
);
950 if(crc_calc
!= md
->crc16
) {
951 de_err(c
, "%s: CRC check failed", ucstring_getpsz_d(md
->fullfilename
));
958 if(dcmpr_attempted
&& md
->cmi
&& md
->cmi
->uniq_id
==CODE_lh7
) {
960 d
->lh7_success_flag
= 1;
962 d
->lh7_failed_flag
= 1;
965 de_finfo_destroy(c
, fi
);
968 static int cksum_cbfn(struct de_bufferedreadctx
*brctx
, const u8
*buf
, i64 buf_len
)
970 UI
*pcksum
= (UI
*)brctx
->userdata
;
973 for(i
=0; i
<buf_len
; i
++) {
974 *pcksum
= (*pcksum
+ buf
[i
]) & 0xff;
979 static void do_check_header_crc(deark
*c
, lctx
*d
, struct member_data
*md
)
981 // LHA members don't have to have a header CRC field, though it's probably
982 // considered best practice to have one when the checksum field doesn't
983 // exist, or there are any extended headers.
984 if(!md
->have_hdr_crc_reported
) return;
985 de_crcobj_reset(d
->crco
);
987 // Everything before the CRC field:
988 de_crcobj_addslice(d
->crco
, c
->infile
, md
->member_pos
,
989 md
->hdr_crc_field_pos
- md
->member_pos
);
991 // The zeroed-out CRC field:
992 de_crcobj_addzeroes(d
->crco
, 2);
994 // Everything after the CRC field:
995 de_crcobj_addslice(d
->crco
, c
->infile
, md
->hdr_crc_field_pos
+2,
996 md
->compressed_data_pos
- (md
->hdr_crc_field_pos
+2));
998 md
->hdr_crc_calc
= de_crcobj_getval(d
->crco
);
999 de_dbg(c
, "header crc (calculated): 0x%04x", (UI
)md
->hdr_crc_calc
);
1000 if(md
->hdr_crc_calc
!= md
->hdr_crc_reported
) {
1001 de_err(c
, "Wrong header CRC: reported=0x%04x, calculated=0x%04x",
1002 (UI
)md
->hdr_crc_reported
, (UI
)md
->hdr_crc_calc
);
1006 enum lha_whats_next_enum
{
1009 LHA_WN_TRAILER_AND_JUNK
, // Note: No longer handled differently from TRAILER
1014 static enum lha_whats_next_enum
lha_classify_whats_next(deark
*c
, lctx
*d
, i64 pos
, i64 len
)
1019 if(len
<=0) return LHA_WN_NOTHING
;
1020 b
[0] = de_getbyte(pos
);
1021 if(b
[0]==0 && len
<=2) return LHA_WN_TRAILER
;
1022 if(b
[0]==0 && len
<21) return LHA_WN_TRAILER_AND_JUNK
;
1023 de_read(&b
[1], pos
+1, sizeof(b
)-1);
1024 if(d
->swg_fmt
) hlev
= 0;
1026 if(b
[0]==0 && b
[1]==0) return LHA_WN_TRAILER_AND_JUNK
;
1027 if(b
[0]==0 && hlev
!=2) return LHA_WN_TRAILER_AND_JUNK
;
1028 if(hlev
>3) return LHA_WN_JUNK
;
1029 if(is_possible_cmpr_meth(&b
[2])) return LHA_WN_MEMBER
;
1033 static void do_swg_string_field(deark
*c
, lctx
*d
,
1034 de_ucstring
*s
, i64 pos
, i64 fldlen
, const char *name
)
1036 i64 dlen
= (i64
)de_getbyte(pos
);
1037 if(dlen
>fldlen
-1) dlen
= fldlen
-1;
1039 dbuf_read_to_ucstring(c
->infile
, pos
+1, dlen
, s
, 0, d
->input_encoding
);
1040 ucstring_strip_trailing_spaces(s
);
1041 de_dbg(c
, "SWG %s: \"%s\"", name
, ucstring_getpsz_d(s
));
1044 static void do_special_swg_fields(deark
*c
, lctx
*d
, struct member_data
*md
, i64 pos1
)
1048 de_ucstring
*s
= NULL
;
1050 crc32
= (u32
)de_getu32le_p(&pos
);
1051 de_dbg(c
, "SWG crc32 (reported): 0x%08x", (UI
)crc32
);
1052 s
= ucstring_create(c
);
1053 do_swg_string_field(c
, d
, s
, pos
, 13, "stored filename");
1055 do_swg_string_field(c
, d
, s
, pos
, 41, "subject");
1057 do_swg_string_field(c
, d
, s
, pos
, 36, "contributor");
1059 do_swg_string_field(c
, d
, s
, pos
, 71, "search keys");
1060 ucstring_destroy(s
);
1063 // This single function parses all the different header formats, using lots of
1064 // "if" statements. It is messy, but it's a no-win situation.
1065 // The alternative of four separate functions would have a lot of redundant
1066 // code, and be harder to maintain.
1068 // Caller allocates and initializes md.
1069 // If the member was successfully parsed, sets md->total_size and returns nonzero.
1070 static int do_read_member(deark
*c
, lctx
*d
, struct member_data
*md
)
1073 i64 lev0_header_size
= 0;
1074 i64 lev1_base_header_size
= 0;
1075 i64 lev1_skip_size
= 0;
1076 i64 lev2_total_header_size
= 0;
1077 i64 lev3_header_size
= 0;
1078 i64 pos1
= md
->member_pos
;
1081 i64 exthdr_bytes_consumed
= 0;
1084 UI hdr_checksum_reported
= 0;
1085 u8 has_hdr_checksum
= 0;
1088 enum lha_whats_next_enum wn
;
1089 u8 cmpr_meth_raw
[5];
1090 int saved_indent_level
;
1092 de_dbg_indent_save(c
, &saved_indent_level
);
1094 nbytes_avail
= c
->infile
->len
- pos1
;
1095 wn
= lha_classify_whats_next(c
, d
, pos1
, nbytes_avail
);
1096 if(wn
!=LHA_WN_MEMBER
) {
1097 if(d
->member_count
==0) {
1098 de_err(c
, "Not an LHA file");
1100 else if(wn
==LHA_WN_TRAILER
|| wn
==LHA_WN_TRAILER_AND_JUNK
) {
1101 d
->trailer_found
= 1;
1102 d
->trailer_pos
= pos1
;
1103 de_dbg(c
, "trailer at %"I64_FMT
, d
->trailer_pos
);
1105 else if(wn
==LHA_WN_JUNK
) {
1106 de_warn(c
, "%"I64_FMT
" bytes of non-LHA data found at end of file (offset %"I64_FMT
")",
1107 nbytes_avail
, pos1
);
1112 de_dbg(c
, "member at %"I64_FMT
, pos1
);
1113 de_dbg_indent(c
, 1);
1115 // Look ahead to figure out the header format version.
1116 // This byte was originally the high byte of the "MS-DOS file attribute" field,
1117 // which happened to always be zero.
1118 // In later LHA versions, it is overloaded to identify the header format
1119 // version (called "header level" in LHA jargon).
1121 md
->hlev
= 0; // SWG is most similar to header level 0
1124 md
->hlev
= de_getbyte(pos1
+20);
1126 de_dbg(c
, "header level: %d", (int)md
->hlev
);
1128 goto done
; // Shouldn't be possible; checked in lha_classify_whats_next().
1131 if(d
->member_count
==0) {
1132 d
->hlev_of_first_member
= md
->hlev
;
1136 lev0_header_size
= (i64
)de_getbyte_p(&pos
);
1137 de_dbg(c
, "header size: (2+)%d", (int)lev0_header_size
);
1138 hdr_checksum_reported
= (UI
)de_getbyte_p(&pos
);
1139 has_hdr_checksum
= 1;
1140 dbuf_buffered_read(c
->infile
, pos
, lev0_header_size
, cksum_cbfn
, (void*)&md
->hdr_checksum_calc
);
1142 else if(md
->hlev
==1) {
1143 lev1_base_header_size
= (i64
)de_getbyte_p(&pos
);
1144 de_dbg(c
, "base header size: %d", (int)lev1_base_header_size
);
1145 hdr_checksum_reported
= (UI
)de_getbyte_p(&pos
);
1146 has_hdr_checksum
= 1;
1147 dbuf_buffered_read(c
->infile
, pos
, lev1_base_header_size
, cksum_cbfn
, (void*)&md
->hdr_checksum_calc
);
1149 else if(md
->hlev
==2) {
1150 lev2_total_header_size
= de_getu16le_p(&pos
);
1151 de_dbg(c
, "total header size: %d", (int)lev2_total_header_size
);
1153 else if(md
->hlev
==3) {
1155 lev3_word_size
= de_getu16le_p(&pos
);
1156 de_dbg(c
, "word size: %d", (int)lev3_word_size
);
1157 if(lev3_word_size
!=4) {
1158 de_err(c
, "Unsupported word size: %d", (int)lev3_word_size
);
1163 if(has_hdr_checksum
) {
1164 de_dbg(c
, "header checksum (reported): 0x%02x", hdr_checksum_reported
);
1165 de_dbg(c
, "header checksum (calculated): 0x%02x", md
->hdr_checksum_calc
);
1166 if(md
->hdr_checksum_calc
!= hdr_checksum_reported
) {
1167 de_err(c
, "Wrong header checksum: reported=0x%02x, calculated=0x%02x",
1168 hdr_checksum_reported
, md
->hdr_checksum_calc
);
1172 de_read(cmpr_meth_raw
, pos
, 5);
1173 md
->cmi
= de_malloc(c
, sizeof(struct cmpr_meth_info
));
1174 get_cmpr_meth_info(cmpr_meth_raw
, md
->cmi
);
1175 de_dbg(c
, "cmpr method: '%s' (%s)", md
->cmi
->id_printable_sz
, md
->cmi
->descr
);
1178 if(md
->cmi
->uniq_id
== CODE_lhd
) {
1182 else if(md
->cmi
->decompressor
== decompress_uncompressed
) {
1190 // lev1_skip_size is the distance from the third byte of the extended
1191 // header section, to the end of the compressed data.
1192 lev1_skip_size
= de_getu32le_p(&pos
);
1193 de_dbg(c
, "skip size: %u", (unsigned int)lev1_skip_size
);
1194 md
->total_size
= 2 + lev1_base_header_size
+ lev1_skip_size
;
1197 md
->compressed_data_len
= de_getu32le(pos
);
1198 de_dbg(c
, "compressed size: %"I64_FMT
, md
->compressed_data_len
);
1202 md
->total_size
= 2 + lev0_header_size
+ md
->compressed_data_len
;
1204 else if(md
->hlev
==2) {
1205 md
->total_size
= lev2_total_header_size
+ md
->compressed_data_len
;
1209 md
->orig_size
= de_getu32le(pos
);
1210 de_dbg(c
, "original size: %u", (unsigned int)md
->orig_size
);
1213 if(md
->hlev
==0 || md
->hlev
==1) {
1214 read_msdos_modtime(c
, d
, md
, pos
, "last-modified");
1215 pos
+= 4; // modification time/date (MS-DOS)
1217 else if(md
->hlev
==2 || md
->hlev
==3) {
1218 read_unix_timestamp(c
, d
, md
, pos
, DE_TIMESTAMPIDX_MODIFY
, "last-modified");
1219 pos
+= 4; // Unix time
1223 de_ucstring
*attr_descr
;
1225 // Normally, the high byte can only be 0 here, because it's
1226 // also the header level.
1227 attribs
= (UI
)de_getu16le_p(&pos
);
1229 attr_descr
= ucstring_create(c
);
1230 de_describe_dos_attribs(c
, attribs
, attr_descr
, 0);
1231 de_dbg(c
, "attribs: 0x%04x (%s)", attribs
, ucstring_getpsz_d(attr_descr
));
1232 ucstring_destroy(attr_descr
);
1235 attribs
= (UI
)de_getbyte_p(&pos
);
1236 de_dbg(c
, "obsolete attribs low byte: 0x%02x", attribs
);
1237 pos
++; // header level, already handled
1241 do_special_swg_fields(c
, d
, md
, pos
);
1246 fnlen
= de_getbyte(pos
++);
1247 de_dbg(c
, "filename len: %d", (int)fnlen
);
1249 read_filename_hlev0(c
, d
, md
, pos
, fnlen
);
1252 read_filename_hlev1_or_exthdr(c
, d
, md
, pos
, fnlen
);
1257 md
->crc16
= (u32
)de_getu16le_p(&pos
);
1258 de_dbg(c
, "crc16 (reported): 0x%04x", (unsigned int)md
->crc16
);
1260 if(md
->hlev
==1 || md
->hlev
==2 || md
->hlev
==3) {
1261 md
->os_id
= de_getbyte_p(&pos
);
1262 de_dbg(c
, "OS id: %d ('%c') (%s)", (int)md
->os_id
,
1263 de_byte_to_printable_char(md
->os_id
), get_os_name(md
->os_id
));
1267 lev3_header_size
= de_getu32le_p(&pos
);
1268 md
->total_size
= lev3_header_size
+ md
->compressed_data_len
;
1272 i64 ext_headers_size
= (2+lev0_header_size
) - (pos
-pos1
);
1273 md
->compressed_data_pos
= pos1
+ 2 + lev0_header_size
;
1274 if(ext_headers_size
>0) {
1275 de_dbg(c
, "extended header area at %d, len=%d", (int)pos
, (int)ext_headers_size
);
1276 de_dbg_indent(c
, 1);
1277 do_lev0_ext_area(c
, d
, md
, pos
, ext_headers_size
);
1278 de_dbg_indent(c
, -1);
1281 else if(md
->hlev
==1) {
1282 i64 first_ext_hdr_size
;
1284 // The last two bytes of the base header are the size of the first ext. header.
1285 pos
= pos1
+ 2 + lev1_base_header_size
- 2;
1286 // TODO: sanitize pos?
1287 first_ext_hdr_size
= de_getu16le_p(&pos
);
1288 de_dbg(c
, "first ext hdr size: %d", (int)first_ext_hdr_size
);
1290 ret
= do_read_ext_headers(c
, d
, md
, pos
, lev1_skip_size
, first_ext_hdr_size
,
1291 &exthdr_bytes_consumed
);
1294 de_err(c
, "Error parsing extended headers at %d. Cannot extract this file.",
1300 pos
+= exthdr_bytes_consumed
;
1301 md
->compressed_data_pos
= pos
;
1302 md
->compressed_data_len
= lev1_skip_size
- exthdr_bytes_consumed
;
1304 else if(md
->hlev
==2) {
1305 i64 first_ext_hdr_size
;
1307 if(md
->os_id
=='K') {
1308 // So that some lhasa test files will work.
1309 // TODO: The extended headers section is (usually?) self-terminating, so we
1310 // should be able to parse it and figure out if this bug is present. That
1311 // would be better than just guessing.
1312 lev2_total_header_size
+= 2;
1313 md
->total_size
= lev2_total_header_size
+ md
->compressed_data_len
;
1314 de_dbg(c
, "attempting bug workaround: changing total header size to %d",
1315 (int)lev2_total_header_size
);
1318 md
->compressed_data_pos
= pos1
+lev2_total_header_size
;
1320 first_ext_hdr_size
= de_getu16le_p(&pos
);
1321 de_dbg(c
, "first ext hdr size: %d", (int)first_ext_hdr_size
);
1323 do_read_ext_headers(c
, d
, md
, pos
, pos1
+lev2_total_header_size
-pos
,
1324 first_ext_hdr_size
, &exthdr_bytes_consumed
);
1326 else if(md
->hlev
==3) {
1327 i64 first_ext_hdr_size
;
1329 md
->compressed_data_pos
= pos1
+lev3_header_size
;
1331 first_ext_hdr_size
= de_getu32le_p(&pos
);
1332 de_dbg(c
, "first ext hdr size: %d", (int)first_ext_hdr_size
);
1334 do_read_ext_headers(c
, d
, md
, pos
, pos1
+lev3_header_size
-pos
,
1335 first_ext_hdr_size
, &exthdr_bytes_consumed
);
1338 do_check_header_crc(c
, d
, md
);
1340 de_dbg(c
, "member data (%scompressed) at %"I64_FMT
", len=%"I64_FMT
,
1341 is_compressed
?"":"un",
1342 md
->compressed_data_pos
, md
->compressed_data_len
);
1344 make_fullfilename(c
, d
, md
);
1346 de_dbg_indent(c
, 1);
1347 do_extract_file(c
, d
, md
);
1348 de_dbg_indent(c
, -1);
1352 de_dbg_indent_restore(c
, saved_indent_level
);
1356 static void do_swg_footer(deark
*c
, lctx
*d
, i64 pos1
)
1360 de_ucstring
*s
= NULL
;
1362 de_dbg(c
, "SWG footer at %"I64_FMT
, pos1
);
1363 de_dbg_indent(c
, 1);
1364 s
= ucstring_create(c
);
1365 do_swg_string_field(c
, d
, s
, pos
, 61, "message");
1367 do_swg_string_field(c
, d
, s
, pos
, 66, "title");
1369 n
= de_getu16le_p(&pos
);
1370 de_dbg(c
, "SWG number of items: %d", (int)n
);
1371 de_dbg_indent(c
, -1);
1372 ucstring_destroy(s
);
1375 static void do_lha_footer(deark
*c
, lctx
*d
)
1377 i64 extra_bytes_pos
, extra_bytes_len
;
1379 if(!d
->trailer_found
) goto done
;
1380 extra_bytes_pos
= d
->trailer_pos
+1;
1381 extra_bytes_len
= c
->infile
->len
- extra_bytes_pos
;
1382 if(extra_bytes_len
<=1) goto done
;
1384 if(d
->swg_fmt
&& extra_bytes_len
==129) {
1385 do_swg_footer(c
, d
, extra_bytes_pos
);
1389 de_info(c
, "Note: %"I64_FMT
" extra bytes at end of file (offset %"I64_FMT
")",
1390 extra_bytes_len
, extra_bytes_pos
);
// Shared implementation behind the lha and swg modules. A nonzero is_swg
// turns on d->swg_fmt.
// Each member gets a freshly allocated member_data, which is handed to
// do_read_member(); the read position then advances by md->total_size.
// Reading stops when the position reaches the end of the input file, when
// do_read_member() returns 0, or when a member reports a total size below 1.
// NOTE(review): several short lines of this function (loop header, labels,
// closing braces) are not present in this listing; consult the full file
// before relying on the exact control flow.
1395 static void do_run_lha_internal(deark
*c
, de_module_params
*mparams
, int is_swg
)
1399 struct member_data
*md
= NULL
;
1401 d
= de_malloc(c
, sizeof(lctx
));
1402 if(is_swg
) d
->swg_fmt
= 1;
// LHARK mode is opt-in through the "lha:lhark" option (default 0).
1404 d
->lhark_fmt
= (u8
)de_get_ext_option_bool(c
, "lha:lhark", 0);
1406 // It's not really safe to guess CP437, because Japanese-encoded (CP932?)
1407 // filenames are common.
1408 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_ASCII
);
// 0xff is a placeholder; do_read_member() stores the real header level
// while d->member_count is still 0.
1410 d
->hlev_of_first_member
= 0xff;
1411 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
1415 if(pos
>= c
->infile
->len
) break;
1417 md
= de_malloc(c
, sizeof(struct member_data
));
1418 md
->encoding
= d
->input_encoding
;
1419 md
->member_pos
= pos
;
1420 if(!do_read_member(c
, d
, md
)) goto done
;
// A size below 1 would not move pos forward, so stop instead.
1421 if(md
->total_size
<1) goto done
;
1424 pos
+= md
->total_size
;
1426 destroy_member_data(c
, md
);
1431 do_lha_footer(c
, d
);
1433 destroy_member_data(c
, md
);
// Suggest LHARK mode only when it was not requested, the first member used
// header level 1, and 'lh7' decompression failed without ever succeeding.
1435 if(!d
->lhark_fmt
&& d
->hlev_of_first_member
==1 && d
->lh7_failed_flag
&&
1436 !d
->lh7_success_flag
)
1438 de_info(c
, "Note: 'lh7' decompression failed. Maybe this file uses "
1439 "LHARK compression. Try \"-opt lha:lhark\".");
1442 de_crcobj_destroy(d
->crco
);
1447 static void de_run_lha(deark
*c
, de_module_params
*mparams
)
1449 do_run_lha_internal(c
, mparams
, 0);
1452 static int is_swg_sig(const u8
*b
)
1454 return b
[0]=='-' && b
[1]=='s' && b
[2]=='w' &&
1455 (b
[3]=='0' || b
[3]=='1') && b
[4]=='-';
1458 static int de_identify_lha(deark
*c
)
1462 struct cmpr_meth_info cmi
;
1464 de_read(b
, 0, sizeof(b
));
1465 if(b
[20]>3) return 0; // header level
1467 if(!is_possible_cmpr_meth(&b
[2])) return 0;
1468 if(is_swg_sig(&b
[2])) return 0; // Handled by the swg module
1471 if(b
[0]<22) return 0;
1472 if(22 + (int)b
[21] + 2 > 2 + (int)b
[0]) return 0;
1475 if(b
[0]<25) return 0;
1476 if(22 + (int)b
[21] + 5 > 2 + (int)b
[0]) return 0;
1479 i64 hsize
= de_getu16le_direct(&b
[0]);
1480 if(hsize
< 26) return 0;
1483 if((b
[0]!=4 && b
[0]!=8) || b
[1]!=0) return 0;
1486 de_zeromem(&cmi
, sizeof(struct cmpr_meth_info
));
1487 get_cmpr_meth_info(&b
[2], &cmi
);
1488 if(!cmi
.is_recognized
) {
1492 if(de_input_file_has_ext(c
, "lzh") ||
1493 de_input_file_has_ext(c
, "lha"))
1498 if(has_ext
) return 100;
1499 return 80; // Must be less than car_lha
1502 static void de_help_lha(deark
*c
)
1504 de_msg(c
, "-opt lha:lhark : Enable LHARK mode (for 'lh7' compression)");
// Fills in the module-info record for the lha module: its description and
// its run, identify and help callbacks.
1507 void de_module_lha(deark
*c
, struct deark_module_info
*mi
)
1510 mi
->desc
= "LHA/LZH/PMA archive";
1511 mi
->run_fn
= de_run_lha
;
1512 mi
->identify_fn
= de_identify_lha
;
1513 mi
->help_fn
= de_help_lha
;
1516 /////////////////////// SWG / SWAG
1518 // This module works almost just like lha, except that all members are assumed
1519 // to use the SWG header format. (For lha, the SWG header format is never used.)
1521 static void de_run_swg(deark
*c
, de_module_params
*mparams
)
1523 de_declare_fmt(c
, "SWAG packet (LHA-like)");
1524 // TODO?: Some diagnostic messages in the LHA module are hardcoded to
1525 // say "LHA", which is likely to be confusing to uses who know this as
1527 do_run_lha_internal(c
, mparams
, 1);
// Identify callback of the swg module. Reads bytes starting at file offset 2
// into b, and returns 100 when control reaches the extension test and the
// file has a .swg extension.
// NOTE(review): the test applied to b and the fallback return value are on
// lines not present in this listing.
1530 static int de_identify_swg(deark
*c
)
1534 de_read(b
, 2, sizeof(b
));
1536 if(de_input_file_has_ext(c
, "swg")) return 100;
// Fills in the module-info record for the swg module: its description and
// its run and identify callbacks.
1542 void de_module_swg(deark
*c
, struct deark_module_info
*mi
)
1545 mi
->desc
= "SWAG packet";
1546 mi
->run_fn
= de_run_swg
;
1547 mi
->identify_fn
= de_identify_swg
;
1550 /////////////////////// CAR (MylesHi!)
// State for the CAR member currently being converted. It is zeroed before
// each call to do_car_member().
1552 struct car_member_data
{
// Checksum of the rebuilt LHA header, accumulated by do_car_member().
1555 UI hdr_checksum_calc
;
1563 static int looks_like_car_member(deark
*c
, i64 pos
)
1567 de_read(b
, pos
, 16);
1568 if(b
[2]!='-' || b
[3]!='l'|| b
[4]!='h' || b
[6]!='-') return 0;
1569 if(b
[5]!='0' && b
[5]!='5') return 0;
1570 if((int)b
[0] != (int)b
[15] + 25) return 0;
1571 if(dbuf_memcmp(c
->infile
, pos
+ (i64
)b
[15] + 24, (const u8
*)"\x20\x00\x00", 3)) return 0;
1575 static int do_car_member(deark
*c
, struct car_ctx
*d
, struct car_member_data
*md
)
1577 i64 lev1_base_header_size
;
1580 i64 compressed_data_len
;
1581 i64 pos1
= md
->member_pos
;
1583 int saved_indent_level
;
1585 de_dbg_indent_save(c
, &saved_indent_level
);
1586 de_dbg(c
, "member at %"I64_FMT
, pos1
);
1587 de_dbg_indent(c
, 1);
1589 // Figure out where everything is...
1590 lev1_base_header_size
= (i64
)de_getbyte(pos1
);
1591 de_dbg(c
, "base header size: %d", (int)lev1_base_header_size
);
1592 hdr_endpos
= pos1
+ 2 + lev1_base_header_size
;
1593 fnlen
= lev1_base_header_size
- 25;
1594 de_dbg(c
, "implied filename len: %d", (int)fnlen
);
1595 if(fnlen
<0) goto done
;
1597 compressed_data_len
= de_getu32le(pos1
+ 7);
1598 de_dbg(c
, "compressed size: %"I64_FMT
, compressed_data_len
);
1599 if(hdr_endpos
+ compressed_data_len
> c
->infile
->len
) goto done
;
1601 // Convert to an LHA level-1 header
1602 dbuf_empty(d
->hdr_tmp
);
1604 // Fields through uncmpr_size are the same (we'll patch the checksum later)
1605 dbuf_copy(c
->infile
, pos1
, 15, d
->hdr_tmp
);
1607 dbuf_copy(c
->infile
, hdr_endpos
-7, 4, d
->hdr_tmp
); // timestamp
1609 // attribute (low byte)
1610 dbuf_copy(c
->infile
, hdr_endpos
-9, 1, d
->hdr_tmp
);
1611 dbuf_writebyte(d
->hdr_tmp
, 0x01); // level identifier
1613 // Fields starting with filename length, through crc
1614 dbuf_copy(c
->infile
, pos1
+15, 1+fnlen
+2, d
->hdr_tmp
);
1616 dbuf_writebyte(d
->hdr_tmp
, 77); // OS ID = 'M' = MS-DOS
1618 // Recalculate checksum
1619 dbuf_buffered_read(d
->hdr_tmp
, 2, lev1_base_header_size
, cksum_cbfn
,
1620 (void*)&md
->hdr_checksum_calc
);
1621 de_dbg(c
, "header checksum (calculated): 0x%02x", md
->hdr_checksum_calc
);
1622 dbuf_writebyte_at(d
->hdr_tmp
, 1, (u8
)md
->hdr_checksum_calc
);
1623 dbuf_truncate(d
->hdr_tmp
, 2+lev1_base_header_size
);
1625 // Write everything out
1626 dbuf_copy(d
->hdr_tmp
, 0, d
->hdr_tmp
->len
, d
->lha_outf
);
1627 de_dbg(c
, "member data at %"I64_FMT
", len=%"I64_FMT
, hdr_endpos
, compressed_data_len
);
1628 dbuf_copy(c
->infile
, hdr_endpos
, compressed_data_len
, d
->lha_outf
);
1629 md
->total_size
= (hdr_endpos
-md
->member_pos
) + compressed_data_len
;
1633 de_dbg_indent_restore(c
, saved_indent_level
);
// Run callback of the car_lha module: converts a CAR archive to LHA format.
// The converted archive goes to an output file created with "lha" as its
// extension argument; d->hdr_tmp is an in-memory buffer that do_car_member()
// uses to rebuild each header.
// Members are converted one at a time. A zero byte where a member would start
// is treated as the trailer, and a zero byte is written to the output.
// NOTE(review): several short lines of this function (loop header, success
// flag handling, labels, closing braces) are not present in this listing.
1637 static void de_run_car_lha(deark
*c
, de_module_params
*mparams
)
1639 struct car_ctx
*d
= NULL
;
1640 struct car_member_data
*md
= NULL
;
1644 d
= de_malloc(c
, sizeof(struct car_ctx
));
// Refuse to create any output unless offset 0 looks like a CAR member.
1646 if(!looks_like_car_member(c
, 0)) {
1647 de_err(c
, "Not a CAR file");
1651 d
->lha_outf
= dbuf_create_output_file(c
, "lha", NULL
, 0);
1652 d
->hdr_tmp
= dbuf_create_membuf(c
, 0, 0);
// A single member_data is allocated here and re-zeroed for every member.
1654 md
= de_malloc(c
, sizeof(struct car_member_data
));
1656 if(de_getbyte(pos
)==0) {
1657 de_dbg(c
, "trailer at %"I64_FMT
, pos
);
1658 dbuf_writebyte(d
->lha_outf
, 0);
// NOTE(review): 27 matches the smallest header do_car_member() accepts
// (2 + a base header size of 25) - confirm that is the intent.
1662 if(pos
+27 > c
->infile
->len
) goto done
;
1663 if(!looks_like_car_member(c
, pos
)) goto done
;
1665 de_zeromem(md
, sizeof(struct car_member_data
));
1666 md
->member_pos
= pos
;
1667 if(!do_car_member(c
, d
, md
)) goto done
;
1668 pos
+= md
->total_size
;
1675 dbuf_close(d
->lha_outf
);
1677 de_err(c
, "Conversion to LHA format failed");
1680 dbuf_close(d
->hdr_tmp
);
// Identify callback of the car_lha module. A file without a .car extension is
// rejected with 0; otherwise the bytes at offset 0 are tested with
// looks_like_car_member().
// NOTE(review): the value returned on a match is on a line not present in
// this listing.
1685 static int de_identify_car_lha(deark
*c
)
1687 if(!de_input_file_has_ext(c
, "car")) return 0;
1688 if(looks_like_car_member(c
, 0)) {
// Fills in the module-info record for the car_lha module: its description
// and its run and identify callbacks.
1694 void de_module_car_lha(deark
*c
, struct deark_module_info
*mi
)
1697 mi
->desc
= "CAR (MylesHi!) LHA-like archive";
1698 mi
->run_fn
= de_run_car_lha
;
1699 mi
->identify_fn
= de_identify_car_lha
;
1702 /////////////////////// ARX
// State for the ARX member currently being converted. It is zeroed before
// each call to do_arx_member().
1704 struct arx_member_data
{
// Size of the member data as stored in the file. do_arx_member() reads it
// from the header, and replaces a value of 0 with unc_data_len.
1708 i64 compressed_data_len
;
// Set to 1 by do_arx_member() when the header's compressed size is 0.
1710 int is_uncompressed
;
1717 struct de_crcobj
*crco
;
1720 static int looks_like_arx_member(deark
*c
, i64 pos
)
1724 de_read(b
, pos
, sizeof(b
));
1725 if(b
[2]!='-' || b
[3]!='l'|| b
[4]!='h' || b
[6]!='-') return 0;
1726 if(b
[21]!=0) return 0;
// Runs the lh1 decompressor over the member data (md->compressed_data_len
// bytes at md->hdr_endpos) with md->unc_data_len as the expected output size.
// The output goes to a custom dbuf that has a write listener attached, with
// d->crco as the listener's user data.
// NOTE(review): our_writelistener_cb is defined elsewhere - presumably it
// feeds each written chunk to the CRC object; confirm. The lines that attach
// outf to dcmpro and release outf are not present in this listing.
1730 // Decompress the file, discarding the output, just to figure out the CRC.
1731 static void arx_recalc_lh1(deark
*c
, struct arx_ctx
*d
, struct arx_member_data
*md
)
1734 struct de_dfilter_in_params dcmpri
;
1735 struct de_dfilter_out_params dcmpro
;
1736 struct de_dfilter_results dres
;
1738 outf
= dbuf_create_custom_dbuf(c
, md
->unc_data_len
, 0);
1739 dbuf_set_writelistener(outf
, our_writelistener_cb
, (void*)d
->crco
);
1741 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
// Input: the member data in the input file.
1742 dcmpri
.f
= c
->infile
;
1743 dcmpri
.pos
= md
->hdr_endpos
;
1744 dcmpri
.len
= md
->compressed_data_len
;
// Output: the decompressor is told the exact number of bytes to expect.
1746 dcmpro
.expected_len
= md
->unc_data_len
;
1747 dcmpro
.len_known
= 1;
1749 fmtutil_lh1_codectype1(c
, &dcmpri
, &dcmpro
, &dres
, NULL
);
1754 static void arx_recalc_crc(deark
*c
, struct arx_ctx
*d
, struct arx_member_data
*md
)
1756 de_crcobj_reset(d
->crco
);
1757 if(md
->is_uncompressed
) {
1758 de_crcobj_addslice(d
->crco
, c
->infile
, md
->hdr_endpos
, md
->compressed_data_len
);
1761 arx_recalc_lh1(c
, d
, md
);
1763 md
->crc_calc
= de_crcobj_getval(d
->crco
);
1766 static int do_arx_member(deark
*c
, struct arx_ctx
*d
, struct arx_member_data
*md
)
1768 i64 lev0_header_size
;
1769 i64 pos1
= md
->member_pos
;
1770 UI hdr_checksum_calc
= 0;
1773 int saved_indent_level
;
1775 de_dbg_indent_save(c
, &saved_indent_level
);
1776 de_dbg(c
, "member at %"I64_FMT
, pos1
);
1777 de_dbg_indent(c
, 1);
1779 lev0_header_size
= (i64
)de_getbyte(pos1
);
1780 de_dbg(c
, "header size: %d", (int)lev0_header_size
);
1781 if(lev0_header_size
<22) goto done
;
1782 md
->hdr_endpos
= pos1
+ 2 + lev0_header_size
;
1784 md
->compressed_data_len
= de_getu32le(pos1
+8);
1785 de_dbg(c
, "compressed size: %"I64_FMT
, md
->compressed_data_len
);
1787 md
->unc_data_len
= de_getu32le(pos1
+12);
1788 de_dbg(c
, "uncmpr. size: %"I64_FMT
, md
->unc_data_len
);
1790 if(md
->compressed_data_len
==0) {
1791 md
->is_uncompressed
= 1;
1792 md
->compressed_data_len
= md
->unc_data_len
;
1795 if(md
->hdr_endpos
+ md
->compressed_data_len
> c
->infile
->len
) goto done
;
1797 // Just the first byte of the CRC is present, and apparently not even
1798 // that for non-compressed files.
1799 extra_crc_byte
= de_getbyte(md
->hdr_endpos
-1);
1800 de_dbg(c
, "crc (reported): 0x??%02x", (UI
)extra_crc_byte
);
1802 // Find the correct CRC of the file data.
1803 arx_recalc_crc(c
, d
, md
);
1804 de_dbg(c
, "crc (calculated): 0x%04x", (UI
)md
->crc_calc
);
1805 if(!md
->is_uncompressed
) {
1806 if((u8
)(md
->crc_calc
& 0xff) != extra_crc_byte
) {
1807 de_warn(c
, "CRC mismatch. Conversion to LHA may have failed.");
1811 // Convert to an LHA header
1812 dbuf_empty(d
->hdr_tmp
);
1814 // Fields through cmpr meth. (We'll patch the checksum, and
1815 // compression method if necessary, later.)
1816 dbuf_copy(c
->infile
, pos1
, 7, d
->hdr_tmp
);
1818 dbuf_writeu32le(d
->hdr_tmp
, md
->compressed_data_len
);
1819 dbuf_writeu32le(d
->hdr_tmp
, md
->unc_data_len
);
1821 /// This part of the header can be copied as-is.
1822 dbuf_copy(c
->infile
, pos1
+8+8, lev0_header_size
-1-6-8, d
->hdr_tmp
);
1825 dbuf_writebyte(d
->hdr_tmp
, (u8
)(md
->crc_calc
& 0xff));
1826 dbuf_writebyte(d
->hdr_tmp
, (u8
)((md
->crc_calc
& 0xff00)>>8));
1828 if(md
->is_uncompressed
) {
1829 dbuf_writebyte_at(d
->hdr_tmp
, 5, '0'); // lh1 -> lh0
1832 // Recalculate header checksum
1833 dbuf_buffered_read(d
->hdr_tmp
, 2, lev0_header_size
, cksum_cbfn
,
1834 (void*)&hdr_checksum_calc
);
1835 de_dbg(c
, "header checksum (calculated): 0x%02x", hdr_checksum_calc
);
1836 dbuf_writebyte_at(d
->hdr_tmp
, 1, (u8
)hdr_checksum_calc
);
1837 dbuf_truncate(d
->hdr_tmp
, 2+lev0_header_size
);
1839 // Write everything out
1840 dbuf_copy(d
->hdr_tmp
, 0, d
->hdr_tmp
->len
, d
->lha_outf
);
1841 de_dbg(c
, "member data at %"I64_FMT
", len=%"I64_FMT
, md
->hdr_endpos
, md
->compressed_data_len
);
1842 dbuf_copy(c
->infile
, md
->hdr_endpos
, md
->compressed_data_len
, d
->lha_outf
);
1843 md
->total_size
= 2 + lev0_header_size
+ md
->compressed_data_len
;
1847 de_dbg_indent_restore(c
, saved_indent_level
);
// Run callback of the arx module: converts an ARX archive to LHA format.
// The converted archive goes to an output file created with "lha" as its
// extension argument. d->hdr_tmp is an in-memory buffer that do_arx_member()
// uses to rebuild each header, and d->crco is the CRC object used to
// recompute each member's CRC.
// Members are converted one at a time. A zero byte where a member would start
// is treated as the trailer, and a zero byte is written to the output.
// NOTE(review): several short lines of this function (loop header, success
// flag handling, labels, closing braces) are not present in this listing.
1851 static void de_run_arx(deark
*c
, de_module_params
*mparams
)
1853 struct arx_ctx
*d
= NULL
;
1854 struct arx_member_data
*md
= NULL
;
1858 d
= de_malloc(c
, sizeof(struct arx_ctx
));
// Refuse to create any output unless offset 0 looks like an ARX member.
1860 if(!looks_like_arx_member(c
, 0)) {
1861 de_err(c
, "Not an ARX file");
1865 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
1866 d
->lha_outf
= dbuf_create_output_file(c
, "lha", NULL
, 0);
1867 d
->hdr_tmp
= dbuf_create_membuf(c
, 0, 0);
// A single member_data is allocated here and re-zeroed for every member.
1869 md
= de_malloc(c
, sizeof(struct arx_member_data
));
1871 if(de_getbyte(pos
)==0) {
1872 de_dbg(c
, "trailer at %"I64_FMT
, pos
);
1873 dbuf_writebyte(d
->lha_outf
, 0);
1877 if(pos
+27 > c
->infile
->len
) goto done
;
1878 if(!looks_like_arx_member(c
, pos
)) goto done
;
1880 de_zeromem(md
, sizeof(struct arx_member_data
));
1881 md
->member_pos
= pos
;
1882 if(!do_arx_member(c
, d
, md
)) goto done
;
1883 pos
+= md
->total_size
;
1890 dbuf_close(d
->lha_outf
);
1892 de_err(c
, "Conversion to LHA format failed");
1895 dbuf_close(d
->hdr_tmp
);
1896 de_crcobj_destroy(d
->crco
);
// Identify callback of the arx module. Returns 0 if dbuf_memcmp() reports a
// difference between the 5 bytes at offset 2 and "-lh1-", or if the bytes at
// offsets 20 and 21 are not 0x20 and 0x00. Returns 100 if those tests pass
// and the file has an .arx extension.
// NOTE(review): the value returned when the extension does not match is on a
// line not present in this listing.
1901 static int de_identify_arx(deark
*c
)
1903 if(dbuf_memcmp(c
->infile
, 2, "-lh1-", 5)) return 0;
1904 if(de_getbyte(20)!=0x20 || de_getbyte(21)!=0x00) return 0;
1905 if(de_input_file_has_ext(c
, "arx")) return 100;
// Fills in the module-info record for the arx module: its description and
// its run and identify callbacks.
1909 void de_module_arx(deark
*c
, struct deark_module_info
*mi
)
1912 mi
->desc
= "ARX LHA-like archive";
1913 mi
->run_fn
= de_run_arx
;
1914 mi
->identify_fn
= de_identify_arx
;