1 // This file is part of Deark.
2 // Copyright (C) 2017 Jason Summers
3 // See the file COPYING for terms of use.
5 // LHA/LZH compressed archive format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
// Module declarations, one DE_DECLARE_MODULE() per module name used by
// this file.
DE_DECLARE_MODULE(de_module_lha);
DE_DECLARE_MODULE(de_module_swg);
DE_DECLARE_MODULE(de_module_pakleo);
DE_DECLARE_MODULE(de_module_car_lha);
DE_DECLARE_MODULE(de_module_arx);
DE_DECLARE_MODULE(de_module_ar001);
DE_DECLARE_MODULE(de_module_lharc_sfx_com);
18 #define MAX_SUBDIR_LEVEL 32
20 #define CODE_S_LH0 0x204c4830 // SAR
21 #define CODE_S_LH5 0x204c4835 // SAR
22 #define CODE_ah0 0x2d616830U // MAR
23 #define CODE_ari 0x2d617269U // MAR
24 #define CODE_hf0 0x2d686630U // MAR
25 #define CODE_lZ0 0x2d6c5a30U // PUT
26 #define CODE_lZ1 0x2d6c5a31U // PUT
27 #define CODE_lZ5 0x2d6c5a35U // PUT
28 #define CODE_lh0 0x2d6c6830U
29 #define CODE_lh1 0x2d6c6831U
30 #define CODE_lh2 0x2d6c6832U
31 #define CODE_lh3 0x2d6c6833U
32 #define CODE_lh4 0x2d6c6834U
33 #define CODE_lh5 0x2d6c6835U
34 #define CODE_lh6 0x2d6c6836U
35 #define CODE_lh7 0x2d6c6837U // standard, or LHARK
36 #define CODE_lh8 0x2d6c6838U
37 #define CODE_lh9 0x2d6c6839U
38 #define CODE_lha 0x2d6c6861U
39 #define CODE_lhb 0x2d6c6862U
40 #define CODE_lhc 0x2d6c6863U
41 #define CODE_lhd 0x2d6c6864U
42 #define CODE_lhe 0x2d6c6865U
43 #define CODE_lhx 0x2d6c6878U
44 #define CODE_ll0 0x2d6c6c30U
45 #define CODE_ll1 0x2d6c6c31U
46 #define CODE_lx1 0x2d6c7831U
47 #define CODE_lz2 0x2d6c7a32U
48 #define CODE_lz3 0x2d6c7a33U
49 #define CODE_lz4 0x2d6c7a34U
50 #define CODE_lz5 0x2d6c7a35U
51 #define CODE_lz7 0x2d6c7a37U
52 #define CODE_lz8 0x2d6c7a38U
53 #define CODE_lzs 0x2d6c7a73U
54 #define CODE_pm0 0x2d706d30U
55 #define CODE_pm1 0x2d706d31U
56 #define CODE_pm2 0x2d706d32U
57 #define CODE_sw0 0x2d737730U
58 #define CODE_sw1 0x2d737731U
60 enum lha_basefmt_enum
{
61 BASEFMT_LHA
= 0, // LHarc/LHA and other formats that are parsed the same
66 #define TIMESTAMPIDX_INVALID (-1)
67 struct timestamp_data
{
68 struct de_timestamp ts
; // The best timestamp of this type found so far
72 struct cmpr_meth_info
;
75 u8 hlev
; // header level
79 struct cmpr_meth_info
*cmi
;
85 u32 hdr_checksum_calc
;
87 u8 have_hdr_crc_reported
;
90 i64 hdr_crc_field_pos
;
94 i64 compressed_data_pos
; // relative to beginning of file
95 i64 compressed_data_len
;
97 de_ucstring
*filename
;
98 de_ucstring
*fullfilename
;
99 struct timestamp_data tsdata
[DE_TIMESTAMPIDX_COUNT
];
102 typedef struct localctx_struct
{
103 de_encoding input_encoding
;
104 int lhark_policy
; // -1=detect, 0=no, 1=yes
106 enum lha_basefmt_enum basefmt
;
107 const char *basefmt_name
;
108 u8 hlev_of_first_member
;
109 u8 lh7_success_flag
; // currently unused
110 u8 lh7_failed_flag
; // currently unused
114 struct de_crcobj
*crco
;
115 struct de_crcobj
*crco_cksum
;
118 typedef void (*decompressor_fn
)(deark
*c
, lctx
*d
, struct member_data
*md
,
119 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
120 struct de_dfilter_results
*dres
);
122 struct cmpr_meth_info
{
125 decompressor_fn decompressor
;
127 char id_printable_sz
[6];
131 struct exthdr_type_info_struct
;
133 typedef void (*exthdr_decoder_fn
)(deark
*c
, lctx
*d
, struct member_data
*md
,
134 u8 id
, const struct exthdr_type_info_struct
*e
,
137 struct exthdr_type_info_struct
{
141 exthdr_decoder_fn decoder_fn
;
144 static int lha_isdigit(u8 x
)
146 return (x
>='0' && x
<='9');
149 static int lha_isalpha(u8 x
)
151 return ((x
>='A' && x
<='Z') || (x
>='a' && x
<='z'));
154 static int lha_isalnum(u8 x
)
156 return (lha_isdigit(x
) || lha_isalpha(x
));
159 static int is_possible_cmpr_meth(const u8 m
[5])
161 if(m
[0]!=m
[4]) return 0;
162 if(m
[0]==' ' && m
[1]=='L' && m
[2]=='H' && lha_isdigit(m
[3])) return 1;
163 if(m
[0]!='-') return 0;
164 if(!lha_isalpha(m
[1]) ||
165 !lha_isalnum(m
[2]) ||
173 static void apply_timestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
174 int tsidx
, const struct de_timestamp
*ts
, int quality
)
176 if(!ts
->is_valid
) return;
177 if(tsidx
<0 || tsidx
>=DE_TIMESTAMPIDX_COUNT
) return;
178 if(quality
< md
->tsdata
[tsidx
].quality
) return;
179 md
->tsdata
[tsidx
].ts
= *ts
;
180 md
->tsdata
[tsidx
].quality
= quality
;
183 static void read_msdos_modtime(deark
*c
, lctx
*d
, struct member_data
*md
,
184 i64 pos
, const char *name
)
186 i64 mod_time_raw
, mod_date_raw
;
187 char timestamp_buf
[64];
188 struct de_timestamp tmp_timestamp
;
190 mod_time_raw
= de_getu16le(pos
);
191 mod_date_raw
= de_getu16le(pos
+2);
192 if(mod_time_raw
==0 && mod_date_raw
==0) {
193 de_dbg(c
, "%s: (not set)", name
);
196 de_dos_datetime_to_timestamp(&tmp_timestamp
, mod_date_raw
, mod_time_raw
);
197 tmp_timestamp
.tzcode
= DE_TZCODE_LOCAL
;
198 de_timestamp_to_string(&tmp_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
199 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
200 apply_timestamp(c
, d
, md
, DE_TIMESTAMPIDX_MODIFY
, &tmp_timestamp
, 10);
203 static void read_windows_FILETIME(deark
*c
, lctx
*d
, struct member_data
*md
,
204 i64 pos
, int tsidx
, const char *name
)
207 char timestamp_buf
[64];
208 struct de_timestamp tmp_timestamp
;
210 t_FILETIME
= de_geti64le(pos
);
211 de_FILETIME_to_timestamp(t_FILETIME
, &tmp_timestamp
, 0x1);
212 if(t_FILETIME
<=0) tmp_timestamp
.is_valid
= 0;
213 de_timestamp_to_string(&tmp_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
214 de_dbg(c
, "%s: %"I64_FMT
" (%s)", name
, t_FILETIME
, timestamp_buf
);
215 apply_timestamp(c
, d
, md
, tsidx
, &tmp_timestamp
, 90);
218 static void read_unix_timestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
219 i64 pos
, int tsidx
, const char *name
)
222 char timestamp_buf
[64];
223 struct de_timestamp tmp_timestamp
;
225 t
= de_geti32le(pos
);
226 de_unix_time_to_timestamp(t
, &tmp_timestamp
, 0x1);
227 de_timestamp_to_string(&tmp_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
228 de_dbg(c
, "%s: %d (%s)", name
, (int)t
, timestamp_buf
);
229 apply_timestamp(c
, d
, md
, tsidx
, &tmp_timestamp
, 50);
232 static void rp_add_component(deark
*c
, lctx
*d
, struct member_data
*md
,
233 dbuf
*f
, i64 pos
, i64 len
, struct de_strarray
*sa
, de_ucstring
*tmpstr
)
236 ucstring_empty(tmpstr
);
237 dbuf_read_to_ucstring(f
, pos
, len
, tmpstr
, 0, md
->encoding
);
238 de_strarray_push(sa
, tmpstr
);
241 static void read_path_to_strarray(deark
*c
, lctx
*d
, struct member_data
*md
,
242 dbuf
*inf
, i64 pos
, i64 len
, struct de_strarray
*sa
, int is_exthdr_dirname
)
244 dbuf
*tmpdbuf
= NULL
;
245 de_ucstring
*tmpstr
= NULL
;
246 i64 component_startpos
;
250 tmpstr
= ucstring_create(c
);
252 tmpdbuf
= dbuf_create_membuf(c
, len
, 0);
253 dbuf_copy(inf
, pos
, len
, tmpdbuf
);
255 component_startpos
= 0;
258 for(i
=0; i
<len
; i
++) {
261 ch
= dbuf_getbyte(tmpdbuf
, i
);
262 if(ch
==0x00) break; // Tolerate NUL termination
263 if((is_exthdr_dirname
&& ch
==0xff) ||
264 (!is_exthdr_dirname
&& (ch
=='\\' || ch
=='/')))
266 component_len
= i
- component_startpos
;
267 rp_add_component(c
, d
, md
, tmpdbuf
, component_startpos
, component_len
, sa
, tmpstr
);
268 component_startpos
= i
+1;
275 rp_add_component(c
, d
, md
, tmpdbuf
, component_startpos
, component_len
, sa
, tmpstr
);
278 ucstring_destroy(tmpstr
);
281 static void read_filename_hlev0(deark
*c
, lctx
*d
, struct member_data
*md
,
284 struct de_strarray
*sa
= NULL
;
287 ucstring_empty(md
->filename
);
290 md
->filename
= ucstring_create(c
);
293 sa
= de_strarray_create(c
, MAX_SUBDIR_LEVEL
+2);
294 read_path_to_strarray(c
, d
, md
, c
->infile
, pos
, len
, sa
, 0);
296 de_strarray_make_path(sa
, md
->filename
, DE_MPFLAG_NOTRAILINGSLASH
);
297 de_dbg(c
, "filename (parsed): \"%s\"", ucstring_getpsz_d(md
->filename
));
299 de_strarray_destroy(sa
);
302 static void read_filename_hlev1_or_exthdr(deark
*c
, lctx
*d
, struct member_data
*md
,
308 ucstring_empty(md
->filename
);
311 md
->filename
= ucstring_create(c
);
314 // Some files seem to assume NUL termination is allowed.
315 dbuf_read_to_ucstring(c
->infile
, pos
, len
,
316 md
->filename
, DE_CONVFLAG_STOP_AT_NUL
, md
->encoding
);
317 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(md
->filename
));
319 // I don't think slashes are allowed
320 for(i
=0; i
<md
->filename
->len
; i
++) {
321 if(md
->filename
->str
[i
]=='/') {
322 md
->filename
->str
[i
]='_';
327 static void exthdr_common(deark
*c
, lctx
*d
, struct member_data
*md
,
328 u8 id
, const struct exthdr_type_info_struct
*e
,
332 md
->hdr_crc_reported
= (u32
)de_getu16le(pos
);
333 md
->have_hdr_crc_reported
= 1;
334 md
->hdr_crc_field_pos
= pos
;
335 de_dbg(c
, "header crc (reported): 0x%04x", (UI
)md
->hdr_crc_reported
);
336 // TODO: Additional information
339 static void exthdr_filename(deark
*c
, lctx
*d
, struct member_data
*md
,
340 u8 id
, const struct exthdr_type_info_struct
*e
,
343 read_filename_hlev1_or_exthdr(c
, d
, md
, pos
, dlen
);
346 static void exthdr_dirname(deark
*c
, lctx
*d
, struct member_data
*md
,
347 u8 id
, const struct exthdr_type_info_struct
*e
,
350 struct de_strarray
*dirname_sa
= NULL
;
353 ucstring_empty(md
->dirname
);
356 md
->dirname
= ucstring_create(c
);
359 dirname_sa
= de_strarray_create(c
, MAX_SUBDIR_LEVEL
+2);
360 // 0xff is used as the path separator. Don't know what happens if a directory
361 // name contains an actual 0xff byte.
362 read_path_to_strarray(c
, d
, md
, c
->infile
, pos
, dlen
, dirname_sa
, 1);
363 de_strarray_make_path(dirname_sa
, md
->dirname
, DE_MPFLAG_NOTRAILINGSLASH
);
364 de_dbg(c
, "%s (parsed): \"%s\"", e
->name
, ucstring_getpsz_d(md
->dirname
));
366 de_strarray_destroy(dirname_sa
);
369 static void exthdr_msdosattribs(deark
*c
, lctx
*d
, struct member_data
*md
,
370 u8 id
, const struct exthdr_type_info_struct
*e
,
374 de_ucstring
*descr
= NULL
;
376 if(dlen
<2) goto done
;
377 attribs
= (u32
)de_getu16le(pos
);
378 descr
= ucstring_create(c
);
379 de_describe_dos_attribs(c
, (UI
)attribs
, descr
, 0);
380 de_dbg(c
, "%s: 0x%04x (%s)", e
->name
, (UI
)attribs
, ucstring_getpsz_d(descr
));
382 ucstring_destroy(descr
);
385 static void exthdr_filesize(deark
*c
, lctx
*d
, struct member_data
*md
,
386 u8 id
, const struct exthdr_type_info_struct
*e
,
389 // TODO: Support this
390 de_warn(c
, "Unsupported \"file size\" extended header found. This may prevent "
391 "the rest of the file from being processed correctly.");
394 static void exthdr_windowstimestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
395 u8 id
, const struct exthdr_type_info_struct
*e
,
399 read_windows_FILETIME(c
, d
, md
, pos
, DE_TIMESTAMPIDX_CREATE
, "create time");
400 read_windows_FILETIME(c
, d
, md
, pos
+8, DE_TIMESTAMPIDX_MODIFY
, "mod time ");
401 read_windows_FILETIME(c
, d
, md
, pos
+16, DE_TIMESTAMPIDX_ACCESS
, "access time");
404 static void interpret_unix_perms(deark
*c
, lctx
*d
, struct member_data
*md
, UI mode
)
406 if(mode
& 0100000) { // regular file
407 if(mode
& 0111) { // executable
408 md
->is_executable
= 1;
411 md
->is_nonexecutable
= 1;
415 if((mode
& 0170000) == 0120000) {
416 md
->is_special
= 1; // symlink
420 static void exthdr_unixperms(deark
*c
, lctx
*d
, struct member_data
*md
,
421 u8 id
, const struct exthdr_type_info_struct
*e
,
427 mode
= (UI
)de_getu16le(pos
);
428 de_dbg(c
, "mode: octal(%06o)", mode
);
429 interpret_unix_perms(c
, d
, md
, mode
);
432 static void exthdr_unixuidgid(deark
*c
, lctx
*d
, struct member_data
*md
,
433 u8 id
, const struct exthdr_type_info_struct
*e
,
439 // It's strange that the GID comes first, while the UID comes first in the
440 // level-0 "extended area".
441 gid
= de_getu16le(pos
);
442 de_dbg(c
, "gid: %d", (int)gid
);
443 uid
= de_getu16le(pos
+2);
444 de_dbg(c
, "uid: %d", (int)uid
);
447 static void exthdr_unixtimestamp(deark
*c
, lctx
*d
, struct member_data
*md
,
448 u8 id
, const struct exthdr_type_info_struct
*e
,
452 read_unix_timestamp(c
, d
, md
, pos
, DE_TIMESTAMPIDX_MODIFY
, "last-modified");
455 static void exthdr_lev3newattribs2(deark
*c
, lctx
*d
, struct member_data
*md
,
456 u8 id
, const struct exthdr_type_info_struct
*e
,
464 // [Documented as "creation time", but this is a Unix-style header, so I
465 // wonder if someone mistranslated "ctime" (=change time).]
466 read_unix_timestamp(c
, d
, md
, pos
+12, TIMESTAMPIDX_INVALID
, "create(?) time");
468 read_unix_timestamp(c
, d
, md
, pos
+16, DE_TIMESTAMPIDX_ACCESS
, "access time ");
471 static void exthdr_codepage(deark
*c
, lctx
*d
, struct member_data
*md
,
472 u8 id
, const struct exthdr_type_info_struct
*e
,
476 de_encoding n_encoding
;
480 n_codepage
= (int)de_geti32le(pos
);
481 n_encoding
= de_windows_codepage_to_encoding(c
, n_codepage
, descr
, sizeof(descr
), 0);
482 de_dbg(c
, "codepage: %d (%s)", n_codepage
, descr
);
483 if(n_encoding
!= DE_ENCODING_UNKNOWN
) {
484 md
->encoding
= n_encoding
;
488 static const struct exthdr_type_info_struct exthdr_type_info_arr
[] = {
489 { 0x00, 0, "common", exthdr_common
},
490 { 0x01, 0, "filename", exthdr_filename
},
491 { 0x02, 0, "dir name", exthdr_dirname
},
492 { 0x39, 0, "multi-disc", NULL
},
493 { 0x3f, 0, "comment", NULL
},
494 { 0x40, 0, "MS-DOS file attribs", exthdr_msdosattribs
},
495 { 0x41, 0, "Windows timestamp", exthdr_windowstimestamp
},
496 { 0x42, 0, "MS-DOS file size", exthdr_filesize
},
497 { 0x43, 0, "time zone", NULL
},
498 { 0x44, 0, "UTF-16 filename", NULL
},
499 { 0x45, 0, "UTF-16 dir name", NULL
},
500 { 0x46, 0, "codepage", exthdr_codepage
},
501 { 0x50, 0, "Unix perms", exthdr_unixperms
},
502 { 0x51, 0, "Unix UID/GID", exthdr_unixuidgid
},
503 { 0x52, 0, "Unix group name", NULL
},
504 { 0x53, 0, "Unix username", NULL
},
505 { 0x54, 0, "Unix timestamp", exthdr_unixtimestamp
},
506 { 0x7d, 0, "capsule", NULL
},
507 { 0x7e, 0, "OS/2 extended attribs", NULL
},
508 { 0x7f, 0, "level 3 new attribs type-1", NULL
}, // (OS/2 only)
509 { 0xff, 0, "level 3 new attribs type-2", exthdr_lev3newattribs2
}
512 static void destroy_member_data(deark
*c
, struct member_data
*md
)
515 ucstring_destroy(md
->dirname
);
516 ucstring_destroy(md
->filename
);
517 ucstring_destroy(md
->fullfilename
);
522 static const struct exthdr_type_info_struct
*get_exthdr_type_info(u8 id
)
526 for(i
=0; i
<DE_ARRAYCOUNT(exthdr_type_info_arr
); i
++) {
527 if(id
== exthdr_type_info_arr
[i
].id
) {
528 return &exthdr_type_info_arr
[i
];
534 static void do_read_ext_header(deark
*c
, lctx
*d
, struct member_data
*md
,
535 i64 pos1
, i64 len
, i64 dlen
)
539 const struct exthdr_type_info_struct
*e
= NULL
;
542 id
= de_getbyte(pos1
);
543 e
= get_exthdr_type_info(id
);
545 name
= e
? e
->name
: "?";
547 de_dbg(c
, "ext header at %"I64_FMT
", len=%"I64_FMT
" (1+%"I64_FMT
"+%"I64_FMT
"), id=0x%02x (%s)",
548 pos1
, len
, dlen
-1, len
-dlen
, (UI
)id
, name
);
550 if(dlen
<1) return; // Invalid header, too short to even have an id field
553 if(e
&& e
->decoder_fn
) {
554 e
->decoder_fn(c
, d
, md
, id
, e
, pos1
+1, dlen
-1);
557 if(c
->debug_level
>=2) {
558 de_dbg_hexdump(c
, c
->infile
, pos1
+1, dlen
-1, 256, NULL
, 0x1);
561 de_dbg_indent(c
, -1);
564 static const char *get_os_name(u8 id
)
566 const char *name
= NULL
;
568 case ' ': name
="unspecified"; break;
569 case '2': name
="OS/2"; break;
570 case '3': name
="OS/386?"; break;
571 case '9': name
="OS-9"; break;
572 case 'A': name
="Amiga"; break;
573 case 'C': name
="CP/M"; break;
574 case 'F': name
="FLEX"; break;
575 case 'H': name
="Human68K"; break;
576 case 'J': name
="JVM"; break;
577 case 'K': name
="OS-9/68K"; break;
578 case 'M': name
="DOS"; break;
579 case 'R': name
="RUNser"; break;
580 case 'T': name
="TownsOS"; break;
581 case 'U': name
="Unix"; break;
582 case 'W': name
="Windows NT"; break;
583 case 'a': name
="Atari ST?"; break;
584 case 'm': name
="Macintosh"; break;
585 case 'w': name
="Windows"; break;
587 return name
?name
:"?";
590 static void do_lev0_ext_area(deark
*c
, lctx
*d
, struct member_data
*md
,
594 md
->os_id
= de_getbyte(pos1
);
595 de_dbg(c
, "OS id: %d ('%c') (%s)", (int)md
->os_id
,
596 de_byte_to_printable_char(md
->os_id
), get_os_name(md
->os_id
));
603 if(len
<12) goto done
;
605 read_unix_timestamp(c
, d
, md
, pos1
+2, DE_TIMESTAMPIDX_MODIFY
, "last-modified");
607 mode
= (UI
)de_getu16le(pos1
+6);
608 de_dbg(c
, "mode: octal(%06o)", mode
);
609 interpret_unix_perms(c
, d
, md
, mode
);
611 uid
= de_getu16le(pos1
+8);
612 de_dbg(c
, "uid: %d", (int)uid
);
613 gid
= de_getu16le(pos1
+10);
614 de_dbg(c
, "gid: %d", (int)gid
);
620 // AFAICT, we're expected to think of the extended headers as a kind of linked
621 // list. The last field in each node is the "size of next node" (instead of
622 // "pointer to next node", as a real linked list would have). A size of 0 is
623 // like a "nil" pointer, and marks the end of the list.
624 // The "size of the first node" field (analogous to the "head" pointer) is
625 // conceptually not part of the extended headers section.
627 // Note that if we simply shift our frame of reference, this format is identical
628 // to a more typical length-prefixed format. But our code follows the
629 // linked-list model, to make it more consistent with most LHA documentation,
630 // and the various "size" fields.
632 // A return value of 0 means we failed to calculate the size of the
633 // extended headers segment.
634 static int do_read_ext_headers(deark
*c
, lctx
*d
, struct member_data
*md
,
635 i64 pos1
, i64 len
, i64 first_ext_hdr_size
, i64
*tot_bytes_consumed
)
638 i64 this_ext_hdr_size
, next_ext_hdr_size
;
640 i64 size_of_size_field
;
642 *tot_bytes_consumed
= 0;
644 if(first_ext_hdr_size
==0) {
648 de_dbg(c
, "ext headers section at %"I64_FMT
, pos
);
651 size_of_size_field
= (md
->hlev
==3) ? 4 : 2;
653 next_ext_hdr_size
= first_ext_hdr_size
;
655 this_ext_hdr_size
= next_ext_hdr_size
;
656 if(this_ext_hdr_size
==0) {
658 *tot_bytes_consumed
= pos
- pos1
;
661 if(this_ext_hdr_size
<size_of_size_field
) goto done
;
662 if(pos
+this_ext_hdr_size
> pos1
+len
) goto done
;
664 do_read_ext_header(c
, d
, md
, pos
, this_ext_hdr_size
, this_ext_hdr_size
-size_of_size_field
);
666 // Each ext header ends with a "size of next header" field.
667 // We'll read it at this level, instead of in do_read_ext_header().
668 pos
+= this_ext_hdr_size
-size_of_size_field
;
669 if(size_of_size_field
==2) {
670 next_ext_hdr_size
= de_getu16le(pos
);
673 next_ext_hdr_size
= de_getu32le(pos
);
675 pos
+= size_of_size_field
;
680 de_dbg(c
, "size of ext headers section: %"I64_FMT
, (i64
)*tot_bytes_consumed
);
683 de_dbg(c
, "failed to parse all extended headers");
685 de_dbg_indent(c
, -1);
689 static void make_fullfilename(deark
*c
, lctx
*d
, struct member_data
*md
)
691 if(md
->fullfilename
) return;
694 md
->filename
= ucstring_create(c
);
696 md
->fullfilename
= ucstring_create(c
);
699 ucstring_append_ucstring(md
->fullfilename
, md
->filename
);
703 ucstring_append_ucstring(md
->fullfilename
, md
->dirname
);
706 if(ucstring_isnonempty(md
->dirname
)) {
707 ucstring_append_ucstring(md
->fullfilename
, md
->dirname
);
708 ucstring_append_sz(md
->fullfilename
, "/", DE_ENCODING_LATIN1
);
710 if(ucstring_isnonempty(md
->filename
)) {
711 ucstring_append_ucstring(md
->fullfilename
, md
->filename
);
714 ucstring_append_char(md
->fullfilename
, '_');
720 static void decompress_uncompressed(deark
*c
, lctx
*d
, struct member_data
*md
,
721 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
722 struct de_dfilter_results
*dres
)
724 fmtutil_decompress_uncompressed(c
, dcmpri
, dcmpro
, dres
, 0);
727 static void decompress_lh1(deark
*c
, lctx
*d
, struct member_data
*md
,
728 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
729 struct de_dfilter_results
*dres
)
731 fmtutil_lh1_codectype1(c
, dcmpri
, dcmpro
, dres
, NULL
);
734 // Caller supplies fmt (DE_LH5X_FMT_*).
735 static void decompress_lh5x_internal(deark
*c
, lctx
*d
,
736 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
737 struct de_dfilter_results
*dres
, int fmt
)
739 struct de_lh5x_params lzhparams
;
741 de_zeromem(&lzhparams
, sizeof(struct de_lh5x_params
));
743 lzhparams
.zero_codes_block_behavior
= DE_LH5X_ZCB_65536
;
744 lzhparams
.warn_about_zero_codes_block
= 1;
745 lzhparams
.history_fill_val
= 0x20;
746 fmtutil_decompress_lh5x(c
, dcmpri
, dcmpro
, dres
, &lzhparams
);
749 static int decompress_lh5x_dry_run(deark
*c
, lctx
*d
, struct member_data
*md
,
750 struct de_dfilter_in_params
*dcmpri
, int fmt
)
754 struct de_crcobj
*crco
= NULL
;
757 struct de_dfilter_out_params dcmpro
;
758 struct de_dfilter_results dres
;
760 // Make a "dummy" dbuf to write to, which doesn't store the data, but
761 // tracks the size and CRC.
762 outf
= dbuf_create_custom_dbuf(c
, 0, 0);
763 dbuf_enable_wbuffer(outf
);
764 crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
765 dbuf_set_writelistener(outf
, de_writelistener_for_crc
, crco
);
767 de_dfilter_init_objects(c
, NULL
, &dcmpro
, &dres
);
769 dcmpro
.len_known
= 1;
770 dcmpro
.expected_len
= md
->orig_size
;
772 old_debug_level
= c
->debug_level
;
773 c
->debug_level
= 0; // hack
774 decompress_lh5x_internal(c
, d
, dcmpri
, &dcmpro
, &dres
, fmt
);
775 c
->debug_level
= old_debug_level
;
778 if(dres
.errcode
) goto done
;
779 if(outf
->len
!= md
->orig_size
) goto done
;
780 // Note: Another possible test would be if
781 // (dres.bytes_consumed == md->compressed_data_len).
782 crc_calc
= de_crcobj_getval(crco
);
783 if(crc_calc
!= md
->crc_reported
) goto done
;
788 de_crcobj_destroy(crco
);
792 // Sets d->lhark_policy.
793 // This detection is slow, so we only do it for the first lh7 member in a file,
794 // and assume all other lh7 members use the same format.
795 static void detect_lhark(deark
*c
, lctx
*d
, struct member_data
*md
,
796 struct de_dfilter_in_params
*dcmpri
)
800 const char *fmt_name
;
802 if(d
->lhark_policy
>=0) goto done
; // shouldn't get here
803 if(d
->lhark_req
>=0) {
804 d
->lhark_policy
= d
->lhark_req
; // shouldn't get here
808 de_dbg(c
, "[detecting lh7 format]");
811 if(md
->hlev
!=1 || md
->os_id
!=0x20) {
817 ret
= decompress_lh5x_dry_run(c
, d
, md
, dcmpri
, DE_LH5X_FMT_LH7
);
824 ret
= decompress_lh5x_dry_run(c
, d
, md
, dcmpri
, DE_LH5X_FMT_LHARK
);
835 if(d
->lhark_policy
>0)
838 fmt_name
= "standard lh7";
841 fmt_name
= "unknown, assuming standard lh7";
843 de_dbg(c
, "detected lh7 format: %s", fmt_name
);
844 de_dbg_indent(c
, -1);
847 // Compression method will be selected based on id_raw[3] (which
848 // should be '4'...'8'), etc.
849 static void decompress_lh5x_auto(deark
*c
, lctx
*d
, struct member_data
*md
,
850 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
851 struct de_dfilter_results
*dres
)
855 switch(md
->cmi
->id_raw
[3]) {
857 fmt
= DE_LH5X_FMT_LH5
;
860 fmt
= DE_LH5X_FMT_LH6
;
863 if(d
->lhark_policy
<0) {
864 detect_lhark(c
, d
, md
, dcmpri
);
866 if(d
->lhark_policy
>0) {
867 fmt
= DE_LH5X_FMT_LHARK
;
870 fmt
= DE_LH5X_FMT_LH7
;
874 fmt
= DE_LH5X_FMT_LH7
;
880 decompress_lh5x_internal(c
, d
, dcmpri
, dcmpro
, dres
, fmt
);
883 static void decompress_lh5(deark
*c
, lctx
*d
, struct member_data
*md
,
884 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
885 struct de_dfilter_results
*dres
)
887 decompress_lh5x_internal(c
, d
, dcmpri
, dcmpro
, dres
, DE_LH5X_FMT_LH5
);
890 static void decompress_lz5(deark
*c
, lctx
*d
, struct member_data
*md
,
891 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
892 struct de_dfilter_results
*dres
)
894 fmtutil_decompress_lzss1(c
, dcmpri
, dcmpro
, dres
, 0x2);
897 static void decompress_pakleo(deark
*c
, lctx
*d
, struct member_data
*md
,
898 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
899 struct de_dfilter_results
*dres
)
901 struct de_lzw_params delzwp
;
903 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
904 delzwp
.fmt
= DE_LZWFMT_PAKLEO
;
905 fmtutil_decompress_lzw(c
, dcmpri
, dcmpro
, dres
, &delzwp
);
908 struct cmpr_meth_array_item
{
909 enum lha_basefmt_enum basefmt
;
913 decompressor_fn decompressor
;
916 // Compression methods with a decompressor or a description are usually
917 // listed here, but note that it is also possible for get_cmpr_meth_info()
918 // to handle them procedurally.
919 static const struct cmpr_meth_array_item cmpr_meth_arr
[] = {
920 { BASEFMT_LHA
, 0x00, CODE_lhd
, "directory", NULL
},
921 { BASEFMT_LHA
, 0x00, CODE_lh0
, "uncompressed", decompress_uncompressed
},
922 { BASEFMT_LHA
, 0x00, CODE_lh1
, "LZ77-4K, adaptive Huffman", decompress_lh1
},
923 { BASEFMT_LHA
, 0x00, CODE_lh4
, "LZ77-4K, static Huffman", decompress_lh5x_auto
},
924 { BASEFMT_LHA
, 0x00, CODE_lh5
, "LZ77-8K, static Huffman", decompress_lh5
},
925 { BASEFMT_LHA
, 0x00, CODE_lh6
, "LZ77-32K, static Huffman", decompress_lh5x_auto
},
926 { BASEFMT_LHA
, 0x00, CODE_lh7
, NULL
, decompress_lh5x_auto
},
927 { BASEFMT_LHA
, 0x00, CODE_lh8
, NULL
, decompress_lh5x_auto
},
928 { BASEFMT_LHA
, 0x00, CODE_lz4
, "uncompressed (LArc)", decompress_uncompressed
},
929 { BASEFMT_LHA
, 0x00, CODE_lz5
, "LZSS-4K (LArc)", decompress_lz5
},
930 { BASEFMT_LHA
, 0x00, CODE_pm0
, "uncompressed (PMArc)", decompress_uncompressed
},
931 { BASEFMT_LHA
, 0x00, CODE_lZ0
, "uncompressed (MicroFox PUT)", decompress_uncompressed
},
932 { BASEFMT_LHA
, 0x00, CODE_lZ1
, "MicroFox PUT lZ1", decompress_lh1
},
933 { BASEFMT_LHA
, 0x00, CODE_lZ5
, "MicroFox PUT lZ5", decompress_lh5
},
934 { BASEFMT_LHA
, 0x00, CODE_S_LH0
, "uncompressed (SAR)", decompress_uncompressed
},
935 { BASEFMT_LHA
, 0x00, CODE_S_LH5
, "SAR LH5", decompress_lh5
},
936 { BASEFMT_SWG
, 0x00, CODE_sw0
, "uncompressed", decompress_uncompressed
},
937 { BASEFMT_SWG
, 0x00, CODE_sw1
, NULL
, NULL
},
938 { BASEFMT_PAKLEO
, 0x00, CODE_ll0
, "uncompressed", decompress_uncompressed
},
939 { BASEFMT_PAKLEO
, 0x00, CODE_ll1
, "LZW", decompress_pakleo
}
942 // For basefmt==BASEFMT_LHA only
943 static const u32 other_known_cmpr_methods
[] = {
944 CODE_ah0
, CODE_ari
, CODE_hf0
,
945 CODE_lh2
, CODE_lh3
, CODE_lh9
,
946 CODE_lha
, CODE_lhb
, CODE_lhc
, CODE_lhe
, CODE_lhx
, CODE_lx1
,
947 CODE_lz2
, CODE_lz3
, CODE_lz7
, CODE_lz8
, CODE_lzs
,
948 CODE_pm1
, CODE_pm2
};
// Only call this after is_possible_cmpr_meth() returns nonzero.
951 // Caller allocates cmi, and initializes to zeroes.
952 static void get_cmpr_meth_info(const u8 idbuf
[5], enum lha_basefmt_enum basefmt
,
953 struct cmpr_meth_info
*cmi
)
956 const struct cmpr_meth_array_item
*cmai
= NULL
;
958 // The first 4 bytes are unique for all known methods.
959 cmi
->uniq_id
= (u32
)de_getu32be_direct(idbuf
);
961 de_memcpy(cmi
->id_raw
, idbuf
, 5);
963 // All "possible" methods only use printable characters.
964 de_memcpy(cmi
->id_printable_sz
, idbuf
, 5);
965 cmi
->id_printable_sz
[5] = '\0';
967 for(k
=0; k
<DE_ARRAYCOUNT(cmpr_meth_arr
); k
++) {
968 if(cmpr_meth_arr
[k
].basefmt
!= basefmt
) continue;
969 if(cmpr_meth_arr
[k
].uniq_id
== cmi
->uniq_id
) {
970 cmai
= &cmpr_meth_arr
[k
];
976 cmi
->is_recognized
= 1;
977 cmi
->decompressor
= cmai
->decompressor
;
979 else if(basefmt
==BASEFMT_LHA
) {
980 for(k
=0; k
<DE_ARRAYCOUNT(other_known_cmpr_methods
); k
++) {
981 if(other_known_cmpr_methods
[k
] == cmi
->uniq_id
) {
982 cmi
->is_recognized
= 1;
988 if(cmai
&& cmai
->descr
) {
989 de_strlcpy(cmi
->descr
, cmai
->descr
, sizeof(cmi
->descr
));
991 else if(cmi
->is_recognized
) {
992 de_strlcpy(cmi
->descr
, "recognized, but no info avail.", sizeof(cmi
->descr
));
995 de_strlcpy(cmi
->descr
, "?", sizeof(cmi
->descr
));
999 static void do_extract_file(deark
*c
, lctx
*d
, struct member_data
*md
)
1001 de_finfo
*fi
= NULL
;
1005 u8 dcmpr_disabled
= 0;
1006 u8 dcmpr_attempted
= 0;
1008 struct de_dfilter_in_params dcmpri
;
1009 struct de_dfilter_out_params dcmpro
;
1010 struct de_dfilter_results dres
;
1012 if(!md
->cmi
) goto done
;
1014 if(md
->is_special
) {
1015 de_dbg(c
, "[not extracting special file]");
1018 else if(md
->is_dir
) {
1021 else if((!md
->cmi
->decompressor
) || dcmpr_disabled
) {
1022 de_err(c
, "%s: Unsupported compression method '%s'",
1023 ucstring_getpsz_d(md
->fullfilename
), md
->cmi
->id_printable_sz
);
1027 if(md
->compressed_data_pos
+md
->compressed_data_len
> c
->infile
->len
) {
1028 de_err(c
, "%s: Data goes beyond end of file", ucstring_getpsz_d(md
->fullfilename
));
1032 fi
= de_finfo_create(c
);
1034 for(tsidx
=0; tsidx
<DE_TIMESTAMPIDX_COUNT
; tsidx
++) {
1035 if(md
->tsdata
[tsidx
].ts
.is_valid
) {
1036 fi
->timestamp
[tsidx
] = md
->tsdata
[tsidx
].ts
;
1041 fi
->is_directory
= 1;
1043 else if(md
->is_executable
) {
1044 fi
->mode_flags
|= DE_MODEFLAG_EXE
;
1046 else if(md
->is_nonexecutable
) {
1047 fi
->mode_flags
|= DE_MODEFLAG_NONEXE
;
1050 de_finfo_set_name_from_ucstring(c
, fi
, md
->fullfilename
, DE_SNFLAG_FULLPATH
);
1051 fi
->original_filename_flag
= 1;
1053 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
1054 dbuf_enable_wbuffer(outf
);
1055 de_crcobj_reset(d
->crco
);
1056 dbuf_set_writelistener(outf
, de_writelistener_for_crc
, (void*)d
->crco
);
1058 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
1059 dcmpri
.f
= c
->infile
;
1060 dcmpri
.pos
= md
->compressed_data_pos
;
1061 dcmpri
.len
= md
->compressed_data_len
;
1063 dcmpro
.expected_len
= md
->orig_size
;
1064 dcmpro
.len_known
= 1;
1066 if(md
->is_dir
) goto done
; // For directories, we're done.
1068 dcmpr_attempted
= 1;
1069 if(md
->cmi
->decompressor
) {
1070 md
->cmi
->decompressor(c
, d
, md
, &dcmpri
, &dcmpro
, &dres
);
1072 dbuf_flush(dcmpro
.f
);
1075 de_err(c
, "%s: Decompression failed: %s", ucstring_getpsz_d(md
->fullfilename
),
1076 de_dfilter_get_errmsg(c
, &dres
));
1080 crc_calc
= de_crcobj_getval(d
->crco
);
1081 if(d
->basefmt
==BASEFMT_PAKLEO
) {
1082 de_dbg(c
, "crc (calculated): 0x%08x", (UI
)crc_calc
);
1085 de_dbg(c
, "crc (calculated): 0x%04x", (UI
)crc_calc
);
1087 if(crc_calc
!= md
->crc_reported
) {
1088 de_err(c
, "%s: CRC check failed", ucstring_getpsz_d(md
->fullfilename
));
1095 if(dcmpr_attempted
&& md
->cmi
&& md
->cmi
->uniq_id
==CODE_lh7
) {
1097 d
->lh7_success_flag
= 1;
1099 d
->lh7_failed_flag
= 1;
1102 de_finfo_destroy(c
, fi
);
1105 // Simple checksum used by some header formats.
1106 // Caller supplies a crcobj to use.
1107 static u32
lha_calc_checksum(dbuf
*f
, i64 pos
, i64 len
, struct de_crcobj
*crco_cksum
)
1111 de_crcobj_reset(crco_cksum
);
1112 de_crcobj_addslice(crco_cksum
, f
, pos
, len
);
1113 v
= de_crcobj_getval(crco_cksum
);
1117 static void do_check_header_crc(deark
*c
, lctx
*d
, struct member_data
*md
)
1119 // LHA members don't have to have a header CRC field, though it's probably
1120 // considered best practice to have one when the checksum field doesn't
1121 // exist, or there are any extended headers.
1122 if(!md
->have_hdr_crc_reported
) return;
1123 de_crcobj_reset(d
->crco
);
1125 // Everything before the CRC field:
1126 de_crcobj_addslice(d
->crco
, c
->infile
, md
->member_pos
,
1127 md
->hdr_crc_field_pos
- md
->member_pos
);
1129 // The zeroed-out CRC field:
1130 de_crcobj_addzeroes(d
->crco
, 2);
1132 // Everything after the CRC field:
1133 de_crcobj_addslice(d
->crco
, c
->infile
, md
->hdr_crc_field_pos
+2,
1134 md
->compressed_data_pos
- (md
->hdr_crc_field_pos
+2));
1136 md
->hdr_crc_calc
= de_crcobj_getval(d
->crco
);
1137 de_dbg(c
, "header crc (calculated): 0x%04x", (UI
)md
->hdr_crc_calc
);
1138 if(md
->hdr_crc_calc
!= md
->hdr_crc_reported
) {
1139 de_err(c
, "Wrong header CRC: reported=0x%04x, calculated=0x%04x",
1140 (UI
)md
->hdr_crc_reported
, (UI
)md
->hdr_crc_calc
);
1144 enum lha_whats_next_enum
{
1147 LHA_WN_TRAILER_AND_JUNK
, // Note: No longer handled differently from TRAILER
1152 static enum lha_whats_next_enum
pakleo_classify_whats_next(deark
*c
, lctx
*d
,
1156 if(len
<=0) return LHA_WN_NOTHING
;
1157 de_read(b
, pos
, sizeof(b
));
1158 if(is_possible_cmpr_meth(&b
[2])) return LHA_WN_MEMBER
;
1162 static enum lha_whats_next_enum
lha_classify_whats_next(deark
*c
, lctx
*d
, i64 pos
, i64 len
)
1167 if(d
->basefmt
==BASEFMT_PAKLEO
) {
1168 return pakleo_classify_whats_next(c
, d
, pos
, len
);
1171 if(len
<=0) return LHA_WN_NOTHING
;
1172 b
[0] = de_getbyte(pos
);
1173 if(b
[0]==0 && len
<=2) return LHA_WN_TRAILER
;
1174 if(b
[0]==0 && len
<21) return LHA_WN_TRAILER_AND_JUNK
;
1175 de_read(&b
[1], pos
+1, sizeof(b
)-1);
1176 if(d
->basefmt
==BASEFMT_SWG
) hlev
= 0;
1178 if(b
[0]==0 && b
[1]==0) return LHA_WN_TRAILER_AND_JUNK
;
1179 if(b
[0]==0 && hlev
!=2) return LHA_WN_TRAILER_AND_JUNK
;
1180 if(hlev
>3) return LHA_WN_JUNK
;
1181 if(is_possible_cmpr_meth(&b
[2])) return LHA_WN_MEMBER
;
1185 static void do_swg_string_field(deark
*c
, lctx
*d
,
1186 de_ucstring
*s
, i64 pos
, i64 fldlen
, const char *name
)
1188 i64 dlen
= (i64
)de_getbyte(pos
);
1189 if(dlen
>fldlen
-1) dlen
= fldlen
-1;
1191 dbuf_read_to_ucstring(c
->infile
, pos
+1, dlen
, s
, 0, d
->input_encoding
);
1192 ucstring_strip_trailing_spaces(s
);
1193 de_dbg(c
, "SWG %s: \"%s\"", name
, ucstring_getpsz_d(s
));
1196 static void do_special_swg_fields(deark
*c
, lctx
*d
, struct member_data
*md
, i64 pos1
)
1200 de_ucstring
*s
= NULL
;
1202 crc32
= (u32
)de_getu32le_p(&pos
);
1203 de_dbg(c
, "SWG crc32 (reported): 0x%08x", (UI
)crc32
);
1204 s
= ucstring_create(c
);
1205 do_swg_string_field(c
, d
, s
, pos
, 13, "stored filename");
1207 do_swg_string_field(c
, d
, s
, pos
, 41, "subject");
1209 do_swg_string_field(c
, d
, s
, pos
, 36, "contributor");
1211 do_swg_string_field(c
, d
, s
, pos
, 71, "search keys");
1212 ucstring_destroy(s
);
1215 // This single function parses all the different header formats, using lots of
1216 // "if" statements. It is messy, but it's a no-win situation.
1217 // The alternative of four separate functions would be have a lot of redundant
1218 // code, and be harder to maintain.
1220 // Caller allocates and initializes md.
1221 // If the member was successfully parsed, sets md->total_size and returns nonzero.
1222 static int do_read_member(deark
*c
, lctx
*d
, struct member_data
*md
)
1225 i64 lev0_header_size
= 0;
1226 i64 lev1_base_header_size
= 0;
1227 i64 lev1_skip_size
= 0;
1228 i64 lev2_total_header_size
= 0;
1229 i64 lev3_header_size
= 0;
1230 i64 pos1
= md
->member_pos
;
1233 i64 exthdr_bytes_consumed
= 0;
1236 UI hdr_checksum_reported
= 0;
1237 u8 has_hdr_checksum
= 0;
1240 enum lha_whats_next_enum wn
;
1241 u8 cmpr_meth_raw
[5];
1242 int saved_indent_level
;
1244 de_dbg_indent_save(c
, &saved_indent_level
);
1246 nbytes_avail
= c
->infile
->len
- pos1
;
1247 wn
= lha_classify_whats_next(c
, d
, pos1
, nbytes_avail
);
1248 if(wn
!=LHA_WN_MEMBER
) {
1249 if(d
->member_count
==0) {
1250 de_err(c
, "Not a%s %s file",
1251 ((d
->basefmt
==BASEFMT_LHA
|| d
->basefmt
==BASEFMT_SWG
)?"n":""),
1254 else if(wn
==LHA_WN_TRAILER
|| wn
==LHA_WN_TRAILER_AND_JUNK
) {
1255 d
->trailer_found
= 1;
1256 d
->trailer_pos
= pos1
;
1257 de_dbg(c
, "trailer at %"I64_FMT
, d
->trailer_pos
);
1259 else if(wn
==LHA_WN_JUNK
) {
1260 de_warn(c
, "%"I64_FMT
" bytes of non-%s data found at end of file (offset %"I64_FMT
")",
1261 nbytes_avail
, d
->basefmt_name
, pos1
);
1266 de_dbg(c
, "member at %"I64_FMT
, pos1
);
1267 de_dbg_indent(c
, 1);
1269 // Look ahead to figure out the header format version.
1270 // This byte was originally the high byte of the "MS-DOS file attribute" field,
1271 // which happened to always be zero.
1272 // In later LHA versions, it is overloaded to identify the header format
1273 // version (called "header level" in LHA jargon).
1274 if(d
->basefmt
==BASEFMT_SWG
) {
1275 md
->hlev
= 0; // SWG is most similar to header level 0
1277 else if(d
->basefmt
==BASEFMT_PAKLEO
) {
1278 md
->hlev
= 0; // hlev field is present, but we only support one format
1281 md
->hlev
= de_getbyte(pos1
+20);
1283 de_dbg(c
, "header level: %d", (int)md
->hlev
);
1285 goto done
; // Shouldn't be possible; checked in lha_classify_whats_next().
1288 if(d
->member_count
==0) {
1289 d
->hlev_of_first_member
= md
->hlev
;
1292 if(d
->basefmt
==BASEFMT_PAKLEO
) {
1293 pos
+= 2; // What is this field?
1295 else if(md
->hlev
==0) {
1296 lev0_header_size
= (i64
)de_getbyte_p(&pos
);
1297 de_dbg(c
, "header size: (2+)%d", (int)lev0_header_size
);
1298 hdr_checksum_reported
= (UI
)de_getbyte_p(&pos
);
1299 has_hdr_checksum
= 1;
1300 md
->hdr_checksum_calc
= lha_calc_checksum(c
->infile
, pos
, lev0_header_size
,
1303 else if(md
->hlev
==1) {
1304 lev1_base_header_size
= (i64
)de_getbyte_p(&pos
);
1305 de_dbg(c
, "base header size: %d", (int)lev1_base_header_size
);
1306 hdr_checksum_reported
= (UI
)de_getbyte_p(&pos
);
1307 has_hdr_checksum
= 1;
1308 md
->hdr_checksum_calc
= lha_calc_checksum(c
->infile
, pos
, lev1_base_header_size
,
1311 else if(md
->hlev
==2) {
1312 lev2_total_header_size
= de_getu16le_p(&pos
);
1313 de_dbg(c
, "total header size: %d", (int)lev2_total_header_size
);
1315 else if(md
->hlev
==3) {
1317 lev3_word_size
= de_getu16le_p(&pos
);
1318 de_dbg(c
, "word size: %d", (int)lev3_word_size
);
1319 if(lev3_word_size
!=4) {
1320 de_err(c
, "Unsupported word size: %d", (int)lev3_word_size
);
1325 if(has_hdr_checksum
) {
1326 de_dbg(c
, "header checksum (reported): 0x%02x", hdr_checksum_reported
);
1327 de_dbg(c
, "header checksum (calculated): 0x%02x", (UI
)md
->hdr_checksum_calc
);
1328 if(md
->hdr_checksum_calc
!= hdr_checksum_reported
) {
1329 de_err(c
, "Wrong header checksum: reported=0x%02x, calculated=0x%02x",
1330 hdr_checksum_reported
, md
->hdr_checksum_calc
);
1334 de_read(cmpr_meth_raw
, pos
, 5);
1335 md
->cmi
= de_malloc(c
, sizeof(struct cmpr_meth_info
));
1336 get_cmpr_meth_info(cmpr_meth_raw
, d
->basefmt
, md
->cmi
);
1337 de_dbg(c
, "cmpr method: '%s' (%s)", md
->cmi
->id_printable_sz
, md
->cmi
->descr
);
1340 if(md
->cmi
->uniq_id
== CODE_lhd
) {
1344 else if(md
->cmi
->decompressor
== decompress_uncompressed
) {
1352 // lev1_skip_size is the distance from the third byte of the extended
1353 // header section, to the end of the compressed data.
1354 lev1_skip_size
= de_getu32le_p(&pos
);
1355 de_dbg(c
, "skip size: %u", (UI
)lev1_skip_size
);
1356 md
->total_size
= 2 + lev1_base_header_size
+ lev1_skip_size
;
1359 md
->compressed_data_len
= de_getu32le(pos
);
1360 de_dbg(c
, "compressed size: %"I64_FMT
, md
->compressed_data_len
);
1363 if(md
->hlev
==0 && d
->basefmt
!=BASEFMT_PAKLEO
) {
1364 md
->total_size
= 2 + lev0_header_size
+ md
->compressed_data_len
;
1366 else if(md
->hlev
==2) {
1367 md
->total_size
= lev2_total_header_size
+ md
->compressed_data_len
;
1371 md
->orig_size
= de_getu32le(pos
);
1372 de_dbg(c
, "original size: %u", (UI
)md
->orig_size
);
1375 if(md
->hlev
==0 || md
->hlev
==1) {
1376 read_msdos_modtime(c
, d
, md
, pos
, "last-modified");
1377 pos
+= 4; // modification time/date (MS-DOS)
1379 else if(md
->hlev
==2 || md
->hlev
==3) {
1380 read_unix_timestamp(c
, d
, md
, pos
, DE_TIMESTAMPIDX_MODIFY
, "last-modified");
1381 pos
+= 4; // Unix time
1385 de_ucstring
*attr_descr
;
1387 // Normally, the high byte can only be 0 here, because it's
1388 // also the header level.
1389 attribs
= (UI
)de_getu16le_p(&pos
);
1391 attr_descr
= ucstring_create(c
);
1392 de_describe_dos_attribs(c
, attribs
, attr_descr
, 0);
1393 de_dbg(c
, "attribs: 0x%04x (%s)", attribs
, ucstring_getpsz_d(attr_descr
));
1394 ucstring_destroy(attr_descr
);
1397 attribs
= (UI
)de_getbyte_p(&pos
);
1398 de_dbg(c
, "obsolete attribs low byte: 0x%02x", attribs
);
1399 pos
++; // header level, already handled
1402 if(d
->basefmt
==BASEFMT_SWG
) {
1403 do_special_swg_fields(c
, d
, md
, pos
);
1407 if(d
->basefmt
==BASEFMT_PAKLEO
) {
1408 md
->crc_reported
= (u32
)de_getu32le_p(&pos
);
1409 de_dbg(c
, "crc32 (reported): 0x%08x", (UI
)md
->crc_reported
);
1413 fnlen
= de_getbyte(pos
++);
1414 de_dbg(c
, "filename len: %d", (int)fnlen
);
1416 read_filename_hlev0(c
, d
, md
, pos
, fnlen
);
1419 read_filename_hlev1_or_exthdr(c
, d
, md
, pos
, fnlen
);
1424 if(d
->basefmt
!=BASEFMT_PAKLEO
) {
1425 md
->crc_reported
= (u32
)de_getu16le_p(&pos
);
1426 de_dbg(c
, "crc16 (reported): 0x%04x", (UI
)md
->crc_reported
);
1429 if(md
->hlev
==1 || md
->hlev
==2 || md
->hlev
==3) {
1430 md
->os_id
= de_getbyte_p(&pos
);
1431 de_dbg(c
, "OS id: %u ('%c') (%s)", (UI
)md
->os_id
,
1432 de_byte_to_printable_char(md
->os_id
), get_os_name(md
->os_id
));
1436 lev3_header_size
= de_getu32le_p(&pos
);
1437 md
->total_size
= lev3_header_size
+ md
->compressed_data_len
;
1440 if(d
->basefmt
==BASEFMT_PAKLEO
) {
1441 md
->compressed_data_pos
= pos
;
1442 md
->total_size
= md
->compressed_data_pos
+ md
->compressed_data_len
- md
->member_pos
;
1444 else if(md
->hlev
==0) {
1445 i64 ext_headers_size
= (2+lev0_header_size
) - (pos
-pos1
);
1446 md
->compressed_data_pos
= pos1
+ 2 + lev0_header_size
;
1447 if(ext_headers_size
>0) {
1448 de_dbg(c
, "extended header area at %"I64_FMT
", len=%"I64_FMT
, pos
, ext_headers_size
);
1449 de_dbg_indent(c
, 1);
1450 do_lev0_ext_area(c
, d
, md
, pos
, ext_headers_size
);
1451 de_dbg_indent(c
, -1);
1454 else if(md
->hlev
==1) {
1455 i64 first_ext_hdr_size
;
1457 // The last two bytes of the base header are the size of the first ext. header.
1458 pos
= pos1
+ 2 + lev1_base_header_size
- 2;
1459 // TODO: sanitize pos?
1460 first_ext_hdr_size
= de_getu16le_p(&pos
);
1461 de_dbg(c
, "first ext hdr size: %"I64_FMT
, first_ext_hdr_size
);
1463 ret
= do_read_ext_headers(c
, d
, md
, pos
, lev1_skip_size
, first_ext_hdr_size
,
1464 &exthdr_bytes_consumed
);
1467 de_err(c
, "Error parsing extended headers at %"I64_FMT
". Cannot extract this file.",
1473 pos
+= exthdr_bytes_consumed
;
1474 md
->compressed_data_pos
= pos
;
1475 md
->compressed_data_len
= lev1_skip_size
- exthdr_bytes_consumed
;
1477 else if(md
->hlev
==2) {
1478 i64 first_ext_hdr_size
;
1480 if(md
->os_id
=='K') {
1481 // So that some lhasa test files will work.
1482 // TODO: The extended headers section is (usually?) self-terminating, so we
1483 // should be able to parse it and figure out if this bug is present. That
1484 // would be better than just guessing.
1485 lev2_total_header_size
+= 2;
1486 md
->total_size
= lev2_total_header_size
+ md
->compressed_data_len
;
1487 de_dbg(c
, "attempting bug workaround: changing total header size to %"I64_FMT
,
1488 lev2_total_header_size
);
1491 md
->compressed_data_pos
= pos1
+lev2_total_header_size
;
1493 first_ext_hdr_size
= de_getu16le_p(&pos
);
1494 de_dbg(c
, "first ext hdr size: %"I64_FMT
, first_ext_hdr_size
);
1496 do_read_ext_headers(c
, d
, md
, pos
, pos1
+lev2_total_header_size
-pos
,
1497 first_ext_hdr_size
, &exthdr_bytes_consumed
);
1499 else if(md
->hlev
==3) {
1500 i64 first_ext_hdr_size
;
1502 md
->compressed_data_pos
= pos1
+lev3_header_size
;
1504 first_ext_hdr_size
= de_getu32le_p(&pos
);
1505 de_dbg(c
, "first ext hdr size: %"I64_FMT
, first_ext_hdr_size
);
1507 do_read_ext_headers(c
, d
, md
, pos
, pos1
+lev3_header_size
-pos
,
1508 first_ext_hdr_size
, &exthdr_bytes_consumed
);
1511 do_check_header_crc(c
, d
, md
);
1513 de_dbg(c
, "member data (%scompressed) at %"I64_FMT
", len=%"I64_FMT
,
1514 is_compressed
?"":"un",
1515 md
->compressed_data_pos
, md
->compressed_data_len
);
1517 make_fullfilename(c
, d
, md
);
1519 de_dbg_indent(c
, 1);
1520 do_extract_file(c
, d
, md
);
1521 de_dbg_indent(c
, -1);
1525 de_dbg_indent_restore(c
, saved_indent_level
);
1529 static void do_swg_footer(deark
*c
, lctx
*d
, i64 pos1
)
1533 de_ucstring
*s
= NULL
;
1535 de_dbg(c
, "SWG footer at %"I64_FMT
, pos1
);
1536 de_dbg_indent(c
, 1);
1537 s
= ucstring_create(c
);
1538 do_swg_string_field(c
, d
, s
, pos
, 61, "message");
1540 do_swg_string_field(c
, d
, s
, pos
, 66, "title");
1542 n
= de_getu16le_p(&pos
);
1543 de_dbg(c
, "SWG number of items: %d", (int)n
);
1544 de_dbg_indent(c
, -1);
1545 ucstring_destroy(s
);
1548 static void do_lha_footer(deark
*c
, lctx
*d
)
1550 i64 extra_bytes_pos
, extra_bytes_len
;
1552 if(!d
->trailer_found
) goto done
;
1553 extra_bytes_pos
= d
->trailer_pos
+1;
1554 extra_bytes_len
= c
->infile
->len
- extra_bytes_pos
;
1555 if(extra_bytes_len
<=1) goto done
;
1557 if(d
->basefmt
==BASEFMT_SWG
&& extra_bytes_len
==129) {
1558 do_swg_footer(c
, d
, extra_bytes_pos
);
1562 de_info(c
, "Note: %"I64_FMT
" extra bytes at end of file (offset %"I64_FMT
")",
1563 extra_bytes_len
, extra_bytes_pos
);
1568 static lctx
*lha_create_lctx(deark
*c
)
1572 d
= de_malloc(c
, sizeof(lctx
));
1576 static void lha_destroy_lctx(deark
*c
, lctx
*d
)
1579 de_crcobj_destroy(d
->crco
);
1580 de_crcobj_destroy(d
->crco_cksum
);
1584 static void do_run_lha_internal(deark
*c
, lctx
*d
, de_module_params
*mparams
)
1587 struct member_data
*md
= NULL
;
1589 if(!d
->basefmt_name
) {
1590 d
->basefmt_name
= "LHA";
1592 d
->lhark_req
= de_get_ext_option_bool(c
, "lha:lhark", -1);
1593 d
->lhark_policy
= d
->lhark_req
;
1595 // It's not really safe to guess CP437, because Japanese-encoded (CP932?)
1596 // filenames are common.
1597 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_ASCII
);
1599 d
->hlev_of_first_member
= 0xff;
1600 if(d
->basefmt
==BASEFMT_PAKLEO
) {
1601 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC32_PL
);
1604 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
1606 d
->crco_cksum
= de_crcobj_create(c
, DE_CRCOBJ_SUM_BYTES
);
1609 if(d
->basefmt
==BASEFMT_PAKLEO
) pos
+= 37;
1612 if(pos
>= c
->infile
->len
) break;
1614 md
= de_malloc(c
, sizeof(struct member_data
));
1615 md
->encoding
= d
->input_encoding
;
1616 md
->member_pos
= pos
;
1617 if(!do_read_member(c
, d
, md
)) goto done
;
1618 if(md
->total_size
<1) goto done
;
1621 pos
+= md
->total_size
;
1623 destroy_member_data(c
, md
);
1628 do_lha_footer(c
, d
);
1629 destroy_member_data(c
, md
);
1632 static void de_run_lha(deark
*c
, de_module_params
*mparams
)
1636 d
= lha_create_lctx(c
);
1637 d
->basefmt
= BASEFMT_LHA
;
1638 do_run_lha_internal(c
, d
, mparams
);
1639 lha_destroy_lctx(c
, d
);
1642 static int is_swg_sig(const u8
*b
)
1644 return b
[0]=='-' && b
[1]=='s' && b
[2]=='w' &&
1645 (b
[3]=='0' || b
[3]=='1') && b
[4]=='-';
1648 static int de_identify_lha(deark
*c
)
1652 struct cmpr_meth_info cmi
;
1654 de_read(b
, 0, sizeof(b
));
1655 if(b
[20]>3) return 0; // header level
1657 if(!is_possible_cmpr_meth(&b
[2])) return 0;
1658 if(is_swg_sig(&b
[2])) return 0; // Handled by the swg module
1661 if(b
[0]<22) return 0;
1662 if(22 + (int)b
[21] + 2 > 2 + (int)b
[0]) return 0;
1665 if(b
[0]<25) return 0;
1666 if(22 + (int)b
[21] + 5 > 2 + (int)b
[0]) return 0;
1669 i64 hsize
= de_getu16le_direct(&b
[0]);
1670 if(hsize
< 26) return 0;
1673 if((b
[0]!=4 && b
[0]!=8) || b
[1]!=0) return 0;
1676 de_zeromem(&cmi
, sizeof(struct cmpr_meth_info
));
1677 get_cmpr_meth_info(&b
[2], BASEFMT_LHA
, &cmi
);
1678 if(!cmi
.is_recognized
) {
1682 if(de_input_file_has_ext(c
, "lzh") ||
1683 de_input_file_has_ext(c
, "lha") ||
1684 ((b
[4]=='z') && de_input_file_has_ext(c
, "lzs")))
1689 if(has_ext
) return 100;
1690 return 80; // Must be less than car_lha
1693 static void de_help_lha(deark
*c
)
1695 de_msg(c
, "-opt lha:lhark=<0|1> : LHARK mode (for 'lh7' compression)");
1698 void de_module_lha(deark
*c
, struct deark_module_info
*mi
)
1701 mi
->desc
= "LHA/LZH/PMA archive";
1702 mi
->run_fn
= de_run_lha
;
1703 mi
->identify_fn
= de_identify_lha
;
1704 mi
->help_fn
= de_help_lha
;
1707 /////////////////////// SWG / SWAG
1709 // This module works almost just like lha, except that all members are assumed
1710 // to use the SWG header format. (For lha, the SWG header format is never used.)
1712 static void de_run_swg(deark
*c
, de_module_params
*mparams
)
1716 d
= lha_create_lctx(c
);
1717 d
->basefmt
= BASEFMT_SWG
;
1718 d
->basefmt_name
= "SWG";
1719 de_declare_fmt(c
, "SWAG packet");
1720 do_run_lha_internal(c
, d
, mparams
);
1721 lha_destroy_lctx(c
, d
);
1724 static int de_identify_swg(deark
*c
)
1728 de_read(b
, 2, sizeof(b
));
1730 if(de_input_file_has_ext(c
, "swg")) return 100;
1736 void de_module_swg(deark
*c
, struct deark_module_info
*mi
)
1739 mi
->desc
= "SWAG packet";
1740 mi
->run_fn
= de_run_swg
;
1741 mi
->identify_fn
= de_identify_swg
;
1742 mi
->flags
|= DE_MODFLAG_WARNPARSEONLY
;
1745 /////////////////////// PAKLEO
1747 static void de_run_pakleo(deark
*c
, de_module_params
*mparams
)
1751 d
= lha_create_lctx(c
);
1752 d
->basefmt
= BASEFMT_PAKLEO
;
1753 d
->basefmt_name
= "PAKLEO";
1754 de_declare_fmt(c
, "PAKLEO");
1755 do_run_lha_internal(c
, d
, mparams
);
1756 lha_destroy_lctx(c
, d
);
1759 static int de_identify_pakleo(deark
*c
)
1761 if(dbuf_memcmp(c
->infile
, 0, "LEOLZW", 6)) return 0;
1762 if(dbuf_memcmp(c
->infile
, 39, "-l", 2)) return 0;
1766 void de_module_pakleo(deark
*c
, struct deark_module_info
*mi
)
1769 mi
->desc
= "PAKLEO archive";
1770 mi
->run_fn
= de_run_pakleo
;
1771 mi
->identify_fn
= de_identify_pakleo
;
1774 /////////////////////// CAR (MylesHi!)
1776 struct car_member_data
{
1779 u32 hdr_checksum_calc
;
1785 struct de_crcobj
*crco_cksum
;
1788 static int looks_like_car_member(deark
*c
, i64 pos
)
1792 de_read(b
, pos
, 16);
1793 if(b
[2]!='-' || b
[3]!='l'|| b
[4]!='h' || b
[6]!='-') return 0;
1794 if(b
[5]!='0' && b
[5]!='5') return 0;
1795 if((int)b
[0] != (int)b
[15] + 25) return 0;
1796 if(dbuf_memcmp(c
->infile
, pos
+ (i64
)b
[15] + 24, (const u8
*)"\x20\x00\x00", 3)) return 0;
1800 static int do_car_member(deark
*c
, struct car_ctx
*d
, struct car_member_data
*md
)
1802 i64 lev1_base_header_size
;
1805 i64 compressed_data_len
;
1806 i64 pos1
= md
->member_pos
;
1808 int saved_indent_level
;
1810 de_dbg_indent_save(c
, &saved_indent_level
);
1811 de_dbg(c
, "member at %"I64_FMT
, pos1
);
1812 de_dbg_indent(c
, 1);
1814 // Figure out where everything is...
1815 lev1_base_header_size
= (i64
)de_getbyte(pos1
);
1816 de_dbg(c
, "base header size: %d", (int)lev1_base_header_size
);
1817 hdr_endpos
= pos1
+ 2 + lev1_base_header_size
;
1818 fnlen
= lev1_base_header_size
- 25;
1819 de_dbg(c
, "implied filename len: %d", (int)fnlen
);
1820 if(fnlen
<0) goto done
;
1822 compressed_data_len
= de_getu32le(pos1
+ 7);
1823 de_dbg(c
, "compressed size: %"I64_FMT
, compressed_data_len
);
1824 if(hdr_endpos
+ compressed_data_len
> c
->infile
->len
) goto done
;
1826 // Convert to an LHA level-1 header
1827 dbuf_empty(d
->hdr_tmp
);
1829 // Fields through uncmpr_size are the same (we'll patch the checksum later)
1830 dbuf_copy(c
->infile
, pos1
, 15, d
->hdr_tmp
);
1832 dbuf_copy(c
->infile
, hdr_endpos
-7, 4, d
->hdr_tmp
); // timestamp
1834 // attribute (low byte)
1835 dbuf_copy(c
->infile
, hdr_endpos
-9, 1, d
->hdr_tmp
);
1836 dbuf_writebyte(d
->hdr_tmp
, 0x01); // level identifier
1838 // Fields starting with filename length, through crc
1839 dbuf_copy(c
->infile
, pos1
+15, 1+fnlen
+2, d
->hdr_tmp
);
1841 dbuf_writebyte(d
->hdr_tmp
, 77); // OS ID = 'M' = MS-DOS
1843 // Recalculate checksum
1844 md
->hdr_checksum_calc
= lha_calc_checksum(d
->hdr_tmp
, 2, lev1_base_header_size
, d
->crco_cksum
);
1845 de_dbg(c
, "header checksum (calculated): 0x%02x", (UI
)md
->hdr_checksum_calc
);
1846 dbuf_writebyte_at(d
->hdr_tmp
, 1, (u8
)md
->hdr_checksum_calc
);
1847 dbuf_truncate(d
->hdr_tmp
, 2+lev1_base_header_size
);
1849 // Write everything out
1850 dbuf_copy(d
->hdr_tmp
, 0, d
->hdr_tmp
->len
, d
->lha_outf
);
1851 de_dbg(c
, "member data at %"I64_FMT
", len=%"I64_FMT
, hdr_endpos
, compressed_data_len
);
1852 dbuf_copy(c
->infile
, hdr_endpos
, compressed_data_len
, d
->lha_outf
);
1853 md
->total_size
= (hdr_endpos
-md
->member_pos
) + compressed_data_len
;
1857 de_dbg_indent_restore(c
, saved_indent_level
);
1861 static void de_run_car_lha(deark
*c
, de_module_params
*mparams
)
1863 struct car_ctx
*d
= NULL
;
1864 struct car_member_data
*md
= NULL
;
1868 d
= de_malloc(c
, sizeof(struct car_ctx
));
1870 if(!looks_like_car_member(c
, 0)) {
1871 de_err(c
, "Not a CAR file");
1875 d
->crco_cksum
= de_crcobj_create(c
, DE_CRCOBJ_SUM_BYTES
);
1876 d
->lha_outf
= dbuf_create_output_file(c
, "lha", NULL
, 0);
1877 d
->hdr_tmp
= dbuf_create_membuf(c
, 0, 0);
1879 md
= de_malloc(c
, sizeof(struct car_member_data
));
1881 if(de_getbyte(pos
)==0) {
1882 de_dbg(c
, "trailer at %"I64_FMT
, pos
);
1883 dbuf_writebyte(d
->lha_outf
, 0);
1887 if(pos
+27 > c
->infile
->len
) goto done
;
1888 if(!looks_like_car_member(c
, pos
)) goto done
;
1890 de_zeromem(md
, sizeof(struct car_member_data
));
1891 md
->member_pos
= pos
;
1892 if(!do_car_member(c
, d
, md
)) goto done
;
1893 pos
+= md
->total_size
;
1900 de_crcobj_destroy(d
->crco_cksum
);
1901 dbuf_close(d
->lha_outf
);
1903 de_err(c
, "Conversion to LHA format failed");
1906 dbuf_close(d
->hdr_tmp
);
1911 static int de_identify_car_lha(deark
*c
)
1913 if(!de_input_file_has_ext(c
, "car")) return 0;
1914 if(looks_like_car_member(c
, 0)) {
1920 void de_module_car_lha(deark
*c
, struct deark_module_info
*mi
)
1923 mi
->desc
= "CAR (MylesHi!) LHA-like archive";
1924 mi
->run_fn
= de_run_car_lha
;
1925 mi
->identify_fn
= de_identify_car_lha
;
1928 /////////////////////// ARX
1930 struct arx_member_data
{
1934 i64 compressed_data_len
;
1936 int is_uncompressed
;
1938 u32 hdr_checksum_calc
;
1944 struct de_crcobj
*crco
;
1945 struct de_crcobj
*crco_cksum
;
1948 static int looks_like_arx_member(deark
*c
, i64 pos
)
1952 de_read(b
, pos
, sizeof(b
));
1953 if(b
[2]!='-' || b
[3]!='l'|| b
[4]!='h' || b
[6]!='-') return 0;
1954 if(b
[21]!=0) return 0;
1958 // Decompress the file, discarding the output, just to figure out the CRC.
1959 static void arx_recalc_lh1(deark
*c
, struct arx_ctx
*d
, struct arx_member_data
*md
)
1962 struct de_dfilter_in_params dcmpri
;
1963 struct de_dfilter_out_params dcmpro
;
1964 struct de_dfilter_results dres
;
1966 outf
= dbuf_create_custom_dbuf(c
, md
->unc_data_len
, 0);
1967 dbuf_set_writelistener(outf
, de_writelistener_for_crc
, (void*)d
->crco
);
1969 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
1970 dcmpri
.f
= c
->infile
;
1971 dcmpri
.pos
= md
->hdr_endpos
;
1972 dcmpri
.len
= md
->compressed_data_len
;
1974 dcmpro
.expected_len
= md
->unc_data_len
;
1975 dcmpro
.len_known
= 1;
1977 fmtutil_lh1_codectype1(c
, &dcmpri
, &dcmpro
, &dres
, NULL
);
1982 static void arx_recalc_crc(deark
*c
, struct arx_ctx
*d
, struct arx_member_data
*md
)
1984 de_crcobj_reset(d
->crco
);
1985 if(md
->is_uncompressed
) {
1986 de_crcobj_addslice(d
->crco
, c
->infile
, md
->hdr_endpos
, md
->compressed_data_len
);
1989 arx_recalc_lh1(c
, d
, md
);
1991 md
->crc_calc
= de_crcobj_getval(d
->crco
);
1994 static int do_arx_member(deark
*c
, struct arx_ctx
*d
, struct arx_member_data
*md
)
1996 i64 lev0_header_size
;
1997 i64 pos1
= md
->member_pos
;
2000 int saved_indent_level
;
2002 de_dbg_indent_save(c
, &saved_indent_level
);
2003 de_dbg(c
, "member at %"I64_FMT
, pos1
);
2004 de_dbg_indent(c
, 1);
2006 lev0_header_size
= (i64
)de_getbyte(pos1
);
2007 de_dbg(c
, "header size: %d", (int)lev0_header_size
);
2008 if(lev0_header_size
<22) goto done
;
2009 md
->hdr_endpos
= pos1
+ 2 + lev0_header_size
;
2011 md
->compressed_data_len
= de_getu32le(pos1
+8);
2012 de_dbg(c
, "compressed size: %"I64_FMT
, md
->compressed_data_len
);
2014 md
->unc_data_len
= de_getu32le(pos1
+12);
2015 de_dbg(c
, "uncmpr. size: %"I64_FMT
, md
->unc_data_len
);
2017 if(md
->compressed_data_len
==0) {
2018 md
->is_uncompressed
= 1;
2019 md
->compressed_data_len
= md
->unc_data_len
;
2022 if(md
->hdr_endpos
+ md
->compressed_data_len
> c
->infile
->len
) goto done
;
2024 // Just the first byte of the CRC is present, and apparently not even
2025 // that for non-compressed files.
2026 extra_crc_byte
= de_getbyte(md
->hdr_endpos
-1);
2027 de_dbg(c
, "crc (reported): 0x??%02x", (UI
)extra_crc_byte
);
2029 // Find the correct CRC of the file data.
2030 arx_recalc_crc(c
, d
, md
);
2031 de_dbg(c
, "crc (calculated): 0x%04x", (UI
)md
->crc_calc
);
2032 if(!md
->is_uncompressed
) {
2033 if((u8
)(md
->crc_calc
& 0xff) != extra_crc_byte
) {
2034 de_warn(c
, "CRC mismatch. Conversion to LHA may have failed.");
2038 // Convert to an LHA header
2039 dbuf_empty(d
->hdr_tmp
);
2041 // Fields through cmpr meth. (We'll patch the checksum, and
2042 // compression method if necessary, later.)
2043 dbuf_copy(c
->infile
, pos1
, 7, d
->hdr_tmp
);
2045 dbuf_writeu32le(d
->hdr_tmp
, md
->compressed_data_len
);
2046 dbuf_writeu32le(d
->hdr_tmp
, md
->unc_data_len
);
2048 /// This part of the header can be copied as-is.
2049 dbuf_copy(c
->infile
, pos1
+8+8, lev0_header_size
-1-6-8, d
->hdr_tmp
);
2052 dbuf_writebyte(d
->hdr_tmp
, (u8
)(md
->crc_calc
& 0xff));
2053 dbuf_writebyte(d
->hdr_tmp
, (u8
)((md
->crc_calc
& 0xff00)>>8));
2055 if(md
->is_uncompressed
) {
2056 dbuf_writebyte_at(d
->hdr_tmp
, 5, '0'); // lh1 -> lh0
2059 // Recalculate header checksum
2060 md
->hdr_checksum_calc
= lha_calc_checksum(d
->hdr_tmp
, 2, lev0_header_size
,
2062 de_dbg(c
, "header checksum (calculated): 0x%02x", (UI
)md
->hdr_checksum_calc
);
2063 dbuf_writebyte_at(d
->hdr_tmp
, 1, (u8
)md
->hdr_checksum_calc
);
2064 dbuf_truncate(d
->hdr_tmp
, 2+lev0_header_size
);
2066 // Write everything out
2067 dbuf_copy(d
->hdr_tmp
, 0, d
->hdr_tmp
->len
, d
->lha_outf
);
2068 de_dbg(c
, "member data at %"I64_FMT
", len=%"I64_FMT
, md
->hdr_endpos
, md
->compressed_data_len
);
2069 dbuf_copy(c
->infile
, md
->hdr_endpos
, md
->compressed_data_len
, d
->lha_outf
);
2070 md
->total_size
= 2 + lev0_header_size
+ md
->compressed_data_len
;
2074 de_dbg_indent_restore(c
, saved_indent_level
);
2078 static void de_run_arx(deark
*c
, de_module_params
*mparams
)
2080 struct arx_ctx
*d
= NULL
;
2081 struct arx_member_data
*md
= NULL
;
2085 d
= de_malloc(c
, sizeof(struct arx_ctx
));
2087 if(!looks_like_arx_member(c
, 0)) {
2088 de_err(c
, "Not an ARX file");
2092 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
2093 d
->crco_cksum
= de_crcobj_create(c
, DE_CRCOBJ_SUM_BYTES
);
2094 d
->lha_outf
= dbuf_create_output_file(c
, "lha", NULL
, 0);
2095 d
->hdr_tmp
= dbuf_create_membuf(c
, 0, 0);
2097 md
= de_malloc(c
, sizeof(struct arx_member_data
));
2099 if(de_getbyte(pos
)==0) {
2100 de_dbg(c
, "trailer at %"I64_FMT
, pos
);
2101 dbuf_writebyte(d
->lha_outf
, 0);
2105 if(pos
+27 > c
->infile
->len
) goto done
;
2106 if(!looks_like_arx_member(c
, pos
)) goto done
;
2108 de_zeromem(md
, sizeof(struct arx_member_data
));
2109 md
->member_pos
= pos
;
2110 if(!do_arx_member(c
, d
, md
)) goto done
;
2111 pos
+= md
->total_size
;
2118 dbuf_close(d
->lha_outf
);
2120 de_err(c
, "Conversion to LHA format failed");
2123 dbuf_close(d
->hdr_tmp
);
2124 de_crcobj_destroy(d
->crco
);
2125 de_crcobj_destroy(d
->crco_cksum
);
2130 static int de_identify_arx(deark
*c
)
2132 if(dbuf_memcmp(c
->infile
, 2, "-lh1-", 5)) return 0;
2133 if(de_getbyte(20)!=0x20 || de_getbyte(21)!=0x00) return 0;
2134 if(de_input_file_has_ext(c
, "arx")) return 100;
2138 void de_module_arx(deark
*c
, struct deark_module_info
*mi
)
2141 mi
->desc
= "ARX LHA-like archive";
2142 mi
->run_fn
= de_run_arx
;
2143 mi
->identify_fn
= de_identify_arx
;
2147 /////////////////////// ar (Haruhiko Okumura) version "ar001"
2149 static void do_check_ar001_header_crc(deark
*c
, lctx
*d
, struct member_data
*md
, i64 basic_hdr_size
)
2151 //if(!md->have_hdr_crc_reported) return;
2152 de_crcobj_reset(d
->crco
);
2154 // Everything before the CRC field:
2155 de_crcobj_addslice(d
->crco
, c
->infile
, md
->member_pos
+2, basic_hdr_size
);
2157 md
->hdr_crc_calc
= de_crcobj_getval(d
->crco
);
2158 de_dbg(c
, "header crc (calculated): 0x%04x", (UI
)md
->hdr_crc_calc
);
2159 if(md
->hdr_crc_calc
!= md
->hdr_crc_reported
) {
2160 de_err(c
, "Wrong header CRC: reported=0x%04x, calculated=0x%04x",
2161 (UI
)md
->hdr_crc_reported
, (UI
)md
->hdr_crc_calc
);
2165 // Caller allocates and initializes md.
2166 // If the member was successfully parsed, sets md->total_size and returns nonzero.
2167 static int do_read_ar001_member(deark
*c
, lctx
*d
, struct member_data
*md
)
2169 int saved_indent_level
;
2171 i64 pos1
= md
->member_pos
;
2175 i64 first_ext_hdr_size
;
2177 u8 cmpr_meth_raw
[5];
2179 de_dbg_indent_save(c
, &saved_indent_level
);
2180 de_dbg(c
, "member at %"I64_FMT
, pos1
);
2181 de_dbg_indent(c
, 1);
2183 md
->hlev
= 0; // (hack)
2184 basic_hdr_size
= de_getu16le_p(&pos
);
2185 if(basic_hdr_size
==0) {
2186 de_dbg(c
, "end of archive");
2189 de_dbg(c
, "basic header size: %u", (UI
)basic_hdr_size
);
2190 if(basic_hdr_size
<18) goto done
;
2191 fnlen
= basic_hdr_size
-18;
2193 cmpr_method
= (UI
)de_getu16le_p(&pos
);
2195 de_zeromem(cmpr_meth_raw
, 5);
2196 switch(cmpr_method
) {
2197 case 0: de_memcpy(cmpr_meth_raw
, (const void*)"-lh0-", 5); break; // (hack)
2198 case 1: de_memcpy(cmpr_meth_raw
, (const void*)"-lh4-", 5); break; // ...
2200 md
->cmi
= de_malloc(c
, sizeof(struct cmpr_meth_info
));
2201 get_cmpr_meth_info(cmpr_meth_raw
, BASEFMT_LHA
, md
->cmi
);
2202 de_dbg(c
, "cmpr method: %u (%s)", cmpr_method
, md
->cmi
->descr
);
2204 pos
+= 1; // file type
2206 // timestamp: This is the time the file was added to the archive, not its
2207 // last-modified timestamp, so it's not very useful.
2210 md
->compressed_data_len
= de_getu32le_p(&pos
);
2211 de_dbg(c
, "compressed size: %"I64_FMT
, md
->compressed_data_len
);
2213 md
->orig_size
= de_getu32le_p(&pos
);
2214 de_dbg(c
, "original size: %"I64_FMT
, md
->orig_size
);
2216 md
->crc_reported
= (u32
)de_getu16le_p(&pos
);
2217 de_dbg(c
, "crc16 (reported): 0x%04x", (UI
)md
->crc_reported
);
2219 read_filename_hlev0(c
, d
, md
, pos
, fnlen
);
2221 make_fullfilename(c
, d
, md
);
2223 md
->hdr_crc_reported
= (UI
)de_getu16le_p(&pos
);
2224 de_dbg(c
, "basic header crc (reported): 0x%04x", (UI
)md
->hdr_crc_reported
);
2226 do_check_ar001_header_crc(c
, d
, md
, basic_hdr_size
);
2228 first_ext_hdr_size
= de_getu16le_p(&pos
);
2229 de_dbg(c
, "first ext header size: %"I64_FMT
, first_ext_hdr_size
);
2230 if(first_ext_hdr_size
) {
2231 // The ar001 software never uses this feature, so I'm not going to try to
2233 de_err(c
, "Files with extended headers aren't supported");
2237 md
->total_size
= pos
+ md
->compressed_data_len
- pos1
;
2240 md
->compressed_data_pos
= pos
;
2242 de_dbg(c
, "member data at %"I64_FMT
", len=%"I64_FMT
,
2243 md
->compressed_data_pos
, md
->compressed_data_len
);
2245 if(!md
->cmi
->decompressor
) {
2246 de_err(c
, "%s: Unsupported compression method: %u",
2247 ucstring_getpsz_d(md
->fullfilename
), cmpr_method
);
2251 de_dbg_indent(c
, 1);
2252 do_extract_file(c
, d
, md
);
2255 de_dbg_indent_restore(c
, saved_indent_level
);
2259 static void de_run_ar001(deark
*c
, de_module_params
*mparams
)
2263 struct member_data
*md
= NULL
;
2265 d
= lha_create_lctx(c
);
2266 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_ASCII
);
2268 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_IBMSDLC
);
2272 if(pos
>= c
->infile
->len
) break;
2274 md
= de_malloc(c
, sizeof(struct member_data
));
2275 md
->encoding
= d
->input_encoding
;
2276 md
->member_pos
= pos
;
2277 if(!do_read_ar001_member(c
, d
, md
)) goto done
;
2278 if(md
->total_size
<1) goto done
;
2281 pos
+= md
->total_size
;
2283 destroy_member_data(c
, md
);
2288 destroy_member_data(c
, md
);
2289 lha_destroy_lctx(c
, d
);
2292 static int slice_is_printable_ascii(dbuf
*f
, i64 pos
, i64 len
)
2296 for(i
=0; i
<len
; i
++) {
2299 b
= dbuf_getbyte(f
, pos
+i
);
2300 if(b
<32 || b
>126) return 0;
// Identify function for the ar001 module (installed as mi->identify_fn by
// de_module_ar001). Applies a series of sanity checks to the header at the
// start of the file; any failed check jumps to 'done'. If all checks pass and
// the stored header CRC equals the computed one, 'conf' is set to 91.
2305 static int de_identify_ar001(deark
*c
)
// bhcrc_r: CRC read from the file; bhcrc_c: CRC calculated here.
2310 u32 bhcrc_r
, bhcrc_c
;
2313 struct de_crcobj
*crco
= NULL
;
2315 bhlen
= de_getu16le(0); // basic header size
// Only header sizes from 18+1 to 18+1024 are accepted.
2316 if(bhlen
<(18+1) || bhlen
>(18+1024)) goto done
;
// The file must hold at least bhlen+8 bytes.
2317 if(c
->infile
->len
< bhlen
+8) goto done
;
2319 n
= de_getu16le(2); // cmpr method
2321 b
= de_getbyte(4); // file type
2322 if(b
!=0) goto done
; // 1 (text) is also defined, but not used by ar001
2323 n
= de_getu16le(2+bhlen
+2); // first ext hdr len
// The nlen bytes at offset 20 must all be in the printable ASCII range.
// NOTE(review): presumably this is the filename field -- confirm.
2325 if(!slice_is_printable_ascii(c
->infile
, 20, nlen
)) goto done
;
// Stored CRC: the 16-bit little-endian value at offset 2+bhlen.
2326 bhcrc_r
= (u32
)de_getu16le(2+bhlen
);
// Calculated CRC: CRC16_IBMSDLC over the bhlen bytes starting at offset 2.
2327 crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_IBMSDLC
);
2328 de_crcobj_addslice(crco
, c
->infile
, 2, bhlen
);
2329 bhcrc_c
= de_crcobj_getval(crco
);
2330 if(bhcrc_c
== bhcrc_r
) conf
= 91;
// Release the CRC object if one was created.
2333 if(crco
) de_crcobj_destroy(crco
);
// Module registration for ar001: stores the description string and the run
// and identify callbacks in 'mi'.
2337 void de_module_ar001(deark
*c
, struct deark_module_info
*mi
)
2340 mi
->desc
= "ar001 archive (Okumura)";
2341 mi
->run_fn
= de_run_ar001
;
2342 mi
->identify_fn
= de_identify_ar001
;
2345 // **************************************************************************
2346 // LHarc & LArc SFX - COM format
2347 // **************************************************************************
2348 // Note that EXE SFX format is handled by the exe module.
// Working state for the lharc_sfx_com module; allocated in
// de_run_lharc_sfx_com() and passed to lhasfx_find_payload().
2350 struct lhasfx_context
{
// Nonzero if the self-extractor is the LArc variant, zero for LHarc. Filled in
// via looks_like_lharc_sfx_com().
2353 int sfx_container_is_larc
;
// Tests whether the beginning of the input file matches a known LHarc/LArc
// COM-format self-extractor. Returns 0 if the first byte is not 0xeb. If the
// second byte is 0x60 or 0x6c, an 8-byte big-endian value read at 'pos' is
// compared against fixed signatures.
// NOTE(review): presumably *is_larc is set according to which signature
// matched, and a nonzero value is returned on a match -- confirm.
2357 static int looks_like_lharc_sfx_com(deark
*c
, int *is_larc
)
// de_getbyte_p() is given &pos; NOTE(review): presumably it advances 'pos' -- confirm.
2363 b
= de_getbyte_p(&pos
);
2364 if(b
!=0xeb) return 0;
2365 b
= de_getbyte_p(&pos
);
2367 // I don't know how good this test is, but I don't trust the text signature.
2368 // It's not formatted consistently. And because the source code was released,
2369 // who knows what weirdness is out there?
2370 if(b
==0x60 || b
==0x6c) {
// First known 8-byte code signature.
2371 v
= dbuf_getu64be(c
->infile
, pos
);
2372 if(v
==0xfcbc0001bb0601e8ULL
) {
// Second known 8-byte code signature.
2377 v
= dbuf_getu64be(c
->infile
, pos
);
2378 if(v
==0xfc8cc8030602018eULL
) {
2387 // Probe for LHarc (v1.x) or LArc data
// Returns 0 if fewer than 21 bytes are available at 'pos', or if the bytes
// read from offset pos+2 do not look like "-l?#-", where '?' is 'h' or 'z'
// (the fourth byte is not checked by the tests below).
// NOTE(review): presumably on success the position is stored in *pfoundpos
// and a nonzero value is returned -- confirm.
2388 static int is_lharc_data_at(deark
*c
, i64 pos
, i64
*pfoundpos
)
// Require at least 21 bytes from 'pos' to the end of the file.
2392 if(pos
+21 > c
->infile
->len
) return 0;
// The signature bytes start 2 bytes into the candidate header.
2393 de_read(b
, pos
+2, sizeof(b
));
2394 if(b
[0]!='-' || b
[1]!='l' || b
[4]!='-') return 0;
// Third byte: 'h' or 'z' only.
2395 if(b
[2]!='h' && b
[2]!='z') return 0;
// Looks for the embedded archive data by probing a small set of fixed file
// offsets with is_lharc_data_at(), passing &d->payload_offs to receive the
// result. Offset 594 is tried when the container is LArc; offsets 1260, 1263
// and 1290 are the LHarc ones (see the version notes on each probe).
2400 static void lhasfx_find_payload(deark
*c
, struct lhasfx_context
*d
)
2402 if(d
->sfx_container_is_larc
) {
2403 if(is_lharc_data_at(c
, 594, &d
->payload_offs
)) {
// The || chain stops at the first offset that matches.
2408 if(is_lharc_data_at(c
, 1260, &d
->payload_offs
) || // LHarc 1.12
2409 is_lharc_data_at(c
, 1263, &d
->payload_offs
) || // LHarc 1.13-1.14
2410 is_lharc_data_at(c
, 1290, &d
->payload_offs
)) // LHarc 1.00
// NOTE(review): a payload offset of 0 presumably means "not found" -- confirm.
2417 if(d
->payload_offs
==0) {
// Run function for the lharc_sfx_com module (installed as mi->run_fn by
// de_module_lharc_sfx_com). Checks the SFX stub, declares the format, locates
// the payload, and writes everything from the payload offset to the end of
// the input file as a new output file.
// 'mparams' is not referenced by the statements below.
2423 static void de_run_lharc_sfx_com(deark
*c
, de_module_params
*mparams
)
2425 struct lhasfx_context
*d
= NULL
;
2428 d
= de_malloc(c
, sizeof(struct lhasfx_context
));
// Also records in the context whether the stub is the LArc variant.
2430 ret
= looks_like_lharc_sfx_com(c
, &d
->sfx_container_is_larc
);
2437 de_declare_fmtf(c
, "%s self-extracting archive (COM)",
2438 (d
->sfx_container_is_larc
? "LArc":"LHarc"));
2440 lhasfx_find_payload(c
, d
);
2441 if(d
->errflag
) goto done
;
2443 de_dbg(c
, "payload found at: %"I64_FMT
, d
->payload_offs
);
// The slice runs from the payload offset to the end of the input file.
// NOTE(review): "lzs"/"lha" is presumably the output file extension -- confirm
// against dbuf_create_file_from_slice().
2445 dbuf_create_file_from_slice(c
->infile
, d
->payload_offs
,
2446 c
->infile
->len
-d
->payload_offs
,
2447 (d
->sfx_container_is_larc
? "lzs" : "lha"), NULL
, 0);
// The generic error message is emitted only when both flags are set.
2450 if(d
->errflag
&& d
->need_errmsg
) {
2451 de_err(c
, "Not a known LHarc/LArc SFX format");
// Identify function for the lharc_sfx_com module (installed as
// mi->identify_fn by de_module_lharc_sfx_com). Files longer than 65280 bytes
// are rejected immediately; otherwise looks_like_lharc_sfx_com() is consulted.
2456 static int de_identify_lharc_sfx_com(deark
*c
)
// NOTE(review): 65280 (0xff00) is presumably the maximum size of a DOS COM
// program -- confirm.
2461 if(c
->infile
->len
>65280) return 0;
2462 ret
= looks_like_lharc_sfx_com(c
, &is_larc
);
// Module registration for lharc_sfx_com: stores the module id, the
// description string, and the run and identify callbacks in 'mi'.
2467 void de_module_lharc_sfx_com(deark
*c
, struct deark_module_info
*mi
)
2469 mi
->id
= "lharc_sfx_com";
2470 mi
->desc
= "LHarc/LArc self-extracting archive (COM)";
2471 mi
->run_fn
= de_run_lharc_sfx_com
;
2472 mi
->identify_fn
= de_identify_lharc_sfx_com
;