lzah: Improved some debug messages
[deark.git] / modules / crush.c
blob607676d6bba861a5969cf97cdf78715251b57d2e
1 // This file is part of Deark.
2 // Copyright (C) 2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // CRUSH archive (PocketWare)
6 // Format is documented (CRUSH18.ZIP/MANUAL.DOC), though not in full detail.
8 #include <deark-config.h>
9 #include <deark-private.h>
10 DE_DECLARE_MODULE(de_module_crush);
// Size in bytes of the fixed archive header at the start of the file.
// Member file data is taken to begin immediately after it.
#define CRUSH_HEADER_LEN 26

// Size in bytes of one directory entry.
#define CRUSH_DIRENTRY_LEN 24
15 struct member_data {
16 i64 file_data_pos;
17 i64 file_size;
18 de_ucstring *fn;
19 de_ucstring *fullfn;
20 UI path_num; // 0 = no path, 1 = paths[0], 2 = paths[1], ...
21 UI attribs;
22 struct de_timestamp mod_time;
25 typedef struct localctx_struct {
26 de_encoding input_encoding;
27 UI ver_maj, ver_min;
28 int is_cri;
29 i64 num_paths;
30 i64 num_files;
31 i64 dir_segment_pos;
32 int paths_segment_len_known;
33 i64 paths_segment_pos;
34 i64 paths_segment_len;
35 de_ucstring **paths; // array[num_paths]
36 i64 file_data_curpos;
37 } lctx;
39 static void fixup_filename(de_ucstring *s)
41 i64 i;
43 for(i=0; i<s->len; i++) {
44 if(s->str[i]=='/') {
45 s->str[i] = '_';
49 if(ucstring_isempty(s)) {
50 ucstring_append_char(s, '_');
54 static void fixup_path(de_ucstring *s)
56 i64 i;
58 if(ucstring_isempty(s)) return;
59 for(i=0; i<s->len; i++) {
60 if(s->str[i]=='\\') {
61 s->str[i] = '/';
65 if(s->str[s->len-1]!='/') {
66 ucstring_append_char(s, '/');
70 static int do_read_paths(deark *c, lctx *d)
72 i64 pos;
73 i64 i;
74 int retval = 0;
75 int saved_indent_level;
77 de_dbg_indent_save(c, &saved_indent_level);
78 if(d->num_paths > 255) {
79 // Number of paths can't be >255 in the known versions of the format,
80 // because paths are indexed by a 1-byte int.
81 d->num_paths = 255;
84 if(d->num_paths <= 0) {
85 d->paths_segment_len = 0;
86 d->paths_segment_len_known = 1;
87 retval = 1;
88 goto done;
91 d->paths = de_mallocarray(c, d->num_paths, sizeof(de_ucstring*));
92 for(i=0; i<d->num_paths; i++) {
93 d->paths[i] = ucstring_create(c);
96 de_dbg(c, "paths at %"I64_FMT, d->paths_segment_pos);
97 de_dbg_indent(c, 1);
99 pos = d->paths_segment_pos;
100 for(i=0; i<d->num_paths; i++) {
101 int ret;
102 i64 foundpos = 0;
103 i64 path_len;
105 if(pos >= c->infile->len) goto done;
106 ret = dbuf_search_byte(c->infile, 0x00, pos, c->infile->len-pos, &foundpos);
107 if(!ret) goto done;
108 path_len = foundpos - pos;
109 if(path_len > 260) goto done;
110 dbuf_read_to_ucstring(c->infile, pos, path_len, d->paths[i], 0, d->input_encoding);
111 de_dbg(c, "path #%d: \"%s\"", (int)(i+1), ucstring_getpsz_d(d->paths[i]));
112 fixup_path(d->paths[i]);
113 pos = foundpos + 1;
115 d->paths_segment_len = pos - d->paths_segment_pos;
116 d->paths_segment_len_known = 1;
117 retval = 1;
118 done:
119 if(!retval) {
120 de_warn(c, "Could not read path table");
122 de_dbg_indent_restore(c, saved_indent_level);
123 return retval;
126 static void do_comment(deark *c, lctx *d)
128 i64 pos;
129 i64 len;
130 i64 avail_len;
131 i64 foundpos;
132 int ret;
133 de_ucstring *s = NULL;
135 if(!d->paths_segment_len_known) goto done;
136 pos = d->paths_segment_pos + d->paths_segment_len;
137 avail_len = c->infile->len - pos;
138 if(avail_len<=1) goto done;
140 // Find the terminating NUL
141 ret = dbuf_search_byte(c->infile, 0x00, pos, c->infile->len-pos, &foundpos);
142 if(!ret) goto done;
143 len = foundpos - pos;
144 if(len<1 || len>2048) goto done;
146 de_dbg(c, "comment at %"I64_FMT, pos);
147 s = ucstring_create(c);
148 dbuf_read_to_ucstring(c->infile, pos, len, s, 0,
149 DE_EXTENC_MAKE(d->input_encoding, DE_ENCSUBTYPE_HYBRID));
150 de_dbg_indent(c, 1);
151 de_dbg(c, "archive comment: \"%s\"", ucstring_getpsz_d(s));
152 de_dbg_indent(c, -1);
154 done:
155 ucstring_destroy(s);
158 static void do_extract_file(deark *c, lctx *d, struct member_data *md)
160 de_finfo *fi = NULL;
161 dbuf *outf = NULL;
163 fi = de_finfo_create(c);
164 de_finfo_set_name_from_ucstring(c, fi, md->fullfn, DE_SNFLAG_FULLPATH);
165 fi->original_filename_flag = 1;
166 if(md->mod_time.is_valid) {
167 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->mod_time;
170 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
171 dbuf_copy(c->infile, md->file_data_pos, md->file_size, outf);
173 dbuf_close(outf);
174 de_finfo_destroy(c, fi);
177 static void dbg_timestamp(deark *c, struct de_timestamp *ts, const char *field_name)
179 char timestamp_buf[64];
181 de_dbg_timestamp_to_string(c, ts, timestamp_buf, sizeof(timestamp_buf), 0);
182 de_dbg(c, "%s: %s", field_name, timestamp_buf);
185 static void make_fullfilename(deark *c, lctx *d, struct member_data *md)
187 md->fullfn = ucstring_create(c);
189 if(md->path_num>0 && md->path_num<=d->num_paths &&
190 d->paths && ucstring_isnonempty(d->paths[md->path_num-1]))
192 ucstring_append_ucstring(md->fullfn, d->paths[md->path_num-1]);
195 ucstring_append_ucstring(md->fullfn, md->fn);
198 // Uses and updates d->file_data_curpos
199 static int do_member_file(deark *c, lctx *d, i64 idx, i64 pos1)
201 int retval = 0;
202 i64 mod_time_raw, mod_date_raw;
203 struct member_data *md = NULL;
204 de_ucstring *descr = NULL;
205 int saved_indent_level;
207 de_dbg_indent_save(c, &saved_indent_level);
208 md = de_malloc(c, sizeof(struct member_data));
209 de_dbg(c, "member file[%d]", (int)idx);
210 de_dbg_indent(c, 1);
211 de_dbg(c, "dir entry at %"I64_FMT, pos1);
212 de_dbg_indent(c, 1);
214 md->path_num = (UI)de_getbyte(pos1);
215 de_dbg(c, "path num: %u", md->path_num);
217 md->attribs = (UI)de_getbyte(pos1+1);
218 descr = ucstring_create(c);
219 de_describe_dos_attribs(c, md->attribs, descr, 0);
220 de_dbg(c, "attribs: 0x%02x (%s)", md->attribs, ucstring_getpsz_d(descr));
222 mod_time_raw = de_getu16le(pos1+2);
223 mod_date_raw = de_getu16le(pos1+4);
224 de_dos_datetime_to_timestamp(&md->mod_time, mod_date_raw, mod_time_raw);
225 md->mod_time.tzcode = DE_TZCODE_LOCAL;
226 dbg_timestamp(c, &md->mod_time, "mod time");
228 md->file_size = de_getu32le(pos1+6);
229 de_dbg(c, "size: %"I64_FMT, md->file_size);
231 md->fn = ucstring_create(c);
232 dbuf_read_to_ucstring(c->infile, pos1+10, 12, md->fn, DE_CONVFLAG_STOP_AT_NUL,
233 d->input_encoding);
234 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->fn));
235 fixup_filename(md->fn);
236 make_fullfilename(c, d, md);
237 de_dbg_indent(c, -1);
239 // If this is just an index (.CRI) file, there's nothing to extract.
240 // We can't tell the difference between an index file, and an archive file
241 // containing only zero-length members, so extract zero-length files just
242 // in case.
243 if(d->is_cri && md->file_size!=0) {
244 retval = 1;
245 goto done;
248 de_dbg(c, "file data at %"I64_FMT, d->file_data_curpos);
250 if(d->file_data_curpos + md->file_size > d->dir_segment_pos) {
251 de_err(c, "Malformed CRU archive");
252 goto done;
255 md->file_data_pos = d->file_data_curpos;
256 d->file_data_curpos += md->file_size;
257 retval = 1;
259 if((md->attribs & 0x18) != 0x00) {
260 // I don't know if subdirs or volume labels can be in these archives.
261 de_warn(c, "%s: Not a regular file", ucstring_getpsz_d(md->fullfn));
264 do_extract_file(c, d, md);
266 done:
267 ucstring_destroy(descr);
268 if(md) {
269 ucstring_destroy(md->fn);
270 ucstring_destroy(md->fullfn);
271 de_free(c, md);
273 de_dbg_indent_restore(c, saved_indent_level);
274 return retval;
277 static void do_read_dir_and_extract_files(deark *c, lctx *d)
279 i64 i;
280 int saved_indent_level;
282 de_dbg_indent_save(c, &saved_indent_level);
284 if(d->num_files<1) {
285 goto done;
287 de_dbg(c, "directory at %"I64_FMT, d->dir_segment_pos);
288 de_dbg_indent(c, 1);
290 d->file_data_curpos = CRUSH_HEADER_LEN;
292 for(i=0; i<d->num_files; i++) {
293 if(!do_member_file(c, d, i, d->dir_segment_pos + CRUSH_DIRENTRY_LEN * i)) {
294 goto done;
298 done:
299 de_dbg_indent_restore(c, saved_indent_level);
302 static int do_archive_header(deark *c, lctx *d)
304 i64 pos1 = 0;
305 u8 b;
307 de_dbg(c, "archive header at %"I64_FMT, pos1);
308 de_dbg_indent(c, 1);
309 b = de_getbyte(pos1+7);
310 if(b>='0' && b<='9') d->ver_maj = (UI)(b-'0');
311 b = (UI)de_getbyte(pos1+9);
312 if(b>='0' && b<='9') d->ver_min = (UI)(b-'0');
313 de_dbg(c, "version: %u.%u", d->ver_maj, d->ver_min);
315 d->num_paths = de_getu16le(pos1+16);
316 de_dbg(c, "num paths %"I64_FMT, d->num_paths);
317 d->num_files = de_getu16le(pos1+18);
318 de_dbg(c, "num files %"I64_FMT, d->num_files);
319 d->dir_segment_pos = de_getu32le(pos1+22);
320 de_dbg(c, "directory pos: %"I64_FMT, d->dir_segment_pos);
322 de_dbg_indent(c, -1);
323 return 1;
326 static void de_run_crush(deark *c, de_module_params *mparams)
328 lctx *d = NULL;
330 d = de_malloc(c, sizeof(lctx));
331 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
333 if(!do_archive_header(c, d)) goto done;
334 d->is_cri = (d->dir_segment_pos == CRUSH_HEADER_LEN);
335 de_declare_fmtf(c, "CRUSH %s", d->is_cri ? "index" : "archive");
337 d->paths_segment_pos = d->dir_segment_pos + (CRUSH_DIRENTRY_LEN * d->num_files);
339 (void)do_read_paths(c, d);
340 do_comment(c, d);
341 do_read_dir_and_extract_files(c, d);
343 done:
344 if(d) {
345 if(d->paths) {
346 i64 i;
348 for(i=0; i<d->num_paths; i++) {
349 ucstring_destroy(d->paths[i]);
352 de_free(c, d);
356 static int de_identify_crush(deark *c)
358 u8 buf[14];
360 de_read(buf, 0, 14);
361 if(!de_memcmp(buf, "CRUSH v", 7) &&
362 buf[10]==0x0a && buf[11]==0x1a && buf[12]==0x00)
364 return 100;
367 return 0;
370 void de_module_crush(deark *c, struct deark_module_info *mi)
372 mi->id = "crush";
373 mi->desc = "CRUSH archive";
374 mi->run_fn = de_run_crush;
375 mi->identify_fn = de_identify_crush;