1 // This file is part of Deark.
2 // Copyright (C) 2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // CRUSH archive (PocketWare)
6 // Format is documented (CRUSH18.ZIP/MANUAL.DOC), though not in full detail.
8 #include <deark-config.h>
9 #include <deark-private.h>
// Declares the module's registration entry point, de_module_crush
// (presumably a prototype; the macro itself is defined outside this file).
10 DE_DECLARE_MODULE(de_module_crush
);
// Fixed byte sizes used for offset arithmetic in this file: member data is
// assumed to begin CRUSH_HEADER_LEN bytes into the file, and directory
// entries are spaced CRUSH_DIRENTRY_LEN bytes apart.
12 #define CRUSH_HEADER_LEN 26
13 #define CRUSH_DIRENTRY_LEN 24
// Fields of the per-member record accessed through "struct member_data *md"
// elsewhere in this file. NOTE(review): the struct header and its other
// fields are not visible in this excerpt.
// 1-based index into the archive's path list (read from the first byte of
// the directory entry).
20 UI path_num
; // 0 = no path, 1 = paths[0], 2 = paths[1], ...
// Modification time, converted from the entry's DOS date/time fields.
22 struct de_timestamp mod_time
;
// Per-run decoder state passed to the routines in this file as "d".
// NOTE(review): only some fields, and not the closing of the typedef, are
// visible in this excerpt.
25 typedef struct localctx_struct
{
// Encoding used when converting paths, filenames, and the archive comment.
26 de_encoding input_encoding
;
// Set to 1 once paths_segment_len holds a usable value.
32 int paths_segment_len_known
;
// File offset of the path table: directory offset plus
// CRUSH_DIRENTRY_LEN bytes per file entry.
33 i64 paths_segment_pos
;
// Size in bytes of the path table; the archive comment is looked for at
// paths_segment_pos + paths_segment_len.
34 i64 paths_segment_len
;
// One ucstring per path, allocated in do_read_paths.
35 de_ucstring
**paths
; // array[num_paths]
// Sanitize a member's filename in place.
// Each character of s is visited first; NOTE(review): the loop body is not
// visible in this excerpt, so the per-character rule is unknown here.
// Afterward, an empty name is replaced by "_" so it is never empty.
39 static void fixup_filename(de_ucstring
*s
)
43 for(i
=0; i
<s
->len
; i
++) {
49 if(ucstring_isempty(s
)) {
50 ucstring_append_char(s
, '_');
// Normalize a stored path in place so it can be prepended to a filename.
// An empty path is left untouched. Each character is visited first;
// NOTE(review): the loop body is not visible in this excerpt. Finally a '/'
// is appended unless the path already ends with one.
54 static void fixup_path(de_ucstring
*s
)
58 if(ucstring_isempty(s
)) return;
59 for(i
=0; i
<s
->len
; i
++) {
// s is known to be nonempty here, so indexing len-1 is in range.
65 if(s
->str
[s
->len
-1]!='/') {
66 ucstring_append_char(s
, '/');
// Read the path table: d->num_paths strings, each terminated by a 0x00
// byte, starting at d->paths_segment_pos. Each path is stored as a ucstring
// in d->paths[i], logged, then normalized with fixup_path().
// On the visible success paths, d->paths_segment_len is set and
// d->paths_segment_len_known is set to 1.
// Returns an int status. NOTE(review): the return statements, the handling
// of the >255 case, the check of the search result, and the code that
// advances "pos" are not visible in this excerpt.
70 static int do_read_paths(deark
*c
, lctx
*d
)
75 int saved_indent_level
;
77 de_dbg_indent_save(c
, &saved_indent_level
);
78 if(d
->num_paths
> 255) {
79 // Number of paths can't be >255 in the known versions of the format,
80 // because paths are indexed by a 1-byte int.
// With no paths, the path table is empty and its length is known to be 0.
84 if(d
->num_paths
<= 0) {
85 d
->paths_segment_len
= 0;
86 d
->paths_segment_len_known
= 1;
// Allocate the pointer array, then create every string up front, before
// any path data is read.
91 d
->paths
= de_mallocarray(c
, d
->num_paths
, sizeof(de_ucstring
*));
92 for(i
=0; i
<d
->num_paths
; i
++) {
93 d
->paths
[i
] = ucstring_create(c
);
96 de_dbg(c
, "paths at %"I64_FMT
, d
->paths_segment_pos
);
99 pos
= d
->paths_segment_pos
;
100 for(i
=0; i
<d
->num_paths
; i
++) {
// Stop if the table runs past the end of the input file.
105 if(pos
>= c
->infile
->len
) goto done
;
// The path extends from pos up to the next 0x00 byte.
106 ret
= dbuf_search_byte(c
->infile
, 0x00, pos
, c
->infile
->len
-pos
, &foundpos
);
108 path_len
= foundpos
- pos
;
// Paths longer than 260 bytes abort the read.
109 if(path_len
> 260) goto done
;
110 dbuf_read_to_ucstring(c
->infile
, pos
, path_len
, d
->paths
[i
], 0, d
->input_encoding
);
// The path is logged as stored, before fixup_path() modifies it.
111 de_dbg(c
, "path #%d: \"%s\"", (int)(i
+1), ucstring_getpsz_d(d
->paths
[i
]));
112 fixup_path(d
->paths
[i
]);
// The table length is the distance covered from its start.
115 d
->paths_segment_len
= pos
- d
->paths_segment_pos
;
116 d
->paths_segment_len_known
= 1;
// NOTE(review): the condition guarding this warning is not visible here.
120 de_warn(c
, "Could not read path table");
122 de_dbg_indent_restore(c
, saved_indent_level
);
// Look for an archive comment and write it to the debug log.
// The comment is expected immediately after the path table, as a string
// terminated by a 0x00 byte. Nothing is done unless the path table length
// is known. The visible code only logs the comment.
// NOTE(review): local declarations, the check of the search result, and the
// "done" label are not visible in this excerpt.
126 static void do_comment(deark
*c
, lctx
*d
)
133 de_ucstring
*s
= NULL
;
135 if(!d
->paths_segment_len_known
) goto done
;
136 pos
= d
->paths_segment_pos
+ d
->paths_segment_len
;
137 avail_len
= c
->infile
->len
- pos
;
// More than one byte must remain for there to be a comment.
138 if(avail_len
<=1) goto done
;
140 // Find the terminating NUL
141 ret
= dbuf_search_byte(c
->infile
, 0x00, pos
, c
->infile
->len
-pos
, &foundpos
);
143 len
= foundpos
- pos
;
// Only comments of 1 to 2048 bytes are accepted.
144 if(len
<1 || len
>2048) goto done
;
146 de_dbg(c
, "comment at %"I64_FMT
, pos
);
147 s
= ucstring_create(c
);
148 dbuf_read_to_ucstring(c
->infile
, pos
, len
, s
, 0,
149 DE_EXTENC_MAKE(d
->input_encoding
, DE_ENCSUBTYPE_HYBRID
));
151 de_dbg(c
, "archive comment: \"%s\"", ucstring_getpsz_d(s
));
152 de_dbg_indent(c
, -1);
// Write one member to an output file.
// The output is named from md->fullfn, and the modification time is
// attached when it is valid. The member's bytes are copied unchanged from
// the input: md->file_size bytes starting at md->file_data_pos. No
// decompression step appears in the visible code.
// NOTE(review): the declarations of "fi" and "outf", and the closing of
// "outf", are not visible in this excerpt.
158 static void do_extract_file(deark
*c
, lctx
*d
, struct member_data
*md
)
163 fi
= de_finfo_create(c
);
164 de_finfo_set_name_from_ucstring(c
, fi
, md
->fullfn
, DE_SNFLAG_FULLPATH
);
165 fi
->original_filename_flag
= 1;
166 if(md
->mod_time
.is_valid
) {
167 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = md
->mod_time
;
170 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
171 dbuf_copy(c
->infile
, md
->file_data_pos
, md
->file_size
, outf
);
174 de_finfo_destroy(c
, fi
);
// Log a timestamp to the debug output as "<field_name>: <text>".
// The text form is produced by de_dbg_timestamp_to_string() into a local
// 64-byte buffer.
177 static void dbg_timestamp(deark
*c
, struct de_timestamp
*ts
, const char *field_name
)
179 char timestamp_buf
[64];
181 de_dbg_timestamp_to_string(c
, ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
182 de_dbg(c
, "%s: %s", field_name
, timestamp_buf
);
// Build md->fullfn, the member's name including its directory path.
// md->fullfn is created here. If md->path_num selects an existing, nonempty
// entry of d->paths (path_num is 1-based; 0 means no path), that path is
// appended first. md->fn is then appended.
185 static void make_fullfilename(deark
*c
, lctx
*d
, struct member_data
*md
)
187 md
->fullfn
= ucstring_create(c
);
// Range-check the 1-based index, and require that the path array exists,
// before indexing it.
189 if(md
->path_num
>0 && md
->path_num
<=d
->num_paths
&&
190 d
->paths
&& ucstring_isnonempty(d
->paths
[md
->path_num
-1]))
192 ucstring_append_ucstring(md
->fullfn
, d
->paths
[md
->path_num
-1]);
195 ucstring_append_ucstring(md
->fullfn
, md
->fn
);
// Process one directory entry located at offset pos1: decode and log its
// fields, build the full filename, and extract the member's data.
// Directory entry fields as read below (offsets relative to pos1):
//   +0   path number (1 byte)
//   +1   DOS attribute bits (1 byte)
//   +2   DOS modification time (16-bit little-endian)
//   +4   DOS modification date (16-bit little-endian)
//   +6   file size (32-bit little-endian)
//   +10  filename, 12 bytes, read with DE_CONVFLAG_STOP_AT_NUL
// idx is used only for the log message.
// Returns an int; the caller tests it for zero. NOTE(review): the return
// statements, several branch bodies, and the last argument of the filename
// read are not visible in this excerpt.
198 // Uses and updates d->file_data_curpos
199 static int do_member_file(deark
*c
, lctx
*d
, i64 idx
, i64 pos1
)
202 i64 mod_time_raw
, mod_date_raw
;
203 struct member_data
*md
= NULL
;
204 de_ucstring
*descr
= NULL
;
205 int saved_indent_level
;
207 de_dbg_indent_save(c
, &saved_indent_level
);
208 md
= de_malloc(c
, sizeof(struct member_data
));
209 de_dbg(c
, "member file[%d]", (int)idx
);
211 de_dbg(c
, "dir entry at %"I64_FMT
, pos1
);
214 md
->path_num
= (UI
)de_getbyte(pos1
);
215 de_dbg(c
, "path num: %u", md
->path_num
);
217 md
->attribs
= (UI
)de_getbyte(pos1
+1);
// descr holds a text description of the attribute bits for the log line.
218 descr
= ucstring_create(c
);
219 de_describe_dos_attribs(c
, md
->attribs
, descr
, 0);
220 de_dbg(c
, "attribs: 0x%02x (%s)", md
->attribs
, ucstring_getpsz_d(descr
));
// The time field precedes the date field in the entry.
222 mod_time_raw
= de_getu16le(pos1
+2);
223 mod_date_raw
= de_getu16le(pos1
+4);
224 de_dos_datetime_to_timestamp(&md
->mod_time
, mod_date_raw
, mod_time_raw
);
// The stored time carries no time zone, so it is marked as local time.
225 md
->mod_time
.tzcode
= DE_TZCODE_LOCAL
;
226 dbg_timestamp(c
, &md
->mod_time
, "mod time");
228 md
->file_size
= de_getu32le(pos1
+6);
229 de_dbg(c
, "size: %"I64_FMT
, md
->file_size
);
231 md
->fn
= ucstring_create(c
);
232 dbuf_read_to_ucstring(c
->infile
, pos1
+10, 12, md
->fn
, DE_CONVFLAG_STOP_AT_NUL
,
// The filename is logged as stored, before fixup_filename() modifies it.
234 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(md
->fn
));
235 fixup_filename(md
->fn
);
236 make_fullfilename(c
, d
, md
);
237 de_dbg_indent(c
, -1);
239 // If this is just an index (.CRI) file, there's nothing to extract.
240 // We can't tell the difference between an index file, and an archive file
241 // containing only zero-length members, so extract zero-length files just
// NOTE(review): the body of this branch is not visible in this excerpt.
243 if(d
->is_cri
&& md
->file_size
!=0) {
248 de_dbg(c
, "file data at %"I64_FMT
, d
->file_data_curpos
);
// Member data must end at or before the start of the directory.
250 if(d
->file_data_curpos
+ md
->file_size
> d
->dir_segment_pos
) {
251 de_err(c
, "Malformed CRU archive");
// Members are stored back to back: this one starts at the running offset,
// which then advances by this member's size.
255 md
->file_data_pos
= d
->file_data_curpos
;
256 d
->file_data_curpos
+= md
->file_size
;
// 0x18 covers DOS attribute bits 0x08 and 0x10 (presumably volume label and
// directory, matching the comment below).
259 if((md
->attribs
& 0x18) != 0x00) {
260 // I don't know if subdirs or volume labels can be in these archives.
261 de_warn(c
, "%s: Not a regular file", ucstring_getpsz_d(md
->fullfn
));
264 do_extract_file(c
, d
, md
);
// Cleanup: release the strings created above.
267 ucstring_destroy(descr
);
269 ucstring_destroy(md
->fn
);
270 ucstring_destroy(md
->fullfn
);
273 de_dbg_indent_restore(c
, saved_indent_level
);
// Walk the directory and process every member in order.
// Entry i is located at d->dir_segment_pos + CRUSH_DIRENTRY_LEN*i. The
// running data offset d->file_data_curpos starts right after the archive
// header (CRUSH_HEADER_LEN) and is advanced by do_member_file().
// NOTE(review): what happens when do_member_file() returns zero is not
// visible in this excerpt.
277 static void do_read_dir_and_extract_files(deark
*c
, lctx
*d
)
280 int saved_indent_level
;
282 de_dbg_indent_save(c
, &saved_indent_level
);
287 de_dbg(c
, "directory at %"I64_FMT
, d
->dir_segment_pos
);
290 d
->file_data_curpos
= CRUSH_HEADER_LEN
;
292 for(i
=0; i
<d
->num_files
; i
++) {
293 if(!do_member_file(c
, d
, i
, d
->dir_segment_pos
+ CRUSH_DIRENTRY_LEN
* i
)) {
299 de_dbg_indent_restore(c
, saved_indent_level
);
// Parse the archive header and record its fields in d.
// Header fields as read below (offsets relative to pos1):
//   +7   major version, one ASCII digit
//   +9   minor version, one ASCII digit
//   +16  number of paths (16-bit little-endian)
//   +18  number of files (16-bit little-endian)
//   +22  directory offset (32-bit little-endian)
// A version byte outside '0'..'9' leaves the corresponding field unchanged.
// Returns an int; the caller treats zero as failure. NOTE(review): the
// declarations of "pos1" and "b", any validation, and the return statements
// are not visible in this excerpt.
302 static int do_archive_header(deark
*c
, lctx
*d
)
307 de_dbg(c
, "archive header at %"I64_FMT
, pos1
);
309 b
= de_getbyte(pos1
+7);
310 if(b
>='0' && b
<='9') d
->ver_maj
= (UI
)(b
-'0');
311 b
= (UI
)de_getbyte(pos1
+9);
312 if(b
>='0' && b
<='9') d
->ver_min
= (UI
)(b
-'0');
313 de_dbg(c
, "version: %u.%u", d
->ver_maj
, d
->ver_min
);
315 d
->num_paths
= de_getu16le(pos1
+16);
316 de_dbg(c
, "num paths %"I64_FMT
, d
->num_paths
);
317 d
->num_files
= de_getu16le(pos1
+18);
318 de_dbg(c
, "num files %"I64_FMT
, d
->num_files
);
319 d
->dir_segment_pos
= de_getu32le(pos1
+22);
320 de_dbg(c
, "directory pos: %"I64_FMT
, d
->dir_segment_pos
);
322 de_dbg_indent(c
, -1);
// Module entry point: decode a CRUSH file.
// Steps visible here: allocate the context, pick the input encoding
// (DE_ENCODING_CP437 is passed to de_get_input_encoding(), presumably as
// the default), parse the header, classify the file, locate and read the
// path table, then process the directory and extract members. Finally the
// path strings are destroyed.
// NOTE(review): other cleanup, any call to do_comment(), and any guard
// around the cleanup loop are not visible in this excerpt.
326 static void de_run_crush(deark
*c
, de_module_params
*mparams
)
330 d
= de_malloc(c
, sizeof(lctx
));
331 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
333 if(!do_archive_header(c
, d
)) goto done
;
// A directory that begins immediately after the header leaves no room for
// file data, so such a file is treated as an index rather than an archive.
334 d
->is_cri
= (d
->dir_segment_pos
== CRUSH_HEADER_LEN
);
335 de_declare_fmtf(c
, "CRUSH %s", d
->is_cri
? "index" : "archive");
// The path table follows the last directory entry.
337 d
->paths_segment_pos
= d
->dir_segment_pos
+ (CRUSH_DIRENTRY_LEN
* d
->num_files
);
// The result is deliberately ignored: processing continues even if the
// path table could not be read.
339 (void)do_read_paths(c
, d
);
341 do_read_dir_and_extract_files(c
, d
);
348 for(i
=0; i
<d
->num_paths
; i
++) {
349 ucstring_destroy(d
->paths
[i
]);
// Format identification.
// The visible test matches the 7 bytes "CRUSH v" at the start of buf,
// followed by the bytes 0x0a 0x1a 0x00 at offsets 10 through 12.
// NOTE(review): how buf is filled (presumably from the start of the input
// file) and the values returned are not visible in this excerpt.
356 static int de_identify_crush(deark
*c
)
361 if(!de_memcmp(buf
, "CRUSH v", 7) &&
362 buf
[10]==0x0a && buf
[11]==0x1a && buf
[12]==0x00)
// Module registration: fills in the module info record with the
// description string and the run and identify callbacks.
// NOTE(review): other fields of mi (such as a module id) may be set on
// lines not visible in this excerpt.
370 void de_module_crush(deark
*c
, struct deark_module_info
*mi
)
373 mi
->desc
= "CRUSH archive";
374 mi
->run_fn
= de_run_crush
;
375 mi
->identify_fn
= de_identify_crush
;