Refactoring the iff decoder
[deark.git] / modules / sis.c
blob43fa38c6e15ce25030903affc46d68c5aa2d1681
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // SIS - Symbian/EPOC installation archive
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_sis);
// Per-language information. do_language_records() allocates one element
// for each language record in the file.
struct lang_info {
	// NUL-terminated two-letter language name code, as produced by
	// lookup_lang_namecode(). do_extract_file() only uses it as a filename
	// prefix when it is non-empty.
	char sz[4];
};
16 struct file_fork_info {
17 i64 ptr;
18 i64 len;
19 i64 orig_len;
22 // A "file rec" is a kind of record, which may or may not actually represent
23 // a file.
24 struct file_rec {
25 i64 rec_pos; // points to the "File record type" field
26 i64 rec_len;
27 unsigned int rectype;
28 unsigned int file_type;
30 i64 num_forks;
31 struct file_fork_info *ffi; // has [num_forks] elements
32 de_ucstring *name_src;
33 de_ucstring *name_dest;
34 de_ucstring *name_to_use;
37 typedef struct localctx_struct {
38 i64 installer_ver;
39 unsigned int options;
40 u8 is_rel6;
41 u8 files_are_compressed;
42 i64 nlangs;
43 i64 nfiles;
44 i64 nrequisites;
45 i64 languages_ptr;
46 i64 files_ptr;
47 i64 requisites_ptr;
48 i64 certificates_ptr;
49 i64 component_name_ptr;
50 i64 signature_ptr;
51 i64 capabilities_ptr;
52 struct lang_info *langi;
53 } lctx;
55 static int do_file_header(deark *c, lctx *d, i64 pos1)
57 i64 pos = pos1;
58 i64 k;
59 i64 n, n2;
60 int retval = 0;
61 u32 crc_even;
62 u32 crc_odd;
63 de_ucstring *options_descr = NULL;
64 struct de_crcobj *crco = NULL;
65 u8 tmpbuf[12];
67 de_dbg(c, "file header at %d", (int)pos);
69 de_dbg_indent(c, 1);
71 // Pre-read the first 12 bytes, to calculate some CRCs for later.
72 de_read(tmpbuf, pos, 12);
74 crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_CCITT);
75 for(k=0; k<12; k+=2) {
76 de_crcobj_addbuf(crco, &tmpbuf[k], 1);
78 crc_even = de_crcobj_getval(crco);
79 de_crcobj_reset(crco);
80 for(k=1; k<12; k+=2) {
81 de_crcobj_addbuf(crco, &tmpbuf[k], 1);
83 crc_odd = de_crcobj_getval(crco);
84 de_crcobj_destroy(crco);
85 crco = NULL;
87 n = de_getu32le_p(&pos);
88 de_dbg(c, "UID 1: 0x%08x", (unsigned int)n);
90 n = de_getu32le_p(&pos);
91 de_dbg(c, "UID 2: 0x%08x", (unsigned int)n);
92 if(n==0x10003a12) {
93 d->is_rel6 = 1;
96 n = de_getu32le_p(&pos);
97 de_dbg(c, "UID 3: 0x%08x", (unsigned int)n);
99 n = de_getu32le_p(&pos);
100 de_dbg(c, "UID 4: 0x%08x", (unsigned int)n);
101 // The way UID 4 is calculated is really silly.
102 de_dbg(c, "expected value of UID 4: 0x%04x%04x",
103 (unsigned int)crc_odd, (unsigned int)crc_even);
105 if(d->is_rel6) {
106 de_declare_fmt(c, "SIS, EPOC r6");
108 else {
109 de_declare_fmt(c, "SIS, EPOC r3/4/5");
112 pos += 2; // checksum
114 d->nlangs = de_getu16le_p(&pos);
115 de_dbg(c, "num languages: %d", (int)d->nlangs);
117 d->nfiles = de_getu16le_p(&pos);
118 de_dbg(c, "num files: %d", (int)d->nfiles);
120 d->nrequisites = de_getu16le_p(&pos);
121 de_dbg(c, "num requisites: %d", (int)d->nrequisites);
123 pos += 2; // installation language
124 pos += 2; // installation files
125 pos += 2; // installation drive
127 n = de_getu16le_p(&pos);
128 de_dbg(c, "num capabilities: %d", (int)n);
130 d->installer_ver = de_getu32le_p(&pos);
131 de_dbg(c, "installer ver: %d", (int)d->installer_ver);
132 if(d->installer_ver<68) {
133 de_warn(c, "Unknown version: %d", (int)d->installer_ver);
136 d->options = (unsigned int)de_getu16le_p(&pos);
137 options_descr = ucstring_create(c);
138 if(d->options&0x01) ucstring_append_flags_item(options_descr, "IsUnicode");
139 if(d->options&0x02) ucstring_append_flags_item(options_descr, "IsDistributable");
140 if(d->options&0x08) ucstring_append_flags_item(options_descr, "NoCompress");
141 if(d->options&0x10) ucstring_append_flags_item(options_descr, "ShutdownApps");
142 de_dbg(c, "options: 0x%04x (%s)", d->options, ucstring_getpsz(options_descr));
143 if(d->is_rel6 && !(d->options&0x0008)) {
144 d->files_are_compressed = 1;
147 pos += 2; // type (TODO)
148 n = de_getu16le_p(&pos);
149 n2 = de_getu16le_p(&pos);
150 de_dbg(c, "app version: %d,%d", (int)n, (int)n2);
151 pos += 4; // variant
153 d->languages_ptr = de_getu32le_p(&pos);
154 de_dbg(c, "languages ptr: %"I64_FMT, d->languages_ptr);
155 d->files_ptr = de_getu32le_p(&pos);
156 de_dbg(c, "files ptr: %"I64_FMT, d->files_ptr);
158 d->requisites_ptr = de_getu32le_p(&pos);
159 de_dbg(c, "requisites ptr: %"I64_FMT, d->requisites_ptr);
160 d->certificates_ptr = de_getu32le_p(&pos);
161 de_dbg(c, "certificates ptr: %"I64_FMT, d->certificates_ptr);
162 d->component_name_ptr = de_getu32le_p(&pos);
163 de_dbg(c, "component name ptr: %"I64_FMT, d->component_name_ptr);
165 if(d->is_rel6) {
166 n = de_getu32le_p(&pos);
167 de_dbg(c, "signature ptr: %"I64_FMT, n);
168 n = de_getu32le_p(&pos);
169 de_dbg(c, "capabilities ptr: %"I64_FMT, n);
172 retval = 1;
173 de_dbg_indent(c, -1);
174 ucstring_destroy(options_descr);
175 return retval;
// Returns a human-readable description of a file record's "file type"
// field, or "?" if the value is not recognized. The returned string is a
// constant; the caller must not modify or free it.
static const char *get_file_type_name(unsigned int t)
{
	switch(t) {
	case 0: return "standard file";
	case 1: return "text file displayed during install";
	case 2: return "SIS component file";
	case 3: return "file run during install";
	case 4: return "file to be created during install";
	case 5: return "open file";
	default: break;
	}
	return "?";
}
192 static void do_extract_file(deark *c, lctx *d, struct file_rec *fr,
193 i64 fork_num)
195 de_finfo *fi = NULL;
196 de_ucstring *fn = NULL;
197 dbuf *outf = NULL;
199 if(fr->ffi[fork_num].ptr<0 ||
200 fr->ffi[fork_num].ptr + fr->ffi[fork_num].len > c->infile->len)
202 goto done;
205 fi = de_finfo_create(c);
207 fn = ucstring_create(c);
209 if(fr->rectype==0x1 && fork_num<d->nlangs && d->langi &&
210 d->langi[fork_num].sz[0])
212 // Prepend a code for the language
213 ucstring_append_sz(fn, d->langi[fork_num].sz, DE_ENCODING_LATIN1);
214 ucstring_append_sz(fn, ".", DE_ENCODING_LATIN1);
216 ucstring_append_ucstring(fn, fr->name_to_use);
217 de_finfo_set_name_from_ucstring(c, fi, fn, 0);
219 outf = dbuf_create_output_file(c, NULL, fi, 0);
220 if(d->files_are_compressed) {
221 if(!fmtutil_decompress_deflate(c->infile, fr->ffi[fork_num].ptr, fr->ffi[fork_num].len,
222 outf, fr->ffi[fork_num].orig_len, NULL,
223 DE_DEFLATEFLAG_ISZLIB|DE_DEFLATEFLAG_USEMAXUNCMPRSIZE))
225 goto done;
227 if(outf->len != fr->ffi[fork_num].orig_len) {
228 de_warn(c, "expected %"I64_FMT" bytes, got %"I64_FMT,
229 fr->ffi[fork_num].orig_len, outf->len);
232 else {
233 dbuf_copy(c->infile, fr->ffi[fork_num].ptr, fr->ffi[fork_num].len, outf);
236 done:
237 dbuf_close(outf);
238 de_finfo_destroy(c, fi);
239 ucstring_destroy(fn);
242 static void read_sis_string(deark *c, lctx *d, de_ucstring *s,
243 i64 pos, i64 len)
245 if(d->options & 0x0001) {
246 dbuf_read_to_ucstring_n(c->infile, pos, len, 512*2, s, 0, DE_ENCODING_UTF16LE);
248 else {
249 dbuf_read_to_ucstring_n(c->infile, pos, len, 512, s, 0, DE_ENCODING_WINDOWS1252);
253 // Append a substring of s2 to s1
254 static void ucstring_append_substring(de_ucstring *s1, const de_ucstring *s2,
255 i64 pos, i64 len)
257 i64 i;
259 if(!s2) return;
260 if(pos<0) return;
261 for(i=0; i<len; i++) {
262 if(pos+i >= s2->len) break;
263 ucstring_append_char(s1, s2->str[pos+i]);
267 // Sets fr->name_to_use
268 static void make_output_filename(deark *c, lctx *d, struct file_rec *fr)
270 i64 k;
271 i64 pathlen = 0;
272 i64 basenamelen;
273 de_ucstring *s;
275 if(fr->name_to_use) return;
276 if(!fr->name_dest || !fr->name_src) return;
277 fr->name_to_use = ucstring_create(c);
279 // s will point to either fr->name_dest or fr->name_src, whichever
280 // one looks better.
281 if(fr->name_src->len>0) {
282 s = fr->name_src;
284 else {
285 s = fr->name_dest;
288 if((fr->file_type==0 || fr->file_type==3) && fr->name_dest->len>0) {
289 s = fr->name_dest;
292 for(k=s->len-1; k>=0; k--) {
293 if(s->str[k]=='\\' ||
294 s->str[k]=='/')
296 pathlen = k+1;
297 break;
300 basenamelen = s->len - pathlen;
302 if(basenamelen>1) {
303 ucstring_append_substring(fr->name_to_use, s, pathlen, basenamelen);
305 else {
306 ucstring_append_ucstring(fr->name_to_use, s);
310 // Returns 0 if fr->rec_len was not set
311 static int do_file_record_file(deark *c, lctx *d, struct file_rec *fr)
313 i64 pos = fr->rec_pos;
314 i64 k;
315 i64 nlen, nptr;
316 int should_extract;
318 pos += 4; // File record type, already read
319 fr->file_type = (unsigned int)de_getu32le_p(&pos);
320 de_dbg(c, "file type: %u (%s)", fr->file_type, get_file_type_name(fr->file_type));
322 pos += 4; // file details
324 nlen = de_getu32le_p(&pos);
325 nptr = de_getu32le_p(&pos);
326 fr->name_src = ucstring_create(c);
327 read_sis_string(c, d, fr->name_src, nptr, nlen);
328 de_dbg(c, "src name: \"%s\"", ucstring_getpsz_d(fr->name_src));
330 nlen = de_getu32le_p(&pos);
331 nptr = de_getu32le_p(&pos);
332 fr->name_dest = ucstring_create(c);
333 read_sis_string(c, d, fr->name_dest, nptr, nlen);
334 de_dbg(c, "dest name: \"%s\"", ucstring_getpsz_d(fr->name_dest));
336 make_output_filename(c, d, fr);
338 if(fr->rectype==0x1) fr->num_forks = d->nlangs;
339 else fr->num_forks = 1;
341 fr->ffi = de_mallocarray(c, fr->num_forks, sizeof(struct file_fork_info));
343 for(k=0; k<fr->num_forks; k++) {
344 fr->ffi[k].len = de_getu32le_p(&pos);
345 de_dbg(c, "len[%d]: %"I64_FMT, (int)k, fr->ffi[k].len);
347 for(k=0; k<fr->num_forks; k++) {
348 fr->ffi[k].ptr = de_getu32le_p(&pos);
349 de_dbg(c, "ptr[%d]: %"I64_FMT, (int)k, fr->ffi[k].ptr);
352 if(d->is_rel6) {
353 for(k=0; k<fr->num_forks; k++) {
354 fr->ffi[k].orig_len = de_getu32le_p(&pos);
355 de_dbg(c, "orig_len[%d]: %"I64_FMT, (int)k, fr->ffi[k].orig_len);
357 pos += 4; // MIME type len
358 pos += 4; // MIME type ptr
361 should_extract = 0;
362 if(fr->file_type==0 || fr->file_type==1 || fr->file_type==2 ||
363 fr->file_type==3 || fr->file_type==5)
365 should_extract = 1;
368 if(should_extract) {
369 for(k=0; k<fr->num_forks; k++) {
370 do_extract_file(c, d, fr, k);
374 fr->rec_len = pos - fr->rec_pos;
375 de_dbg2(c, "record len: %d", (int)fr->rec_len);
376 return 1;
// Returns a human-readable description of a file record's "record type"
// field, or "?" if the value is not recognized. The returned string is a
// constant; the caller must not modify or free it.
static const char *get_file_rec_type_name(unsigned int t)
{
	switch(t) {
	case 0: return "simple file";
	case 1: return "multi-language file set";
	case 2: return "options";
	case 3: return "*if*";
	case 4: return "*elseif*";
	case 5: return "*else*";
	case 6: return "*endif*";
	default: break;
	}
	return "?";
}
394 static int do_file_record(deark *c, lctx *d, i64 idx,
395 i64 pos1, i64 *bytes_consumed)
397 i64 pos = pos1;
398 int retval = 0;
399 struct file_rec *fr = NULL;
400 int saved_indent_level;
402 de_dbg_indent_save(c, &saved_indent_level);
403 fr = de_malloc(c, sizeof(struct file_rec));
404 fr->rec_pos = pos1;
405 de_dbg(c, "file record[%d] at %"I64_FMT, (int)idx, fr->rec_pos);
406 de_dbg_indent(c, 1);
408 fr->rectype = (unsigned int)de_getu32le_p(&pos);
409 de_dbg(c, "record type: 0x%08x (%s)", fr->rectype, get_file_rec_type_name(fr->rectype));
411 if(fr->rectype==0x0 || fr->rectype==0x1) {
412 if(!do_file_record_file(c, d, fr)) goto done;
414 else if(fr->rectype==0x3 || fr->rectype==0x4) { // *if*, *elseif*
415 i64 n;
416 n = de_getu32le_p(&pos);
417 de_dbg(c, "size of conditional expression: %d", (int)n);
418 pos += n;
419 fr->rec_len = pos - pos1;
421 else if(fr->rectype==0x5 || fr->rectype==0x6) { // *else*, *endif*
422 fr->rec_len = 4;
424 else {
425 de_err(c, "Unsupported record type (0x%08x), can't continue", fr->rectype);
426 goto done;
429 *bytes_consumed = fr->rec_len;
430 retval = 1;
431 done:
432 if(fr) {
433 de_free(c, fr->ffi);
434 ucstring_destroy(fr->name_src);
435 ucstring_destroy(fr->name_dest);
436 ucstring_destroy(fr->name_to_use);
437 de_free(c, fr);
439 de_dbg_indent_restore(c, saved_indent_level);
440 return retval;
443 static void do_file_records(deark *c, lctx *d)
445 i64 k;
446 i64 pos1 = d->files_ptr;
447 i64 pos = pos1;
449 de_dbg(c, "file records at %"I64_FMT, pos1);
450 de_dbg_indent(c, 1);
451 for(k=0; k<d->nfiles; k++) {
452 i64 bytes_consumed = 0;
454 if(pos >= c->infile->len) break;
455 if(!do_file_record(c, d, k, pos, &bytes_consumed)) break;
456 pos += bytes_consumed;
458 de_dbg_indent(c, -1);
// Translates a numeric language code to a two-letter name code.
//
// lc:     language code from the file. Values outside the table (>=99) are
//         treated as code 0, whose name is "XX".
// nc:     output buffer; receives a NUL-terminated string.
// nc_len: size of nc in bytes. At least 3 bytes are needed for the full
//         code; with less, the code is truncated to fit (and is still
//         NUL-terminated). Nothing is written if nc is NULL or nc_len is 0.
static void lookup_lang_namecode(unsigned int lc, char *nc, size_t nc_len)
{
	static const char codes[99*2+1] =
		"XXENFRGESPITSWDANOFIAMSFSGPOTUICRUHUDUBLAUBGASNZIFCSSKPLSLTCHKZH"
		"JATHAFSQAHARHYTLBEBNBGMYCAHRCEIESFETFACFGDKAELCGGUHEHIINGASZKNKK"
		"KMKOLOLVLTMKMSMLMRMOMNNNBPPAROSRSISOOSLSSHFSXXTATEBOTICTTKUKURXX"
		"VICYZU";
	size_t ncopy;
	size_t i;

	if(!nc || nc_len<1) return;

	if(lc>=99) lc=0;

	// Copy as many of the 2 code characters as fit, leaving room for the
	// terminating NUL.
	ncopy = nc_len-1;
	if(ncopy>2) ncopy = 2;
	for(i=0; i<ncopy; i++) {
		nc[i] = codes[2*lc+i];
	}
	nc[ncopy] = '\0';
}
475 static void do_language_records(deark *c, lctx *d)
477 i64 k;
478 i64 pos1 = d->languages_ptr;
479 i64 pos = pos1;
481 if(d->nlangs<1) return;
482 de_dbg(c, "language records at %"I64_FMT, pos1);
483 d->langi = de_mallocarray(c, d->nlangs, sizeof(struct lang_info));
484 de_dbg_indent(c, 1);
485 for(k=0; k<d->nlangs; k++) {
486 unsigned int lc;
487 lc = (unsigned int)de_getu16le_p(&pos);
488 lookup_lang_namecode(lc, d->langi[k].sz, sizeof(d->langi[k].sz));
489 de_dbg(c, "lang[%d] = %u (%s)", (int)k, lc, d->langi[k].sz);
491 de_dbg_indent(c, -1);
494 static void do_component_name_record(deark *c, lctx *d)
496 i64 pos1 = d->component_name_ptr;
497 de_ucstring *s = NULL;
498 i64 k;
500 if(pos1<1 || pos1>=c->infile->len) return;
501 if(d->nlangs<1) return;
503 de_dbg(c, "component name record at %"I64_FMT, pos1);
504 de_dbg_indent(c, 1);
505 s = ucstring_create(c);
506 for(k=0; k<d->nlangs; k++) {
507 i64 npos, nlen;
508 nlen = de_getu32le(pos1+4*k);
509 npos = de_getu32le(pos1+4*d->nlangs+4*k);
510 ucstring_empty(s);
511 read_sis_string(c, d, s, npos, nlen);
512 de_dbg(c, "name[%d]: \"%s\"", (int)k, ucstring_getpsz_d(s));
514 de_dbg_indent(c, -1);
515 ucstring_destroy(s);
518 static void do_requisite_records(deark *c, lctx *d)
520 i64 pos1 = d->requisites_ptr;
521 i64 pos = pos1;
522 i64 k, i;
523 de_ucstring *s = NULL;
525 if(d->nrequisites<1) return;
526 if(pos1<1 || pos1>=c->infile->len) return;
527 de_dbg(c, "requisite records at %"I64_FMT, pos1);
528 s = ucstring_create(c);
529 de_dbg_indent(c, 1);
530 for(k=0; k<d->nrequisites; k++) {
531 i64 n, n2;
533 de_dbg(c, "requisite record[%d] at %"I64_FMT, (int)k, pos);
534 de_dbg_indent(c, 1);
535 n = de_getu32le_p(&pos);
536 de_dbg(c, "UID: 0x%08x", (unsigned int)n);
537 n = de_getu16le_p(&pos);
538 n2 = de_getu16le_p(&pos);
539 de_dbg(c, "version required: %d,%d", (int)n, (int)n2);
540 n = de_getu32le_p(&pos);
541 de_dbg(c, "variant: 0x%08x", (unsigned int)n);
543 for(i=0; i<d->nlangs; i++) {
544 i64 npos, nlen;
545 nlen = de_getu32le(pos+4*i);
546 npos = de_getu32le(pos+4*d->nlangs+4*i);
547 ucstring_empty(s);
548 read_sis_string(c, d, s, npos, nlen);
549 de_dbg(c, "name[%d]: \"%s\"", (int)i, ucstring_getpsz_d(s));
551 pos += 4*d->nlangs; // name lengths
552 pos += 4*d->nlangs; // name pointers
554 de_dbg_indent(c, -1);
556 de_dbg_indent(c, -1);
557 ucstring_destroy(s);
559 static void do_certificate_records(deark *c, lctx *d)
561 i64 pos1 = d->certificates_ptr;
562 i64 pos = pos1;
563 i64 k;
564 i64 ncerts;
565 int z[6];
567 if(pos1<1 || pos1>=c->infile->len) return;
568 de_dbg(c, "certificate records at %"I64_FMT, pos1);
569 de_dbg_indent(c, 1);
570 for(k=0; k<6; k++) {
571 z[k] = (int)de_getu16le_p(&pos);
574 // The documentation I have does not explain how the month is encoded.
575 // I.e., is January month #0, or month #1?
576 // I found a file that has the month set to 0, so I assume that must be
577 // January.
578 de_dbg(c, "timestamp: %04d-%02d-%02d %02d:%02d:%02d",
579 z[0], z[1]+1, z[2],
580 z[3], z[4], z[5]);
581 ncerts = de_getu32le_p(&pos);
582 de_dbg(c, "number of certs: %d", (int)ncerts);
583 de_dbg_indent(c, -1);
586 static void de_run_sis(deark *c, de_module_params *mparams)
588 lctx *d = NULL;
589 i64 pos;
591 d = de_malloc(c, sizeof(lctx));
593 pos = 0;
594 if(!do_file_header(c, d, pos)) goto done;
596 do_language_records(c, d);
597 do_component_name_record(c, d);
598 do_requisite_records(c, d);
599 do_file_records(c, d);
600 do_certificate_records(c, d);
602 done:
603 if(d) {
604 de_free(c, d->langi);
606 de_free(c, d);
609 static int de_identify_sis(deark *c)
611 if(!dbuf_memcmp(c->infile, 8, "\x19\x04\x00\x10", 4)) {
612 if(!dbuf_memcmp(c->infile, 4, "\x6d\x00\x00\x10", 4))
613 return 100;
614 if(!dbuf_memcmp(c->infile, 4, "\x12\x3a\x00\x10", 4))
615 return 100;
617 return 0;
620 void de_module_sis(deark *c, struct deark_module_info *mi)
622 mi->id = "sis";
623 mi->desc = "SIS (EPOC/Symbian installation archive)";
624 mi->run_fn = de_run_sis;
625 mi->identify_fn = de_identify_sis;