zip: Better parsing of Info-ZIP type 1 extra field
[deark.git] / modules / cardfile.c
blob e19f39cc7a0b9f30bf281b41120f6f55381b80b8
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // Extract graphics and text from Windows Cardfile .crd format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_cardfile);
11 struct page_ctx {
12 i64 cardnum;
13 i64 datapos;
14 de_ucstring *name;
17 typedef struct localctx_struct {
18 #define DE_CRDFMT_MGC 1
19 #define DE_CRDFMT_RRG 2
20 #define DE_CRDFMT_DKO 3
21 int fmt;
22 int crd_encoding;
23 int ole_encoding;
24 const char *signature;
25 i64 numcards;
26 } lctx;
28 static void do_extract_text_data(deark *c, lctx *d, de_finfo *fi, i64 text_pos, i64 text_len)
30 if(text_len<1) return;
31 if(text_pos + text_len > c->infile->len) return;
33 // TODO: Consider trying to convert to UTF-8, especially if the user used the
34 // "inenc" option.
35 dbuf_create_file_from_slice(c->infile, text_pos, text_len, "txt", fi, 0);
38 static void do_dbg_text_data(deark *c, lctx *d, i64 text_pos, i64 text_len)
40 de_ucstring *s = NULL;
42 s = ucstring_create(c);
43 dbuf_read_to_ucstring_n(c->infile, text_pos, text_len, DE_DBG_MAX_STRLEN, s,
44 0, d->crd_encoding);
45 de_dbg(c, "text: \"%s\"", ucstring_getpsz_d(s));
46 ucstring_destroy(s);
49 static void do_bitmap_mgc(deark *c, lctx *d, struct page_ctx *pg)
51 i64 w, h;
52 i64 src_rowspan;
53 de_bitmap *img = NULL;
54 de_finfo *fi_bitmap = NULL;
56 fi_bitmap = de_finfo_create(c);
57 if(c->filenames_from_file)
58 de_finfo_set_name_from_ucstring(c, fi_bitmap, pg->name, 0);
60 w = de_getu16le(pg->datapos+2);
61 h = de_getu16le(pg->datapos+4);
62 de_dbg(c, "bitmap dimensions: %d"DE_CHAR_TIMES"%d", (int)w, (int)h);
64 img = de_bitmap_create(c, w, h, 1);
65 src_rowspan = ((w+15)/16)*2;
67 de_convert_and_write_image_bilevel2(c->infile, pg->datapos+10,
68 w, h, src_rowspan, 0, fi_bitmap, 0);
70 de_bitmap_destroy(img);
71 de_finfo_destroy(c, fi_bitmap);
74 static void do_text(deark *c, lctx *d, struct page_ctx *pg,
75 i64 text_pos, i64 text_len)
77 de_finfo *fi_text = NULL;
79 if(text_len<1) goto done;
81 if(c->extract_level>=2) {
82 fi_text = de_finfo_create(c);
83 if(c->filenames_from_file)
84 de_finfo_set_name_from_ucstring(c, fi_text, pg->name, 0);
86 do_extract_text_data(c, d, fi_text, text_pos, text_len);
88 else {
89 do_dbg_text_data(c, d, text_pos, text_len);
92 done:
93 de_finfo_destroy(c, fi_text);
96 static void do_carddata_mgc(deark *c, lctx *d, struct page_ctx *pg)
98 i64 bitmap_len;
99 i64 text_len;
100 i64 text_pos;
102 // Bitmap
104 bitmap_len = de_getu16le(pg->datapos);
105 de_dbg(c, "bitmap length: %d", (int)bitmap_len);
107 if(bitmap_len!=0) {
108 do_bitmap_mgc(c, d, pg);
111 // Text
113 if(bitmap_len==0) {
114 text_len = de_getu16le(pg->datapos+2);
115 text_pos = pg->datapos+4;
117 else {
118 text_len = de_getu16le(pg->datapos + 10 + bitmap_len);
119 text_pos = pg->datapos + 10 + bitmap_len +2;
121 de_dbg(c, "text length: %d", (int)text_len);
123 if(text_len!=0) {
124 do_text(c, d, pg, text_pos, text_len);
128 static int do_object_rrg(deark *c, lctx *d, struct page_ctx *pg, i64 pos1,
129 i64 *bytes_consumed)
131 de_module_params *mparams = NULL;
132 i64 pos = pos1;
133 i64 n1, n2, n3, n4;
134 int retval = 0;
136 n1 = de_getu32le_p(&pos);
137 de_dbg(c, "object ID: 0x%08x", (unsigned int)n1);
139 mparams = de_malloc(c, sizeof(de_module_params));
140 mparams->in_params.codes = "U";
141 mparams->in_params.input_encoding = d->ole_encoding;
143 // TODO: Make the output filenames contain the index text
144 de_dbg(c, "OLE1 data at %"I64_FMT, pos);
145 de_dbg_indent(c, 1);
146 de_run_module_by_id_on_slice(c, "ole1", mparams, c->infile, pos,
147 c->infile->len-pos);
148 de_dbg_indent(c, -1);
150 // Unfortunately, there is no direct way to figure out the OLE object size,
151 // and we need it to find the card's text (and to know whether it has text).
152 // The ole1 module will try to tell us the size, but this feature needs more
153 // work, and is difficult to test.
155 if(mparams->out_params.flags & 0x1) {
156 pos += mparams->out_params.int64_1;
158 else {
159 // ole1 module failed to figure out the object size
160 goto done;
162 de_dbg(c, "[OLE object ends at %"I64_FMT"]", pos);
164 n1 = de_getu16le_p(&pos);
165 n2 = de_getu16le_p(&pos);
166 de_dbg(c, "char width,height: %d,%d", (int)n1, (int)n2);
168 n1 = de_geti16le_p(&pos);
169 n2 = de_geti16le_p(&pos);
170 n3 = de_getu16le_p(&pos);
171 n4 = de_getu16le_p(&pos);
172 de_dbg(c, "rect: %d,%d,%d,%d", (int)n1, (int)n2, (int)n3, (int)n4);
174 n1 = de_getu16le_p(&pos);
175 de_dbg(c, "object type: %d", (int)n1);
177 *bytes_consumed = pos - pos1;
178 retval = 1;
179 done:
180 de_free(c, mparams);
181 return retval;
184 static void do_carddata_rrg(deark *c, lctx *d, struct page_ctx *pg)
186 unsigned int flags;
187 int ret;
188 i64 text_len;
189 i64 pos = pg->datapos;
191 flags = (unsigned int)de_getu16le_p(&pos);
192 de_dbg(c, "flags: %u", flags);
193 if(flags) {
194 i64 bytes_consumed = 0;
195 ret = do_object_rrg(c, d, pg, pos, &bytes_consumed);
196 if(!ret || bytes_consumed<1) {
197 de_warn(c, "card #%d: Failed to parse OLE object; any text on this card "
198 "cannot be processed.", (int)pg->cardnum);
199 goto done;
201 pos += bytes_consumed;
204 text_len = de_getu16le_p(&pos);
205 de_dbg(c, "text length: %d", (int)text_len);
206 if(text_len!=0) {
207 do_text(c, d, pg, pos, text_len);
210 done:
214 // Process a card, given the offset of its index
215 static void do_card(deark *c, lctx *d, i64 cardnum, i64 pos)
217 int saved_indent_level;
218 struct page_ctx *pg = NULL;
220 de_dbg_indent_save(c, &saved_indent_level);
222 pg = de_malloc(c, sizeof(struct page_ctx));
223 pg->cardnum = cardnum;
224 de_dbg(c, "card #%d", (int)pg->cardnum);
225 de_dbg_indent(c, 1);
226 de_dbg(c, "index at %"I64_FMT, pos);
227 de_dbg_indent(c, 1);
228 pg->datapos = de_getu32le(pos+6);
229 de_dbg(c, "datapos: %"I64_FMT, pg->datapos);
230 if(pg->datapos>=c->infile->len) goto done;
232 pg->name = ucstring_create(c);
233 if(d->crd_encoding==DE_ENCODING_UTF16LE) {
234 dbuf_read_to_ucstring(c->infile, pos+11, 40, pg->name, 0,
235 d->crd_encoding);
236 ucstring_truncate_at_NUL(pg->name);
238 else {
239 dbuf_read_to_ucstring(c->infile, pos+11, 40, pg->name, DE_CONVFLAG_STOP_AT_NUL,
240 d->crd_encoding);
242 de_dbg(c, "index text: \"%s\"", ucstring_getpsz_d(pg->name));
244 de_dbg_indent(c, -1);
246 de_dbg(c, "data at %"I64_FMT, pg->datapos);
247 de_dbg_indent(c, 1);
249 if(d->fmt==DE_CRDFMT_RRG) {
250 do_carddata_rrg(c, d, pg);
252 else {
253 do_carddata_mgc(c, d, pg);
256 done:
257 if(pg) {
258 ucstring_destroy(pg->name);
259 de_free(c, pg);
261 de_dbg_indent_restore(c, saved_indent_level);
264 static int detect_crd_fmt(deark *c)
266 u8 buf[4];
267 de_read(buf, 0, 4);
269 if(!de_memcmp(buf, "MGC", 3)) return DE_CRDFMT_MGC;
270 if(!de_memcmp(buf, "RRG", 3)) return DE_CRDFMT_RRG;
271 if(!de_memcmp(buf, "DKO", 3)) return DE_CRDFMT_DKO;
272 return 0;
275 static void de_run_cardfile(deark *c, de_module_params *mparams)
277 lctx *d = NULL;
278 i64 pos;
279 i64 n;
281 d = de_malloc(c, sizeof(lctx));
283 pos = 0;
284 d->fmt = detect_crd_fmt(c);
285 if(d->fmt==DE_CRDFMT_MGC) {
286 d->signature = "MGC";
287 de_declare_fmt(c, "CardFile");
289 else if(d->fmt==DE_CRDFMT_RRG) {
290 d->signature = "RRG";
291 de_declare_fmt(c, "CardFile, with objects");
293 else if(d->fmt==DE_CRDFMT_DKO) {
294 d->signature = "DKO";
295 de_declare_fmt(c, "CardFile, Unicode");
297 else {
298 de_err(c, "This is not a known/supported CardFile format");
299 goto done;
301 de_dbg(c, "signature: %s", d->signature);
302 pos+=3;
304 if(d->fmt==DE_CRDFMT_DKO) {
305 // TODO: Samples needed
306 de_warn(c, "Unicode Cardfile files might not be supported correctly");
309 // Microsoft's (old) Cardfile format documentation says that text is in "low
310 // ASCII format", but that seems doubtful on the face of it, and indeed I have
311 // seen files where it is not.
312 d->ole_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_WINDOWS1252);
313 if(d->fmt==DE_CRDFMT_DKO) {
314 d->crd_encoding = DE_ENCODING_UTF16LE;
316 else {
317 d->crd_encoding = d->ole_encoding;
320 if(d->fmt==DE_CRDFMT_RRG) {
321 pos += 4; // Last object's ID
324 d->numcards = de_getu16le_p(&pos);
325 de_dbg(c, "number of cards: %d", (int)d->numcards);
327 for(n=0; n<d->numcards; n++) {
328 do_card(c, d, n, pos);
329 pos+=52;
332 done:
333 de_free(c, d);
336 static int de_identify_cardfile(deark *c)
338 int fmt;
340 fmt = detect_crd_fmt(c);
341 if(fmt!=0) {
342 return 80;
344 return 0;
347 void de_module_cardfile(deark *c, struct deark_module_info *mi)
349 mi->id = "cardfile";
350 mi->desc = "Windows Cardfile address book";
351 mi->run_fn = de_run_cardfile;
352 mi->identify_fn = de_identify_cardfile;