1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // Extract graphics and text from Windows Cardfile .crd format
7 #include <deark-config.h>
8 #include <deark-private.h>
// Presumably a forward declaration of de_module_cardfile(), the module
// registration function defined at the end of this file.
9 DE_DECLARE_MODULE(de_module_cardfile
);
// Per-file state for this module (used through the name "lctx" by the
// functions below).
// NOTE(review): only part of the struct is visible in this excerpt.
17 typedef struct localctx_struct
{
// Format codes returned by detect_crd_fmt(), one per 3-byte file signature
// ("MGC", "RRG", "DKO" respectively).
18 #define DE_CRDFMT_MGC 1
19 #define DE_CRDFMT_RRG 2
20 #define DE_CRDFMT_DKO 3
// Signature string for the detected format. It is set by de_run_cardfile()
// and printed in that function's debug output.
24 const char *signature
;
// Extract a card's text to an output file with the "txt" extension.
//
// text_pos/text_len give the location of the text in the input file.
// Nothing is written if text_len is less than 1, or if the requested range
// would extend past the end of the input file.
// fi is handed unchanged to dbuf_create_file_from_slice().
28 static void do_extract_text_data(deark
*c
, lctx
*d
, de_finfo
*fi
, i64 text_pos
, i64 text_len
)
// Guard: nothing to extract.
30 if(text_len
<1) return;
// Guard: the text must lie entirely inside the input file.
31 if(text_pos
+ text_len
> c
->infile
->len
) return;
33 // TODO: Consider trying to convert to UTF-8, especially if the user used the
35 dbuf_create_file_from_slice(c
->infile
, text_pos
, text_len
, "txt", fi
, 0);
// Debug helper: read a card's text into a temporary ucstring and print it
// with de_dbg() in the form: text: "...".
//
// text_pos/text_len give the location of the text in the input file.
// DE_DBG_MAX_STRLEN is also passed to the read call (presumably as a cap on
// how much is read for display -- confirm against dbuf_read_to_ucstring_n()).
// NOTE(review): the remaining arguments of the read call and any cleanup of
// `s` are not visible in this excerpt.
38 static void do_dbg_text_data(deark
*c
, lctx
*d
, i64 text_pos
, i64 text_len
)
40 de_ucstring
*s
= NULL
;
42 s
= ucstring_create(c
);
43 dbuf_read_to_ucstring_n(c
->infile
, text_pos
, text_len
, DE_DBG_MAX_STRLEN
, s
,
45 de_dbg(c
, "text: \"%s\"", ucstring_getpsz_d(s
));
// Convert and write the bitmap attached to a card.
// Called from do_carddata_mgc().
//
// Layout read here, relative to pg->datapos:
//   +2  : width  (unsigned 16-bit little-endian)
//   +4  : height (unsigned 16-bit little-endian)
//   +10 : start of the pixel data handed to the bilevel converter
// If c->filenames_from_file is set, the output name is taken from pg->name.
49 static void do_bitmap_mgc(deark
*c
, lctx
*d
, struct page_ctx
*pg
)
53 de_bitmap
*img
= NULL
;
54 de_finfo
*fi_bitmap
= NULL
;
56 fi_bitmap
= de_finfo_create(c
);
57 if(c
->filenames_from_file
)
58 de_finfo_set_name_from_ucstring(c
, fi_bitmap
, pg
->name
, 0);
60 w
= de_getu16le(pg
->datapos
+2);
61 h
= de_getu16le(pg
->datapos
+4);
62 de_dbg(c
, "bitmap dimensions: %d"DE_CHAR_TIMES
"%d", (int)w
, (int)h
);
// NOTE(review): img is created here and destroyed below, but it is not passed
// to any call visible in this excerpt -- confirm whether it is still needed.
64 img
= de_bitmap_create(c
, w
, h
, 1);
// Bytes per source row: the width rounded up to a multiple of 16, counted as
// 2 bytes per 16 columns.
65 src_rowspan
= ((w
+15)/16)*2;
67 de_convert_and_write_image_bilevel2(c
->infile
, pg
->datapos
+10,
68 w
, h
, src_rowspan
, 0, fi_bitmap
, 0);
70 de_bitmap_destroy(img
);
71 de_finfo_destroy(c
, fi_bitmap
);
// Handle the text of one card, located at text_pos with length text_len.
//
// If text_len is less than 1, jumps to the `done` label.
// When c->extract_level is at least 2, a de_finfo is created (named from
// pg->name if c->filenames_from_file is set) and the text is written out via
// do_extract_text_data(). do_dbg_text_data() prints the text to the debug log.
// NOTE(review): the branch structure around the do_dbg_text_data() call and
// the position of the `done` label are not visible in this excerpt.
74 static void do_text(deark
*c
, lctx
*d
, struct page_ctx
*pg
,
75 i64 text_pos
, i64 text_len
)
77 de_finfo
*fi_text
= NULL
;
79 if(text_len
<1) goto done
;
81 if(c
->extract_level
>=2) {
82 fi_text
= de_finfo_create(c
);
83 if(c
->filenames_from_file
)
84 de_finfo_set_name_from_ucstring(c
, fi_text
, pg
->name
, 0);
86 do_extract_text_data(c
, d
, fi_text
, text_pos
, text_len
);
89 do_dbg_text_data(c
, d
, text_pos
, text_len
);
// fi_text is initialized to NULL above and only assigned inside the
// extract_level branch.
93 de_finfo_destroy(c
, fi_text
);
// Process the data section of a card: an optional bitmap plus text.
// Called from do_card() when the format is not DE_CRDFMT_RRG.
//
// The data section starts with a 16-bit little-endian bitmap length at
// pg->datapos. Two alternative text locations are computed below:
//   - length at datapos+2, text at datapos+4
//   - length at datapos+10+bitmap_len, text 2 bytes after that
// NOTE(review): the conditions selecting between them, and the condition
// guarding the do_bitmap_mgc() call, are not visible in this excerpt;
// presumably the first layout applies when there is no bitmap.
96 static void do_carddata_mgc(deark
*c
, lctx
*d
, struct page_ctx
*pg
)
104 bitmap_len
= de_getu16le(pg
->datapos
);
105 de_dbg(c
, "bitmap length: %d", (int)bitmap_len
);
108 do_bitmap_mgc(c
, d
, pg
);
114 text_len
= de_getu16le(pg
->datapos
+2);
115 text_pos
= pg
->datapos
+4;
118 text_len
= de_getu16le(pg
->datapos
+ 10 + bitmap_len
);
119 text_pos
= pg
->datapos
+ 10 + bitmap_len
+2;
121 de_dbg(c
, "text length: %d", (int)text_len
);
124 do_text(c
, d
, pg
, text_pos
, text_len
);
// Parse the embedded object of an RRG-format card, starting at pos1.
//
// Reads a 32-bit object ID, then runs the "ole1" module on the input
// starting at the current position, with in_params.codes set to "U" and
// in_params.input_encoding set to d->ole_encoding. After the object, the
// following fields are read and logged: char width and height (two u16),
// a rect (two i16 followed by two u16), and an object type (u16).
// On the path shown, *bytes_consumed is set to the number of bytes between
// pos1 and the final read position.
// NOTE(review): the end of the parameter list, the return statements and
// the cleanup of mparams are not visible in this excerpt. The caller treats
// a zero return value as failure.
128 static int do_object_rrg(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
,
131 de_module_params
*mparams
= NULL
;
136 n1
= de_getu32le_p(&pos
);
137 de_dbg(c
, "object ID: 0x%08x", (unsigned int)n1
);
139 mparams
= de_malloc(c
, sizeof(de_module_params
));
140 mparams
->in_params
.codes
= "U";
141 mparams
->in_params
.input_encoding
= d
->ole_encoding
;
143 // TODO: Make the output filenames contain the index text
144 de_dbg(c
, "OLE1 data at %"I64_FMT
, pos
);
146 de_run_module_by_id_on_slice(c
, "ole1", mparams
, c
->infile
, pos
,
148 de_dbg_indent(c
, -1);
150 // Unfortunately, there is no direct way to figure out the OLE object size,
151 // and we need it to find the card's text (and to know whether it has text).
152 // The ole1 module will try to tell us the size, but this feature needs more
153 // work, and is difficult to test.
// Bit 0x1 of out_params.flags is tested before out_params.int64_1 is used as
// the amount to advance pos by (i.e. the object size reported by ole1).
155 if(mparams
->out_params
.flags
& 0x1) {
156 pos
+= mparams
->out_params
.int64_1
;
159 // ole1 module failed to figure out the object size
162 de_dbg(c
, "[OLE object ends at %"I64_FMT
"]", pos
);
164 n1
= de_getu16le_p(&pos
);
165 n2
= de_getu16le_p(&pos
);
166 de_dbg(c
, "char width,height: %d,%d", (int)n1
, (int)n2
);
168 n1
= de_geti16le_p(&pos
);
169 n2
= de_geti16le_p(&pos
);
170 n3
= de_getu16le_p(&pos
);
171 n4
= de_getu16le_p(&pos
);
172 de_dbg(c
, "rect: %d,%d,%d,%d", (int)n1
, (int)n2
, (int)n3
, (int)n4
);
174 n1
= de_getu16le_p(&pos
);
175 de_dbg(c
, "object type: %d", (int)n1
);
177 *bytes_consumed
= pos
- pos1
;
// Process the data section of an RRG-format card.
// Called from do_card() when d->fmt is DE_CRDFMT_RRG.
//
// Starting at pg->datapos: a 16-bit flags field, then an object parsed by
// do_object_rrg(), then a 16-bit text length followed by the text itself.
// If the object cannot be parsed (zero return, or fewer than 1 byte
// consumed), a warning is issued, because the text position depends on
// the object's size.
// NOTE(review): the condition under which the object is parsed, and what
// happens after the warning, are not visible in this excerpt.
184 static void do_carddata_rrg(deark
*c
, lctx
*d
, struct page_ctx
*pg
)
189 i64 pos
= pg
->datapos
;
191 flags
= (unsigned int)de_getu16le_p(&pos
);
192 de_dbg(c
, "flags: %u", flags
);
194 i64 bytes_consumed
= 0;
195 ret
= do_object_rrg(c
, d
, pg
, pos
, &bytes_consumed
);
196 if(!ret
|| bytes_consumed
<1) {
197 de_warn(c
, "card #%d: Failed to parse OLE object; any text on this card "
198 "cannot be processed.", (int)pg
->cardnum
);
// Skip over the object to reach the text length field.
201 pos
+= bytes_consumed
;
204 text_len
= de_getu16le_p(&pos
);
205 de_dbg(c
, "text length: %d", (int)text_len
);
207 do_text(c
, d
, pg
, pos
, text_len
);
214 // Process a card, given the offset of its index
//
// cardnum is the card's number as used in messages; pos is the offset of the
// card's index entry in the input file.
// From the index entry this reads a 32-bit data offset at pos+6 and the
// index text (40 bytes starting at pos+11), then dispatches to
// do_carddata_rrg() or do_carddata_mgc() for the card's data.
// If the data offset is at or beyond the end of the file, processing jumps
// to the `done` label.
// NOTE(review): the position of the `done` label and the release of `pg`
// itself are not visible in this excerpt.
215 static void do_card(deark
*c
, lctx
*d
, i64 cardnum
, i64 pos
)
217 int saved_indent_level
;
218 struct page_ctx
*pg
= NULL
;
220 de_dbg_indent_save(c
, &saved_indent_level
);
222 pg
= de_malloc(c
, sizeof(struct page_ctx
));
223 pg
->cardnum
= cardnum
;
224 de_dbg(c
, "card #%d", (int)pg
->cardnum
);
226 de_dbg(c
, "index at %"I64_FMT
, pos
);
228 pg
->datapos
= de_getu32le(pos
+6);
229 de_dbg(c
, "datapos: %"I64_FMT
, pg
->datapos
);
230 if(pg
->datapos
>=c
->infile
->len
) goto done
;
232 pg
->name
= ucstring_create(c
);
// For UTF-16LE the bytes are read without DE_CONVFLAG_STOP_AT_NUL and the
// string is truncated at the first NUL afterwards; the other read passes
// DE_CONVFLAG_STOP_AT_NUL to the read call instead.
233 if(d
->crd_encoding
==DE_ENCODING_UTF16LE
) {
234 dbuf_read_to_ucstring(c
->infile
, pos
+11, 40, pg
->name
, 0,
236 ucstring_truncate_at_NUL(pg
->name
);
239 dbuf_read_to_ucstring(c
->infile
, pos
+11, 40, pg
->name
, DE_CONVFLAG_STOP_AT_NUL
,
242 de_dbg(c
, "index text: \"%s\"", ucstring_getpsz_d(pg
->name
));
244 de_dbg_indent(c
, -1);
246 de_dbg(c
, "data at %"I64_FMT
, pg
->datapos
);
// RRG cards go through the RRG parser; the MGC parser handles the other
// case (the `else` itself is not visible in this excerpt).
249 if(d
->fmt
==DE_CRDFMT_RRG
) {
250 do_carddata_rrg(c
, d
, pg
);
253 do_carddata_mgc(c
, d
, pg
);
258 ucstring_destroy(pg
->name
);
261 de_dbg_indent_restore(c
, saved_indent_level
);
// Identify the Cardfile variant by comparing the first 3 bytes of `buf`
// against the signatures "MGC", "RRG" and "DKO".
// Returns the matching DE_CRDFMT_* code.
// NOTE(review): how `buf` is filled and the value returned when nothing
// matches are not visible in this excerpt; callers treat the three codes
// above as the only recognized formats.
264 static int detect_crd_fmt(deark
*c
)
269 if(!de_memcmp(buf
, "MGC", 3)) return DE_CRDFMT_MGC
;
270 if(!de_memcmp(buf
, "RRG", 3)) return DE_CRDFMT_RRG
;
271 if(!de_memcmp(buf
, "DKO", 3)) return DE_CRDFMT_DKO
;
// Module entry point (registered as run_fn by de_module_cardfile()).
//
// Steps visible here:
//   1. Allocate the local context and detect the format from the signature.
//   2. Declare the format name, or report an error for unknown signatures.
//   3. Choose the encodings used for OLE data and for card text.
//   4. Read the card count and call do_card() once per card.
// NOTE(review): the initial value of `pos`, how it advances between cards,
// and the cleanup of `d` are not visible in this excerpt.
275 static void de_run_cardfile(deark
*c
, de_module_params
*mparams
)
281 d
= de_malloc(c
, sizeof(lctx
));
284 d
->fmt
= detect_crd_fmt(c
);
285 if(d
->fmt
==DE_CRDFMT_MGC
) {
286 d
->signature
= "MGC";
287 de_declare_fmt(c
, "CardFile");
289 else if(d
->fmt
==DE_CRDFMT_RRG
) {
290 d
->signature
= "RRG";
291 de_declare_fmt(c
, "CardFile, with objects");
293 else if(d
->fmt
==DE_CRDFMT_DKO
) {
294 d
->signature
= "DKO";
295 de_declare_fmt(c
, "CardFile, Unicode");
298 de_err(c
, "This is not a known/supported CardFile format");
301 de_dbg(c
, "signature: %s", d
->signature
);
304 if(d
->fmt
==DE_CRDFMT_DKO
) {
305 // TODO: Samples needed
306 de_warn(c
, "Unicode Cardfile files might not be supported correctly");
309 // Microsoft's (old) Cardfile format documentation says that text is in "low
310 // ASCII format", but that seems doubtful on the face of it, and indeed I have
311 // seen files where it is not.
// DE_ENCODING_WINDOWS1252 is the encoding passed as the third argument here
// (presumably the default when none is specified -- confirm against
// de_get_input_encoding()).
312 d
->ole_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_WINDOWS1252
);
// DKO files use UTF-16LE for card text; the second assignment below reuses
// the OLE encoding (presumably the else branch, which is not visible here).
313 if(d
->fmt
==DE_CRDFMT_DKO
) {
314 d
->crd_encoding
= DE_ENCODING_UTF16LE
;
317 d
->crd_encoding
= d
->ole_encoding
;
320 if(d
->fmt
==DE_CRDFMT_RRG
) {
321 pos
+= 4; // Last object's ID
324 d
->numcards
= de_getu16le_p(&pos
);
325 de_dbg(c
, "number of cards: %d", (int)d
->numcards
);
// The loop counter n is passed to do_card() as the card number.
327 for(n
=0; n
<d
->numcards
; n
++) {
328 do_card(c
, d
, n
, pos
);
// Identification callback (registered as identify_fn by
// de_module_cardfile()). It obtains the format code from detect_crd_fmt().
// NOTE(review): how that code is turned into the return value is not
// visible in this excerpt.
336 static int de_identify_cardfile(deark
*c
)
340 fmt
= detect_crd_fmt(c
);
// Module registration: fills in the module's description and its run and
// identify callbacks in *mi.
// NOTE(review): any other fields set on mi (e.g. an id) are not visible in
// this excerpt.
347 void de_module_cardfile(deark
*c
, struct deark_module_info
*mi
)
350 mi
->desc
= "Windows Cardfile address book";
351 mi
->run_fn
= de_run_cardfile
;
352 mi
->identify_fn
= de_identify_cardfile
;