Made Unix builds more likely to be Y2038-compliant
[deark.git] / modules / clp.c
blob374dd720023bd391ac9ae2feacf169e38100aa21
1 // This file is part of Deark.
2 // Copyright (C) 2021 Jason Summers
3 // See the file COPYING for terms of use.
5 // Windows CLP saved clipboard format
7 #include <deark-private.h>
8 DE_DECLARE_MODULE(de_module_clp);
10 // TODO: Support ENHMETAFILE, TIFF, RIFF, WAVE
// Clipboard format identifiers, as they appear in the "format" field of a
// CLP index item. Entries left commented out are known format numbers that
// this module does not currently refer to by name.
enum {
	CFMT_TEXT = 1,
	CFMT_BITMAP = 2,
	CFMT_METAFILEPICT = 3,
	// CFMT_SYLK = 4,
	// CFMT_DIF = 5,
	// CFMT_TIFF = 6,
	CFMT_OEMTEXT = 7,
	CFMT_DIB = 8,
	CFMT_PALETTE = 9,
	// CFMT_PENDATA = 10,
	// CFMT_RIFF = 11,
	// CFMT_WAVE = 12,
	CFMT_UNICODETEXT = 13,
	// CFMT_ENHMETAFILE = 14,
	// CFMT_HDROP = 15,
	CFMT_LOCALE = 16,
	CFMT_DIBV5 = 17,
	// CFMT_OWNERDISPLAY = 0x80,
	CFMT_DSPTEXT = 0x81,
	CFMT_DSPBITMAP = 0x82,
	CFMT_DSPMETAFILEPICT = 0x83
	// CFMT_DSPENHMETAFILE = 0x8e
};
34 struct index_item {
35 u8 handled;
36 UI clpfmt;
37 i64 dpos;
38 i64 dlen;
41 struct member_data {
42 UI idx;
43 i64 hpos;
44 de_finfo *fi;
45 de_ucstring *name;
46 u8 clpfmtname_known;
47 char clpfmtname_sz[32];
50 typedef struct localctx_struct {
51 UI sig;
52 u8 extractall;
53 u8 ddb_warned;
54 de_encoding input_encoding_ansi;
55 de_encoding input_encoding_oem;
56 i64 num_items;
57 i64 index_pos;
58 i64 index_item_len;
59 i64 next_avail_dpos;
60 struct index_item *index_array; // array[num_items]
61 u8 have_pal;
62 de_color pal[256];
63 } lctx;
65 static void destroy_md(deark *c, struct member_data *md)
67 if(!md) return;
68 de_finfo_destroy(c, md->fi);
69 ucstring_destroy(md->name);
70 de_free(c, md);
73 static int get_cf_name(deark *c, lctx *d, UI clpfmt, char *buf, size_t buflen)
75 static const char *names[18] = { NULL, "TEXT", "BITMAP", "METAFILEPICT",
76 "SYLK", "DIF", "TIFF", "OEMTEXT", "DIB", "PALETTE", "PENDATA", "RIFF",
77 "WAVE", "UNICODETEXT", "ENHMETAFILE", "HDROP", "LOCALE", "DIBV5" };
79 if((size_t)clpfmt<DE_ARRAYCOUNT(names) && names[clpfmt]) {
80 de_snprintf(buf, buflen, "CF_%s", names[clpfmt]);
81 return 1;
83 // TODO: CF_OWNERDISPLAY, etc.
84 de_strlcpy(buf, "?", buflen);
85 return 0;
88 static void extract_binary(deark *c, lctx *d, struct member_data *md, struct index_item *ii)
90 if(ii->dlen<=0) goto done;
91 dbuf_create_file_from_slice(c->infile, ii->dpos, ii->dlen, "bin", md->fi, 0);
92 done:
96 static void create_text_file_from_slice(dbuf *inf, i64 pos1, i64 len,
97 de_ext_encoding ee, const char *ext, de_finfo *fi)
99 dbuf *outf = NULL;
101 outf = dbuf_create_output_file(inf->c, ext, fi, 0);
102 if(inf->c->write_bom) {
103 dbuf_write_uchar_as_utf8(outf, 0xfeff);
105 dbuf_copy_slice_convert_to_utf8(inf, pos1, len, ee, outf, 0);
106 dbuf_close(outf);
109 static void extract_text(deark *c, lctx *d, struct member_data *md, struct index_item *ii)
111 de_ext_encoding ee;
112 const char *ext;
113 i64 dlen = ii->dlen;
115 ext = "txt";
116 if(ii->clpfmt==CFMT_TEXT || ii->clpfmt==CFMT_DSPTEXT) {
117 ee = DE_EXTENC_MAKE(d->input_encoding_ansi, DE_ENCSUBTYPE_HYBRID);
119 else if(ii->clpfmt==CFMT_OEMTEXT) {
120 ee = DE_EXTENC_MAKE(d->input_encoding_oem, DE_ENCSUBTYPE_HYBRID);
122 else if(ii->clpfmt==CFMT_UNICODETEXT) {
123 ee = DE_ENCODING_UTF16LE;
125 else {
126 goto done;
129 // Search for the NUL terminator, to refine the data len.
130 if(ii->clpfmt==CFMT_UNICODETEXT) {
131 i64 bytes_consumed = 0;
133 if(dbuf_get_utf16_NULterm_len(c->infile, ii->dpos, ii->dlen, &bytes_consumed)) {
134 dlen = bytes_consumed - 2;
137 else {
138 i64 foundpos = 0;
140 if(dbuf_search_byte(c->infile, 0x00, ii->dpos, ii->dlen, &foundpos)) {
141 dlen = foundpos - ii->dpos;
145 create_text_file_from_slice(c->infile, ii->dpos, dlen, ee, ext, md->fi);
146 done:
150 static void extract_ddb(deark *c, lctx *d, struct member_data *md, struct index_item *ii)
152 int old_extract_count;
153 de_module_params *mparams = NULL;
155 mparams = de_malloc(c, sizeof(de_module_params));
156 de_dbg(c, "reading ddb");
157 de_dbg_indent(c, 1);
158 mparams->in_params.codes = "N";
159 mparams->in_params.fi = md->fi;
160 if(d->have_pal) {
161 mparams->in_params.obj1 = (void*)d->pal;
163 old_extract_count = c->num_files_extracted;
164 de_run_module_by_id_on_slice(c, "ddb", mparams, c->infile, ii->dpos, ii->dlen);
165 de_dbg_indent(c, -1);
167 if(c->num_files_extracted>old_extract_count && !d->ddb_warned) {
168 de_warn(c, "Nonportable DDB images might not be decoded correctly");
169 d->ddb_warned = 1;
172 de_free(c, mparams);
175 static void extract_dib(deark *c, lctx *d, struct member_data *md, struct index_item *ii)
177 de_module_params *mparams = NULL;
179 mparams = de_malloc(c, sizeof(de_module_params));
180 de_dbg(c, "reading dib");
181 de_dbg_indent(c, 1);
182 mparams->in_params.fi = md->fi;
183 de_run_module_by_id_on_slice(c, "dib", mparams, c->infile, ii->dpos, ii->dlen);
184 de_dbg_indent(c, -1);
185 de_free(c, mparams);
188 static void extract_wmf(deark *c, lctx *d, struct member_data *md, struct index_item *ii)
190 if(ii->dlen <= 8) goto done;
191 dbuf_create_file_from_slice(c->infile, ii->dpos+8, ii->dlen-8, "wmf", md->fi, 0);
192 done:
196 static void read_palette(deark *c, lctx *d, struct member_data *md, struct index_item *ii)
198 i64 dpos, dlen;
200 if(d->have_pal) goto done;
201 dpos = ii->dpos + 4;
202 dlen = ii->dlen - 4;
204 de_dbg(c, "reading palette");
205 d->have_pal = 1;
206 de_dbg_indent(c, 1);
207 de_read_palette_rgb(c->infile, dpos, dlen/4, 4,
208 d->pal, 256, 0);
209 de_dbg_indent(c, -1);
211 done:
215 // Assign a name to md->fi, if possible
216 static void set_output_filename(deark *c, lctx *d, struct member_data *md, struct index_item *ii)
218 i64 i;
219 int escape_flag = 0;
220 de_ucstring *s = NULL;
222 s = ucstring_create(c);
224 if(c->filenames_from_file) {
225 for(i=0; i<md->name->len; i++) {
226 if(md->name->str[i]=='&' && !escape_flag) {
227 escape_flag = 1;
229 else {
230 ucstring_append_char(s, md->name->str[i]);
231 escape_flag = 0;
236 if(ii->clpfmt==CFMT_BITMAP || ii->clpfmt==CFMT_DSPBITMAP) {
237 // Add an indication that this was a device-dependent bitmap
238 if(ucstring_isnonempty(s)) {
239 ucstring_append_char(s, '.');
241 ucstring_append_sz(s, "ddb", DE_ENCODING_LATIN1);
244 if(ucstring_isempty(s) && md->clpfmtname_known) {
245 ucstring_append_sz(s, md->clpfmtname_sz, DE_ENCODING_UTF8);
248 if(ucstring_isnonempty(s)) {
249 de_finfo_set_name_from_ucstring(c, md->fi, s, 0);
252 ucstring_destroy(s);
255 static void do_item(deark *c, lctx *d, UI idx)
257 struct index_item *ii;
258 struct member_data *md = NULL;
259 i64 pos;
260 int old_extract_count;
261 int saved_indent_level;
263 de_dbg_indent_save(c, &saved_indent_level);
264 md = de_malloc(c, sizeof(struct member_data));
265 md->idx = idx;
266 md->fi = de_finfo_create(c);
267 md->name = ucstring_create(c);
268 md->hpos = d->index_pos + (i64)md->idx * d->index_item_len;
269 ii = &d->index_array[md->idx];
270 pos = md->hpos;
272 de_dbg(c, "item #%u, header at %"I64_FMT, md->idx, md->hpos);
273 de_dbg_indent(c, 1);
274 if(d->sig==0xc350) {
275 pos += 2; // clipfmt, already read
277 else {
278 pos += 4;
280 md->clpfmtname_known = get_cf_name(c, d, ii->clpfmt, md->clpfmtname_sz, sizeof(md->clpfmtname_sz));
281 de_dbg(c, "format: 0x%04x (%s)", ii->clpfmt, md->clpfmtname_sz);
282 if(ii->clpfmt==0) goto done;
283 pos += 4; // dlen, already read
284 pos += 4; // dpos, already read
285 de_dbg(c, "data at %"I64_FMT", len=%"I64_FMT, ii->dpos, ii->dlen);
287 if(d->sig==0xc350) {
288 dbuf_read_to_ucstring(c->infile, pos, 79, md->name, DE_CONVFLAG_STOP_AT_NUL,
289 d->input_encoding_ansi);
291 else {
292 dbuf_read_to_ucstring(c->infile, pos, 79*2, md->name, 0, DE_ENCODING_UTF16LE);
293 ucstring_truncate_at_NUL(md->name);
295 de_dbg(c, "name: \"%s\"", ucstring_getpsz_d(md->name));
297 if(ii->dlen<1) {
298 goto done;
301 set_output_filename(c, d, md, ii);
303 // So we can figure out if we successfully extracted anything
304 old_extract_count = c->num_files_extracted;
306 switch(ii->clpfmt) {
307 case CFMT_BITMAP: case CFMT_DSPBITMAP:
308 extract_ddb(c, d, md, ii);
309 break;
310 case CFMT_DIB: case CFMT_DIBV5:
311 extract_dib(c, d, md, ii);
312 break;
313 case CFMT_TEXT: case CFMT_OEMTEXT: case CFMT_UNICODETEXT: case CFMT_DSPTEXT:
314 extract_text(c, d, md, ii);
315 break;
316 case CFMT_PALETTE:
317 read_palette(c, d, md, ii);
318 break;
319 case CFMT_METAFILEPICT: case CFMT_DSPMETAFILEPICT:
320 extract_wmf(c, d, md, ii);
321 break;
324 if(c->num_files_extracted==old_extract_count) {
325 if(d->extractall) {
326 extract_binary(c, d, md, ii);
328 else if(c->debug_level>=2) {
329 de_dbg_hexdump(c, c->infile, ii->dpos, ii->dlen, 256, NULL, 0x1);
333 done:
334 destroy_md(c, md);
335 de_dbg_indent_restore(c, saved_indent_level);
338 static void do_process_items(deark *c, lctx *d)
340 UI i;
342 // Items to read first (PALETTE, LOCALE)
343 for(i=0; i<(UI)d->num_items; i++) {
344 if(d->index_array[i].clpfmt==CFMT_PALETTE || d->index_array[i].clpfmt==CFMT_LOCALE) {
345 do_item(c, d, i);
346 d->index_array[i].handled = 1;
350 // Everything else
351 for(i=0; i<(UI)d->num_items; i++) {
352 if(d->index_array[i].handled==0) {
353 do_item(c, d, i);
358 // Returns 0 if we should stop processing the CLP file
359 static int do_read_index(deark *c, lctx *d)
361 int retval = 0;
362 i64 i;
364 // d->num_items is untrusted, but can be no more than 64K.
365 d->index_array = de_mallocarray(c, d->num_items, sizeof(struct index_item));
367 de_dbg(c, "[scanning index]");
368 for(i=0; i<d->num_items; i++) {
369 struct index_item *ii;
370 i64 pos;
372 ii = &d->index_array[i];
373 pos = d->index_pos + i*d->index_item_len;
375 if(d->sig==0xc350) {
376 ii->clpfmt = (UI)de_getu16le_p(&pos);
378 else {
379 ii->clpfmt = (UI)de_getu32le_p(&pos);
381 ii->dlen = de_getu32le_p(&pos);
382 ii->dpos = de_getu32le_p(&pos);
384 if(ii->clpfmt==0) ii->dlen = 0;
386 if(ii->dlen>0) {
387 // Sanity check. I don't know if the data segments have to be in order and
388 // non-overlapping, but for now I'm assuming they do.
389 if((ii->dpos < d->next_avail_dpos) || (ii->dpos+ii->dlen > c->infile->len)) {
390 de_err(c, "item %u: Bad data segment position", (UI)i);
391 goto done;
393 d->next_avail_dpos = ii->dpos+ii->dlen;
396 retval = 1;
397 done:
398 return retval;
401 static void de_run_clp(deark *c, de_module_params *mparams)
403 lctx *d = NULL;
404 const char *tmps;
405 int ret;
406 i64 pos = 0;
408 d = de_malloc(c, sizeof(lctx));
410 d->input_encoding_ansi = de_get_input_encoding(c, NULL, DE_ENCODING_WINDOWS1252);
412 d->input_encoding_oem = DE_ENCODING_CP437; // default
413 tmps = de_get_ext_option(c, "clp:oemenc");
414 if(!tmps) {
415 tmps = de_get_ext_option(c, "oemenc");
417 if(tmps) {
418 d->input_encoding_oem = de_encoding_name_to_code(tmps);
419 if(d->input_encoding_oem == DE_ENCODING_UNKNOWN) {
420 d->input_encoding_oem = DE_ENCODING_CP437;
424 ret = de_get_ext_option_bool(c, "clp:extractall", -1);
425 if(ret>0 || (c->extract_level>=2 && ret!=0)) {
426 d->extractall = 1;
429 d->sig = (UI)de_getu16le_p(&pos);
430 de_dbg(c, "signature: 0x%04x", d->sig);
431 if(d->sig<0xc350 || d->sig>0xc352) {
432 de_err(c, "Not a Windows CLP file");
433 goto done;
436 d->num_items = de_getu16le_p(&pos);
437 de_dbg(c, "num items: %u", (UI)d->num_items);
439 d->index_pos = pos;
440 d->index_item_len = (d->sig==0xc350) ? 89 : 172;
442 if(!do_read_index(c, d)) goto done;
443 do_process_items(c, d);
445 done:
446 if(d) {
447 de_free(c, d->index_array);
448 de_free(c, d);
452 static int de_identify_clp(deark *c)
454 int has_ext;
455 UI sig;
457 // TODO: Improve this
458 sig = (UI)de_getu16le(0);
459 if(sig<0xc350 || sig>0xc352) return 0;
460 has_ext = de_input_file_has_ext(c, "clp");
461 if(has_ext) return 80;
462 return 15;
465 static void de_help_clp(deark *c)
467 de_msg(c, "-opt clp:extractall : Extract all items");
468 de_msg(c, "-opt oemenc=... : The encoding for OEM Text items");
471 void de_module_clp(deark *c, struct deark_module_info *mi)
473 mi->id = "clp";
474 mi->desc = "Windows Clipboard";
475 mi->run_fn = de_run_clp;
476 mi->identify_fn = de_identify_clp;
477 mi->help_fn = de_help_clp;