1 // This file is part of Deark.
2 // Copyright (C) 2021 Jason Summers
3 // See the file COPYING for terms of use.
5 // Windows CLP saved clipboard format
7 #include <deark-private.h>
8 DE_DECLARE_MODULE(de_module_clp
);
10 // TODO: Support ENHMETAFILE, TIFF, RIFF, WAVE
// Clipboard format codes, as stored in the clpfmt field of each index item.
// For the standard formats the value is also the index of the format's name
// in the names[] table in get_cf_name(). The commented-out codes are not
// referenced by the code in this section of the file.
13 #define CFMT_METAFILEPICT 3
17 #define CFMT_OEMTEXT 7
19 #define CFMT_PALETTE 9
20 //#define CFMT_PENDATA 10
21 //#define CFMT_RIFF 11
22 //#define CFMT_WAVE 12
23 #define CFMT_UNICODETEXT 13
24 //#define CFMT_ENHMETAFILE 14
25 //#define CFMT_HDROP 15
26 #define CFMT_LOCALE 16
28 //#define CFMT_OWNERDISPLAY 0x80
29 #define CFMT_DSPTEXT 0x81
30 #define CFMT_DSPBITMAP 0x82
31 #define CFMT_DSPMETAFILEPICT 0x83
32 //#define CFMT_DSPENHMETAFILE 0x8e
// Printable name of the item's clipboard format. do_item() has get_cf_name()
// fill this in, and set_output_filename() may use it as the output name.
47 char clpfmtname_sz
[32];
// Per-run state of the CLP module (allocated in de_run_clp()).
50 typedef struct localctx_struct
{
// Encoding for CF_TEXT / CF_DSPTEXT data, and for 8-bit item names.
54 de_encoding input_encoding_ansi
;
// Encoding for CF_OEMTEXT data. Defaults to CP437; can be overridden by
// the "oemenc" option (see de_run_clp()).
55 de_encoding input_encoding_oem
;
// Parsed index entries; allocated by do_read_index() and freed by
// de_run_clp().
60 struct index_item
*index_array
; // array[num_items]
// Releases what a member_data owns: destroys its file-info object (md->fi)
// and its name string (md->name).
65 static void destroy_md(deark
*c
, struct member_data
*md
)
68 de_finfo_destroy(c
, md
->fi
);
69 ucstring_destroy(md
->name
);
// Writes a printable name for clipboard format code clpfmt into buf (buflen
// is the size of buf): "CF_<name>" when clpfmt indexes a non-NULL entry of
// names[], with "?" as the fallback text.
// do_item() stores the return value in md->clpfmtname_known, so presumably
// nonzero means "name recognized" -- TODO confirm against the return
// statements.
73 static int get_cf_name(deark
*c
, lctx
*d
, UI clpfmt
, char *buf
, size_t buflen
)
// Indexed by format code. Entry 0 is NULL, so code 0 never gets a CF_ name.
75 static const char *names
[18] = { NULL
, "TEXT", "BITMAP", "METAFILEPICT",
76 "SYLK", "DIF", "TIFF", "OEMTEXT", "DIB", "PALETTE", "PENDATA", "RIFF",
77 "WAVE", "UNICODETEXT", "ENHMETAFILE", "HDROP", "LOCALE", "DIBV5" };
// Bounds-check the code before indexing; the second test rejects NULL entries.
79 if((size_t)clpfmt
<DE_ARRAYCOUNT(names
) && names
[clpfmt
]) {
80 de_snprintf(buf
, buflen
, "CF_%s", names
[clpfmt
]);
83 // TODO: CF_OWNERDISPLAY, etc.
84 de_strlcpy(buf
, "?", buflen
);
// Writes the item's raw data (ii->dlen bytes at offset ii->dpos of the input
// file) to an output file with extension "bin", using md->fi for the file
// info. Items with no data (dlen<=0) produce no file.
88 static void extract_binary(deark
*c
, lctx
*d
, struct member_data
*md
, struct index_item
*ii
)
90 if(ii
->dlen
<=0) goto done
;
91 dbuf_create_file_from_slice(c
->infile
, ii
->dpos
, ii
->dlen
, "bin", md
->fi
, 0);
// Creates an output file (extension ext, file info fi) and fills it with the
// len bytes at offset pos1 of inf, converted from encoding ee to UTF-8.
96 static void create_text_file_from_slice(dbuf
*inf
, i64 pos1
, i64 len
,
97 de_ext_encoding ee
, const char *ext
, de_finfo
*fi
)
101 outf
= dbuf_create_output_file(inf
->c
, ext
, fi
, 0);
// Start the file with a BOM (U+FEFF) when the write_bom setting is on.
102 if(inf
->c
->write_bom
) {
103 dbuf_write_uchar_as_utf8(outf
, 0xfeff);
105 dbuf_copy_slice_convert_to_utf8(inf
, pos1
, len
, ee
, outf
, 0);
// Extracts a text item (CF_TEXT, CF_DSPTEXT, CF_OEMTEXT or CF_UNICODETEXT)
// as a UTF-8 text file. The source encoding depends on the clipboard format,
// and the data length is shortened to end at the first NUL terminator when
// one is found inside the item's data.
109 static void extract_text(deark
*c
, lctx
*d
, struct member_data
*md
, struct index_item
*ii
)
// Select the source encoding: ANSI for TEXT/DSPTEXT, OEM for OEMTEXT,
// UTF-16LE for UNICODETEXT.
116 if(ii
->clpfmt
==CFMT_TEXT
|| ii
->clpfmt
==CFMT_DSPTEXT
) {
117 ee
= DE_EXTENC_MAKE(d
->input_encoding_ansi
, DE_ENCSUBTYPE_HYBRID
);
119 else if(ii
->clpfmt
==CFMT_OEMTEXT
) {
120 ee
= DE_EXTENC_MAKE(d
->input_encoding_oem
, DE_ENCSUBTYPE_HYBRID
);
122 else if(ii
->clpfmt
==CFMT_UNICODETEXT
) {
123 ee
= DE_ENCODING_UTF16LE
;
129 // Search for the NUL terminator, to refine the data len.
130 if(ii
->clpfmt
==CFMT_UNICODETEXT
) {
131 i64 bytes_consumed
= 0;
133 if(dbuf_get_utf16_NULterm_len(c
->infile
, ii
->dpos
, ii
->dlen
, &bytes_consumed
)) {
// NOTE(review): the -2 presumably drops the 2-byte UTF-16 terminator that
// bytes_consumed includes -- confirm against dbuf_get_utf16_NULterm_len().
134 dlen
= bytes_consumed
- 2;
// 8-bit text: the data ends just before the first 0x00 byte.
140 if(dbuf_search_byte(c
->infile
, 0x00, ii
->dpos
, ii
->dlen
, &foundpos
)) {
141 dlen
= foundpos
- ii
->dpos
;
145 create_text_file_from_slice(c
->infile
, ii
->dpos
, dlen
, ee
, ext
, md
->fi
);
// Decodes a device-dependent bitmap item (CF_BITMAP / CF_DSPBITMAP) by
// running the "ddb" module on the item's data slice. The output file info
// (md->fi) and the palette pointer d->pal are passed to that module through
// the module parameters. If the run extracted a file, a portability warning
// is emitted unless d->ddb_warned is already set.
150 static void extract_ddb(deark
*c
, lctx
*d
, struct member_data
*md
, struct index_item
*ii
)
152 int old_extract_count
;
153 de_module_params
*mparams
= NULL
;
155 mparams
= de_malloc(c
, sizeof(de_module_params
));
156 de_dbg(c
, "reading ddb");
// NOTE(review): the meaning of code "N" is defined by the ddb module; it is
// not evident from this file.
158 mparams
->in_params
.codes
= "N";
159 mparams
->in_params
.fi
= md
->fi
;
161 mparams
->in_params
.obj1
= (void*)d
->pal
;
// Remember the extraction count so we can tell whether the ddb module
// actually produced a file.
163 old_extract_count
= c
->num_files_extracted
;
164 de_run_module_by_id_on_slice(c
, "ddb", mparams
, c
->infile
, ii
->dpos
, ii
->dlen
);
165 de_dbg_indent(c
, -1);
167 if(c
->num_files_extracted
>old_extract_count
&& !d
->ddb_warned
) {
168 de_warn(c
, "Nonportable DDB images might not be decoded correctly");
// Decodes a DIB item (CF_DIB / CF_DIBV5) by running the "dib" module on the
// item's data slice, passing md->fi as the output file info.
175 static void extract_dib(deark
*c
, lctx
*d
, struct member_data
*md
, struct index_item
*ii
)
177 de_module_params
*mparams
= NULL
;
179 mparams
= de_malloc(c
, sizeof(de_module_params
));
180 de_dbg(c
, "reading dib");
182 mparams
->in_params
.fi
= md
->fi
;
183 de_run_module_by_id_on_slice(c
, "dib", mparams
, c
->infile
, ii
->dpos
, ii
->dlen
);
184 de_dbg_indent(c
, -1);
// Extracts a metafile item (CF_METAFILEPICT / CF_DSPMETAFILEPICT) as a "wmf"
// file. The first 8 bytes of the item are skipped and the rest is written
// unchanged; items of 8 bytes or fewer produce no file.
// NOTE(review): the 8 skipped bytes are presumably the METAFILEPICT header
// preceding the metafile data -- confirm.
188 static void extract_wmf(deark
*c
, lctx
*d
, struct member_data
*md
, struct index_item
*ii
)
190 if(ii
->dlen
<= 8) goto done
;
191 dbuf_create_file_from_slice(c
->infile
, ii
->dpos
+8, ii
->dlen
-8, "wmf", md
->fi
, 0);
// Reads a CF_PALETTE item via de_read_palette_rgb(). Only the first palette
// is used: if d->have_pal is already set, the item is skipped.
// NOTE(review): judging by the arguments (dlen/4, 4), the data is treated as
// dlen/4 entries of 4 bytes each -- confirm against de_read_palette_rgb().
196 static void read_palette(deark
*c
, lctx
*d
, struct member_data
*md
, struct index_item
*ii
)
200 if(d
->have_pal
) goto done
;
204 de_dbg(c
, "reading palette");
207 de_read_palette_rgb(c
->infile
, dpos
, dlen
/4, 4,
209 de_dbg_indent(c
, -1);
215 // Assign a name to md->fi, if possible
// The name is assembled in a temporary string s from up to three sources:
//  - the item's own name (md->name), only when c->filenames_from_file is
//    set; '&' characters get special handling controlled by escape_flag;
//  - a "ddb" suffix for device-dependent bitmap items;
//  - the format name (md->clpfmtname_sz), used only if s is still empty and
//    the format name is known.
// md->fi is left unnamed if s ends up empty.
216 static void set_output_filename(deark
*c
, lctx
*d
, struct member_data
*md
, struct index_item
*ii
)
220 de_ucstring
*s
= NULL
;
222 s
= ucstring_create(c
);
224 if(c
->filenames_from_file
) {
225 for(i
=0; i
<md
->name
->len
; i
++) {
226 if(md
->name
->str
[i
]=='&' && !escape_flag
) {
230 ucstring_append_char(s
, md
->name
->str
[i
]);
236 if(ii
->clpfmt
==CFMT_BITMAP
|| ii
->clpfmt
==CFMT_DSPBITMAP
) {
237 // Add an indication that this was a device-dependent bitmap
// A '.' separator is added only when there is already a name to separate from.
238 if(ucstring_isnonempty(s
)) {
239 ucstring_append_char(s
, '.');
241 ucstring_append_sz(s
, "ddb", DE_ENCODING_LATIN1
);
244 if(ucstring_isempty(s
) && md
->clpfmtname_known
) {
245 ucstring_append_sz(s
, md
->clpfmtname_sz
, DE_ENCODING_UTF8
);
248 if(ucstring_isnonempty(s
)) {
249 de_finfo_set_name_from_ucstring(c
, md
->fi
, s
, 0);
// Processes one clipboard item, using its entry in d->index_array (already
// parsed by do_read_index()): logs the header fields, reads the item's name,
// picks an output filename, and dispatches to the extractor for its
// clipboard format.
255 static void do_item(deark
*c
, lctx
*d
, UI idx
)
257 struct index_item
*ii
;
258 struct member_data
*md
= NULL
;
260 int old_extract_count
;
261 int saved_indent_level
;
263 de_dbg_indent_save(c
, &saved_indent_level
);
264 md
= de_malloc(c
, sizeof(struct member_data
));
266 md
->fi
= de_finfo_create(c
);
267 md
->name
= ucstring_create(c
);
// File offset of this item's index entry.
268 md
->hpos
= d
->index_pos
+ (i64
)md
->idx
* d
->index_item_len
;
269 ii
= &d
->index_array
[md
->idx
];
272 de_dbg(c
, "item #%u, header at %"I64_FMT
, md
->idx
, md
->hpos
);
275 pos
+= 2; // clipfmt, already read
280 md
->clpfmtname_known
= get_cf_name(c
, d
, ii
->clpfmt
, md
->clpfmtname_sz
, sizeof(md
->clpfmtname_sz
));
281 de_dbg(c
, "format: 0x%04x (%s)", ii
->clpfmt
, md
->clpfmtname_sz
);
// Format code 0: nothing further is done with this entry.
282 if(ii
->clpfmt
==0) goto done
;
283 pos
+= 4; // dlen, already read
284 pos
+= 4; // dpos, already read
285 de_dbg(c
, "data at %"I64_FMT
", len=%"I64_FMT
, ii
->dpos
, ii
->dlen
);
// Item name, 8-bit form: up to 79 bytes in the ANSI encoding, NUL-terminated.
// NOTE(review): which of the two name readers runs is decided by a condition
// that is not restated here -- presumably the file variant; confirm.
288 dbuf_read_to_ucstring(c
->infile
, pos
, 79, md
->name
, DE_CONVFLAG_STOP_AT_NUL
,
289 d
->input_encoding_ansi
);
// Item name, UTF-16LE form: 79 code units, truncated at the first NUL.
292 dbuf_read_to_ucstring(c
->infile
, pos
, 79*2, md
->name
, 0, DE_ENCODING_UTF16LE
);
293 ucstring_truncate_at_NUL(md
->name
);
295 de_dbg(c
, "name: \"%s\"", ucstring_getpsz_d(md
->name
));
301 set_output_filename(c
, d
, md
, ii
);
303 // So we can figure out if we successfully extracted anything
304 old_extract_count
= c
->num_files_extracted
;
// Dispatch on the clipboard format.
307 case CFMT_BITMAP
: case CFMT_DSPBITMAP
:
308 extract_ddb(c
, d
, md
, ii
);
310 case CFMT_DIB
: case CFMT_DIBV5
:
311 extract_dib(c
, d
, md
, ii
);
313 case CFMT_TEXT
: case CFMT_OEMTEXT
: case CFMT_UNICODETEXT
: case CFMT_DSPTEXT
:
314 extract_text(c
, d
, md
, ii
);
317 read_palette(c
, d
, md
, ii
);
319 case CFMT_METAFILEPICT
: case CFMT_DSPMETAFILEPICT
:
320 extract_wmf(c
, d
, md
, ii
);
// If the format-specific handler produced no file, the raw data may be
// written out instead; otherwise it is hex-dumped (at most 256 bytes) at
// debug level 2 and up.
324 if(c
->num_files_extracted
==old_extract_count
) {
326 extract_binary(c
, d
, md
, ii
);
328 else if(c
->debug_level
>=2) {
329 de_dbg_hexdump(c
, c
->infile
, ii
->dpos
, ii
->dlen
, 256, NULL
, 0x1);
335 de_dbg_indent_restore(c
, saved_indent_level
);
// Walks the index twice. The first pass selects the PALETTE and LOCALE items
// and marks them handled; the second pass selects every item not yet marked.
// This ordering lets a palette be read before the items that may need it
// (extract_ddb() passes d->pal to the ddb module).
338 static void do_process_items(deark
*c
, lctx
*d
)
342 // Items to read first (PALETTE, LOCALE)
343 for(i
=0; i
<(UI
)d
->num_items
; i
++) {
344 if(d
->index_array
[i
].clpfmt
==CFMT_PALETTE
|| d
->index_array
[i
].clpfmt
==CFMT_LOCALE
) {
346 d
->index_array
[i
].handled
= 1;
// Second pass: everything the first pass did not mark.
351 for(i
=0; i
<(UI
)d
->num_items
; i
++) {
352 if(d
->index_array
[i
].handled
==0) {
358 // Returns 0 if we should stop processing the CLP file
// Allocates d->index_array and fills it from the d->num_items index entries,
// which start at d->index_pos and are d->index_item_len bytes each. For each
// entry it reads the format code, data length and data position, and
// sanity-checks the data segment.
359 static int do_read_index(deark
*c
, lctx
*d
)
364 // d->num_items is untrusted, but can be no more than 64K.
365 d
->index_array
= de_mallocarray(c
, d
->num_items
, sizeof(struct index_item
));
367 de_dbg(c
, "[scanning index]");
368 for(i
=0; i
<d
->num_items
; i
++) {
369 struct index_item
*ii
;
372 ii
= &d
->index_array
[i
];
373 pos
= d
->index_pos
+ i
*d
->index_item_len
;
// The format code is stored as either 16 or 32 bits.
// NOTE(review): the selecting condition is not restated here -- presumably
// it depends on the file variant; confirm.
376 ii
->clpfmt
= (UI
)de_getu16le_p(&pos
);
379 ii
->clpfmt
= (UI
)de_getu32le_p(&pos
);
381 ii
->dlen
= de_getu32le_p(&pos
);
382 ii
->dpos
= de_getu32le_p(&pos
);
// An entry with format code 0 is treated as having no data.
384 if(ii
->clpfmt
==0) ii
->dlen
= 0;
387 // Sanity check. I don't know if the data segments have to be in order and
388 // non-overlapping, but for now I'm assuming they do.
389 if((ii
->dpos
< d
->next_avail_dpos
) || (ii
->dpos
+ii
->dlen
> c
->infile
->len
)) {
390 de_err(c
, "item %u: Bad data segment position", (UI
)i
);
// The next segment must start at or after the end of this one.
393 d
->next_avail_dpos
= ii
->dpos
+ii
->dlen
;
// Module entry point. Sets up the local context (text encodings and
// options), validates the file signature, reads the item count, then reads
// the index and processes the items.
// Options consulted: "clp:oemenc", "oemenc", "clp:extractall".
401 static void de_run_clp(deark
*c
, de_module_params
*mparams
)
408 d
= de_malloc(c
, sizeof(lctx
));
410 d
->input_encoding_ansi
= de_get_input_encoding(c
, NULL
, DE_ENCODING_WINDOWS1252
);
412 d
->input_encoding_oem
= DE_ENCODING_CP437
; // default
// NOTE(review): "oemenc" is presumably consulted only when "clp:oemenc" is
// not given -- the intervening condition is not restated here.
413 tmps
= de_get_ext_option(c
, "clp:oemenc");
415 tmps
= de_get_ext_option(c
, "oemenc");
// An unrecognized encoding name falls back to CP437.
418 d
->input_encoding_oem
= de_encoding_name_to_code(tmps
);
419 if(d
->input_encoding_oem
== DE_ENCODING_UNKNOWN
) {
420 d
->input_encoding_oem
= DE_ENCODING_CP437
;
// True when "clp:extractall" is enabled explicitly (ret>0), or when it is
// not explicitly disabled (ret!=0) and the extract level is 2 or higher.
424 ret
= de_get_ext_option_bool(c
, "clp:extractall", -1);
425 if(ret
>0 || (c
->extract_level
>=2 && ret
!=0)) {
// Accepted signatures are 0xc350 through 0xc352.
429 d
->sig
= (UI
)de_getu16le_p(&pos
);
430 de_dbg(c
, "signature: 0x%04x", d
->sig
);
431 if(d
->sig
<0xc350 || d
->sig
>0xc352) {
432 de_err(c
, "Not a Windows CLP file");
436 d
->num_items
= de_getu16le_p(&pos
);
437 de_dbg(c
, "num items: %u", (UI
)d
->num_items
);
// Index entries are 89 bytes for signature 0xc350 and 172 bytes otherwise.
440 d
->index_item_len
= (d
->sig
==0xc350) ? 89 : 172;
442 if(!do_read_index(c
, d
)) goto done
;
443 do_process_items(c
, d
);
447 de_free(c
, d
->index_array
);
// Identification callback. Returns 0 unless the first 16-bit little-endian
// value is in the range 0xc350..0xc352; returns 80 when the signature
// matches and the input file has a "clp" extension.
// NOTE(review): the result for a matching signature without that extension
// is not restated here.
452 static int de_identify_clp(deark
*c
)
457 // TODO: Improve this
458 sig
= (UI
)de_getu16le(0);
459 if(sig
<0xc350 || sig
>0xc352) return 0;
460 has_ext
= de_input_file_has_ext(c
, "clp");
461 if(has_ext
) return 80;
// Help callback: prints the module-specific options.
465 static void de_help_clp(deark
*c
)
467 de_msg(c
, "-opt clp:extractall : Extract all items");
468 de_msg(c
, "-opt oemenc=... : The encoding for OEM Text items");
// Module registration: fills in the module's description and its run,
// identify and help callbacks.
471 void de_module_clp(deark
*c
, struct deark_module_info
*mi
)
474 mi
->desc
= "Windows Clipboard";
475 mi
->run_fn
= de_run_clp
;
476 mi
->identify_fn
= de_identify_clp
;
477 mi
->help_fn
= de_help_clp
;