// This file is part of Deark.
// Copyright (C) 2021 Jason Summers
// See the file COPYING for terms of use.

// OS/2 Extended Attributes, including "EA DATA. SF" files
#include <deark-private.h>
// Names this module's entry point; de_module_ea_data() itself is defined at the
// end of this file.
DE_DECLARE_MODULE(de_module_ea_data);
// Members of the per-run context that the functions in this file receive as
// "struct eadata_ctx *d".
// NOTE(review): the enclosing "struct eadata_ctx {" and "};" lines are not visible
// in this extract.

// Encoding used to decode the file name stored in an EA sector.
de_encoding input_encoding;
// Flags passed to dbuf_create_file_from_slice() when an icon is extracted.
UI createflags_for_icons;
// Multiplied by a cluster number to obtain a byte offset in the input file.
i64 bytes_per_cluster;
// Reports whether the data at 'pos' passes the EA-sector checks below; each visible
// failure path returns 0.
// NOTE(review): this extract shows only some of the function's lines. The braces, the
// declaration of 'b', the code that uses 'b' and 'strictmode', and the final return are
// not visible. Callers in this file pass strictmode=1 only from eadata_scan_file() and 0
// where an offset is already known, so presumably strictmode enables the extra checks
// after the signature test -- confirm against the complete source.
static int eadata_is_ea_sector_at_offset(deark *c, struct eadata_ctx *d, i64 pos, int strictmode)
	// Signature: bytes 0x45 0x41, i.e. ASCII "EA".
	if((UI)de_getu16be(pos)!=0x4541) return 0;
	// The 4 bytes at offset 4 must be zero.
	if((UI)de_getu32be(pos+4)!=0) return 0;
	// Byte at offset 8; the test applied to it is not visible here.
	b = de_getbyte(pos+8);
	// The 4 bytes at offset 22 must be zero.
	if((UI)de_getu32be(pos+22)!=0) return 0;
// Maps an extended-attribute data type code to a short descriptive name. The callers
// in this file print the result with "%s" in debug messages.
// NOTE(review): the "switch" header, the closing braces and the return statement --
// including whatever is returned for a code not listed below -- are not visible in
// this extract.
static const char *eadata_get_data_type_name(UI t)
	const char *name = NULL;
	case 0xffde: name="multi-val/single-type"; break;
	case 0xffdf: name="multi-val/multi-type"; break;
	case 0xfffe: name="binary"; break;
	case 0xfffd: name="text"; break;
	case 0xfff9: name="icon"; break;
48 static void eadata_extract_icon(deark
*c
, struct eadata_ctx
*d
, i64 pos
, i64 len
)
50 dbuf_create_file_from_slice(c
->infile
, pos
, len
, "os2.ico", NULL
, d
->createflags_for_icons
);
53 static void eadata_do_text_attrib(deark
*c
, struct eadata_ctx
*d
, i64 pos
, i64 len
)
55 de_ucstring
*s
= NULL
;
57 s
= ucstring_create(c
);
58 // Documented as "ASCII text" -- but I wonder if the actual encoding might
59 // depend on the attribute name.
60 dbuf_read_to_ucstring_n(c
->infile
, pos
, len
, 2048, s
, 0, DE_ENCODING_ASCII
);
61 de_dbg(c
, "text: \"%s\"", ucstring_getpsz_d(s
));
65 static int eadata_do_attribute_lowlevel_singleval(deark
*c
, struct eadata_ctx
*d
,
66 UI attr_dtype
, i64 pos1
, i64 maxlen
, i64
*pbytes_consumed
)
72 attr_dlen
= de_getu16le(pos1
);
73 de_dbg(c
, "inner data len: %"I64_FMT
, attr_dlen
);
74 if(attr_dlen
<2 || attr_dlen
>maxlen
) goto done
;
75 *pbytes_consumed
= 2 + attr_dlen
;
81 eadata_extract_icon(c
, d
, dpos
, attr_dlen
);
84 eadata_do_text_attrib(c
, d
, dpos
, attr_dlen
);
87 de_dbg_hexdump(c
, c
->infile
, dpos
, attr_dlen
, 256, NULL
, 0x1);
// Forward declaration: eadata_do_MVMT() below calls this function, and this function
// (defined after eadata_do_MVMT) calls eadata_do_MVMT() in turn.
static int eadata_do_attribute_lowlevel(deark *c, struct eadata_ctx *d,
	UI attr_dtype, i64 pos1, i64 nbytes_avail, i64 *pbytes_consumed, int nesting_level);
97 // multi-val, multi-type container attribute
98 static int eadata_do_MVMT(deark
*c
, struct eadata_ctx
*d
,
99 i64 pos1
, i64 nbytes_avail
, i64
*pbytes_consumed
, int nesting_level
)
107 int saved_indent_level
;
109 de_dbg_indent_save(c
, &saved_indent_level
);
110 codepage
= (UI
)de_getu16le_p(&pos
);
111 de_dbg(c
, "code page: %u", codepage
);
113 num_entries
= de_getu16le_p(&pos
);
114 de_dbg(c
, "num entries: %d", (int)num_entries
);
115 for(i
=0; i
<num_entries
; i
++) {
117 i64 bytes_consumed2
= 0;
119 if(pos
> pos1
+nbytes_avail
) goto done
;
120 de_dbg(c
, "entry %d at %"I64_FMT
, (int)i
, pos
);
122 attr_dtype
= (UI
)de_getu16le_p(&pos
);
123 de_dbg(c
, "data type: 0x%04x (%s)", attr_dtype
, eadata_get_data_type_name(attr_dtype
));
125 ret
= eadata_do_attribute_lowlevel(c
, d
, attr_dtype
, pos
, pos1
+nbytes_avail
-pos
,
126 &bytes_consumed2
, nesting_level
+1);
128 pos
+= bytes_consumed2
;
129 de_dbg_indent(c
, -1);
132 *pbytes_consumed
= pos
- pos1
;
135 de_dbg_indent_restore(c
, saved_indent_level
);
// Decodes the data of one attribute whose type code (attr_dtype) the caller has already
// read. *pbytes_consumed is reset to 0 first; the MVMT and single-value handlers called
// below receive the same pointer.
// NOTE(review): this extract omits the braces, the "switch" header and most of its
// case/default lines, what the MVST case does, the end of the last call, the "done:"
// label and the return.
static int eadata_do_attribute_lowlevel(deark *c, struct eadata_ctx *d,
	UI attr_dtype, i64 pos1, i64 nbytes_avail, i64 *pbytes_consumed, int nesting_level)
	*pbytes_consumed = 0;
	// I don't know if multi-val attributes are allowed to contain other multi-val attributes.
	// Either way, recursion through eadata_do_MVMT() is cut off beyond level 5.
	if(nesting_level>5) goto done;
	// Multi-value/multi-type container (its case label is not visible here).
	if(!eadata_do_MVMT(c, d, pos1, nbytes_avail, pbytes_consumed, nesting_level)) goto done;
	case 0xffde: // MVST (TODO)
	// Single-valued handler; the remainder of this call is not visible.
	if(!eadata_do_attribute_lowlevel_singleval(c, d, attr_dtype, pos1, nbytes_avail,
// FEA2 structure, starting at the 'fEA' field (1 byte before the name-length byte).
// Debug-prints one attribute's name, outer data length and data type, passes the data
// to eadata_do_attribute_lowlevel(), and sets *pbytes_consumed to the distance from
// pos1 to the end of the attribute's data.
//  maxlen: size of the region, starting at pos1, that the attribute may occupy.
//  tmps: caller-supplied scratch string; emptied and then filled with the name.
// NOTE(review): this extract omits the braces, the local declarations, how 'pos' moves
// from pos1 to the name-length byte and past the name, how attr_dpos is derived, the
// "done:" label and the return.
static int eadata_do_attribute(deark *c, struct eadata_ctx *d, i64 pos1, i64 maxlen,
	de_ucstring *tmps, i64 *pbytes_consumed)
	namelen = (i64)de_getbyte_p(&pos);
	attr_dlen = (i64)de_getu16le_p(&pos);
	ucstring_empty(tmps);
	dbuf_read_to_ucstring(c->infile, pos, namelen, tmps, 0, DE_ENCODING_ASCII);
	de_dbg(c, "name: \"%s\"", ucstring_getpsz_d(tmps));
	de_dbg(c, "outer data len: %"I64_FMT, attr_dlen);
	// Give up if the data would extend past the region the caller allowed.
	if(attr_dpos + attr_dlen > pos1+maxlen) goto done;
	attr_dtype = (UI)de_getu16le_p(&pos);
	de_dbg(c, "data type: 0x%04x (%s)", attr_dtype, eadata_get_data_type_name(attr_dtype));
	// Top-level call (nesting level 0). The +2/-2 presumably skips the 2-byte type code
	// just read -- confirm. Neither the call's result nor tmpbc is used in the visible code.
	eadata_do_attribute_lowlevel(c, d, attr_dtype, attr_dpos+2, attr_dlen-2, &tmpbc, 0);
	// The outer length, not the decoder's own byte count, decides where the attribute ends.
	pos = attr_dpos + attr_dlen;
	*pbytes_consumed = pos - pos1;
// Sets md->ea_data_len.
// Reads the EA data length at the current position, then decodes attributes one after
// another with eadata_do_attribute() until fewer than 5 bytes remain before
// pos1 + md->ea_data_len, or an attribute fails or consumes nothing.
// NOTE(review): this extract omits the end of the parameter list (the body uses 'pos1',
// and the visible caller passes a position as the 4th argument), the braces, the
// declarations of pos/endpos/ret, anything between reading the length and the loop
// other than what is shown, the "done:" label and the release of 's'.
static void eadata_do_ea_data(deark *c, struct eadata_ctx *d, struct easector_ctx *md,
	int saved_indent_level;
	de_ucstring *s = NULL;
	de_dbg_indent_save(c, &saved_indent_level);
	de_dbg(c, "EA data at %"I64_FMT, pos1);
	md->ea_data_len = de_getu16le_p(&pos); // TODO: Is this actually a 4-byte field?
	de_dbg(c, "data len: %"I64_FMT, md->ea_data_len);
	// The length is counted from pos1, i.e. it includes the length field itself.
	endpos = pos1 + md->ea_data_len;
	// Scratch string reused for every attribute name.
	s = ucstring_create(c);
	while(pos < endpos-4) {
		i64 bytes_consumed = 0;
		de_dbg(c, "attribute at %"I64_FMT, pos);
		ret = eadata_do_attribute(c, d, pos, endpos-pos, s, &bytes_consumed);
		de_dbg_indent(c, -1);
		// Stop on failure, and also on zero progress so the loop cannot spin in place.
		if(!ret || bytes_consumed<1) goto done;
		pos += bytes_consumed;
	// (the loop's closing brace is not visible)
	de_dbg_indent_restore(c, saved_indent_level);
// Decodes a FEA2LIST: a 4-byte little-endian list length, followed by attributes that
// each begin with a 4-byte little-endian offset to the next attribute. The rest of each
// attribute is decoded by eadata_do_attribute().
// NOTE(review): this extract omits the braces, the declarations of
// pos1/pos/endpos/fea2list_len/attr_pos/ret (so where the list starts is not visible),
// the loop header, the assignment of attr_pos, and the "done:" label.
static void eadata_do_FEA2LIST(deark *c, struct eadata_ctx *d)
	de_ucstring *tmps = NULL;
	int saved_indent_level;
	de_dbg_indent_save(c, &saved_indent_level);
	// Scratch string reused for every attribute name.
	tmps = ucstring_create(c);
	de_dbg(c, "FEA2LIST at %"I64_FMT, pos1);
	fea2list_len = de_getu32le_p(&pos);
	// The list length is counted from pos1, i.e. it includes the length field itself.
	endpos = pos1 + fea2list_len;
	de_dbg(c, "list len: %"I64_FMT, fea2list_len);
	// Per-attribute locals (the loop header that precedes them is not visible).
	i64 bytes_consumed = 0;
	i64 offset_to_next_attr;
	if(pos >= endpos) goto done;
	de_dbg(c, "attribute at %"I64_FMT, attr_pos);
	offset_to_next_attr = de_getu32le_p(&pos);
	de_dbg(c, "offset to next attr: %"I64_FMT, offset_to_next_attr);
	ret = eadata_do_attribute(c, d, pos, endpos-pos, tmps, &bytes_consumed);
	if(!ret || bytes_consumed<1) goto done;
	// A zero offset ends the list.
	if(offset_to_next_attr==0) goto done;
	// The next attribute is located from the stored offset, measured from the start of
	// this attribute, not from the number of bytes just decoded.
	pos = attr_pos + offset_to_next_attr;
	de_dbg_indent(c, -1);
	ucstring_destroy(tmps);
	de_dbg_indent_restore(c, saved_indent_level);
// Decodes the EA sector at offset pos1: sector number, 12-byte file name, then the EA
// data via eadata_do_ea_data().
//  pbytes_consumed1: optional (may be NULL). Set to 0 on entry and, when decoding gets
//  that far, to the distance from pos1 to the end of the EA data.
// NOTE(review): this extract omits the braces, the declarations of n/pos, the initial
// value of 'pos', the statements that skip the fields between the ones read below, the
// error path after de_err(), the "done:" label and the release of 'md'.
static void eadata_do_ea_sector_by_offset(deark *c, struct eadata_ctx *d, i64 pos1,
	i64 *pbytes_consumed1)
	de_ucstring *fn = NULL;
	struct easector_ctx *md = NULL;
	int saved_indent_level;
	de_dbg_indent_save(c, &saved_indent_level);
	if(pbytes_consumed1) {
		*pbytes_consumed1 = 0;
	md = de_malloc(c, sizeof(struct easector_ctx));
	// Non-strict check (strictmode=0) of the sector at pos1.
	if(!eadata_is_ea_sector_at_offset(c, d, pos1, 0)) {
		de_err(c, "EA sector not found at %"I64_FMT, pos1);
	de_dbg(c, "EA sector at %"I64_FMT, pos1);
	n = de_getu16le_p(&pos);
	de_dbg(c, "sector number (consistency check): %u", (UI)n);
	fn = ucstring_create(c);
	// File name field: 12 bytes, read up to the first NUL, decoded with the input encoding.
	dbuf_read_to_ucstring(c->infile, pos, 12, fn, DE_CONVFLAG_STOP_AT_NUL, d->input_encoding);
	de_dbg(c, "file name: \"%s\"", ucstring_getpsz_d(fn));
	eadata_do_ea_data(c, d, md, pos);
	// eadata_do_ea_data() stored the EA data length in md.
	pos += md->ea_data_len;
	if(pbytes_consumed1) {
		*pbytes_consumed1 = pos - pos1;
	ucstring_destroy(fn);
	de_dbg_indent_restore(c, saved_indent_level);
// Translates an EA id into the byte offset of its EA sector, using two tables of 16-bit
// little-endian values in the input file: one at offset 32 indexed by a_idx, and one
// at offset 512 indexed by id. The sum of the two entries is a cluster number, which is
// multiplied by d->bytes_per_cluster and stored in *poffset.
// NOTE(review): this extract omits the braces, the local declarations, how a_idx is
// derived from id, everything after the final sector check, the "done:" label and the
// return value.
static int eadata_id_to_offset(deark *c, struct eadata_ctx *d, UI id, i64 *poffset)
	// The first table is only consulted for indexes below 240.
	if(a_idx>=240) goto done;
	a_val = (UI)de_getu16le(32+2*(i64)a_idx);
	b_val = (UI)de_getu16le(512+2*(i64)id);
	// 0xffff in the second table means there is nothing to look up (presumably an
	// unused entry -- confirm).
	if(b_val==0xffff) goto done;
	cluster_num = (i64)b_val + (i64)a_val;
	*poffset = d->bytes_per_cluster * cluster_num;
	// Non-strict check (strictmode=0) that an EA sector really starts there.
	if(eadata_is_ea_sector_at_offset(c, d, *poffset, 0)) {
// Walks the input file looking for EA sectors (strict check) and decodes each one found.
// NOTE(review): this extract omits the braces, the declarations and initial values of
// pos/bytes_consumed, and what happens when no EA sector is found at 'pos'.
static void eadata_scan_file(deark *c, struct eadata_ctx *d)
	while(pos < c->infile->len) {
		if(eadata_is_ea_sector_at_offset(c, d, pos, 1)) {
			eadata_do_ea_sector_by_offset(c, d, pos, &bytes_consumed);
			// Always advance by at least one byte, then to the 512 boundary given
			// by de_pad_to_n().
			if(bytes_consumed<1) bytes_consumed = 1;
			pos = de_pad_to_n(pos+bytes_consumed, 512);
// Module entry point. Visible behaviour:
//  - module code 'L': decode the input as a FEA2LIST;
//  - in_params.flags bit 0x1 set: another module asks for one ea_id (in_params.uint1);
//  - the "ea_data:handle" option can also supply an ea_id;
//  - the whole file is scanned, or a single EA sector is located with
//    eadata_id_to_offset() and decoded.
// NOTE(review): this extract omits the braces, the declarations of ea_id/s/ret/pos, the
// else-branches and the conditions that choose between scanning and the single-id
// lookup, the use of 'ret', the "done:" label and the release of 'd'.
static void de_run_eadata(deark *c, de_module_params *mparams)
	de_declare_fmt(c, "OS/2 extended attributes data");
	struct eadata_ctx *d = de_malloc(c, sizeof(struct eadata_ctx));
	if(de_havemodcode(c, mparams, 'L')) {
		d->createflags_for_icons = DE_CREATEFLAG_IS_AUX;
		eadata_do_FEA2LIST(c, d);
	else if(mparams && (mparams->in_params.flags & 0x1)) {
		// We're being used by another module, to handle a specific ea_id.
		ea_id = (UI)mparams->in_params.uint1;
		// An id of 0 means there is nothing to decode.
		if(ea_id==0) goto done;
		d->createflags_for_icons = DE_CREATEFLAG_IS_AUX;
	// Command-line option (the lines around these two are not visible).
	s = de_get_ext_option(c, "ea_data:handle");
	ea_id = (UI)de_atoi(s);
	// CP437 is the default passed to de_get_input_encoding().
	d->input_encoding = de_get_input_encoding(c, mparams, DE_ENCODING_CP437);
	// Fixed cluster size used by eadata_id_to_offset().
	d->bytes_per_cluster = 512;
	eadata_scan_file(c, d);
	ret = eadata_id_to_offset(c, d, ea_id, &pos);
	// NULL: the number of bytes consumed is not needed here.
	eadata_do_ea_sector_by_offset(c, d, pos, NULL);
// Format identification; returns a confidence value (0 = not this format).
// NOTE(review): this extract omits the braces, the value returned inside the last "if",
// and the final return.
static int de_identify_eadata(deark *c)
	// The file must start with bytes 0x45 0x44, i.e. ASCII "ED".
	if(de_getu16be(0)!=0x4544) return 0;
	// Extension " sf" (with the leading space), as in "EA DATA. SF": confidence 100.
	if(de_input_file_has_ext(c, " sf")) return 100;
	// Otherwise test the 30 bytes starting at offset 2 for being all zero.
	if(dbuf_is_all_zeroes(c->infile, 2, 30)) {
438 static void de_help_eadata(deark
*c
)
440 de_msg(c
, "-opt ea_data:handle=<n> : Decode only EA handle/pointer <n>");
// Module registration: fills in the description and the run/identify/help callbacks
// of the module-info record 'mi'.
// NOTE(review): this extract omits the braces and any other assignments to 'mi' (no
// line setting a module id is visible).
void de_module_ea_data(deark *c, struct deark_module_info *mi)
	mi->desc = "EA DATA (OS/2 extended attributes)";
	mi->run_fn = de_run_eadata;
	mi->identify_fn = de_identify_eadata;
	mi->help_fn = de_help_eadata;