1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
6 // Special module that reads SAUCE metadata for other modules to use,
7 // and handles files with SAUCE records if they aren't otherwise handled.
8 // SAUCE = Standard Architecture for Universal Comment Extensions
10 #include <deark-config.h>
11 #include <deark-private.h>
12 #include <deark-fmtutil.h>
// Module declaration for de_module_sauce. The function itself is defined
// later in this file.
13 DE_DECLARE_MODULE(de_module_sauce
);
// Private working state used while reading one SAUCE record.
// NOTE(review): code in this file also accesses num_comments and
// combine_comments through this struct; their declarations are not visible
// in this listing.
15 struct sauce_private_ctx
{
// Encoding used to decode the title, artist, and organization fields.
18 de_ext_encoding encoding_for_strings
;
// Encoding used to decode the comment lines.
19 de_ext_encoding encoding_for_comments
;
22 static i64
sauce_get_string_length(const u8
*buf
, i64 len
, int respect_trailing_spaces
)
25 i64 last_nonpadding_char_pos
= -1;
27 for(i
=len
-1; i
>=0; i
--) {
28 // Spec says to use spaces for padding, and for nonexistent data.
29 // But some files use NUL bytes.
30 if((buf
[i
]==0x20 && !respect_trailing_spaces
) || buf
[i
]==0x00) {
34 last_nonpadding_char_pos
= i
;
38 return last_nonpadding_char_pos
+1;
41 static void sauce_strip_trailing_whitespace(de_ucstring
*s
)
44 (s
->str
[s
->len
-1]==' ' || s
->str
[s
->len
-1]==0x0a)) {
45 ucstring_truncate(s
, s
->len
-1);
49 static void sauce_bytes_to_ucstring(deark
*c
, const u8
*buf
, i64 len
,
50 de_ucstring
*s
, de_ext_encoding ee
)
52 ucstring_append_bytes(s
, buf
, len
, 0, ee
);
55 static int sauce_is_valid_date_string(const u8
*buf
, i64 len
)
59 for(i
=0; i
<len
; i
++) {
60 if(buf
[i
]>='0' && buf
[i
]<='9') continue;
61 // Spaces aren't allowed, but some files use them.
62 if(buf
[i
]==' ' && (i
==4 || i
==6)) continue;
// Maps the SAUCE DataType code 'dt' to a descriptive name (codes 0 through 8
// are recognized). The caller prints the name in debug output.
// NOTE(review): the name used for any other code is not visible in this
// listing.
68 static const char *get_sauce_datatype_name(u8 dt
)
73 case 0: n
="undefined"; break;
74 case 1: n
="character"; break;
75 case 2: n
="bitmap graphics"; break;
76 case 3: n
="vector graphics"; break;
77 case 4: n
="audio"; break;
78 case 5: n
="BinaryText"; break;
79 case 6: n
="XBIN"; break;
80 case 7: n
="archive"; break;
81 case 8: n
="executable"; break;
// Maps a SAUCE file type to a descriptive name for debug output.
//
// dt: the DataType code.
// t: the combined type code, 256*DataType + FileType (e.g. 0x0101 is
//    DataType 1, FileType 1), as computed by the caller.
//
// NOTE(review): the name used for codes not listed below is not visible in
// this listing.
86 static const char *get_sauce_filetype_name(u8 dt
, unsigned int t
)
// DataType 5 is "BinaryText" (see get_sauce_datatype_name). For it, the
// FileType byte is apparently not a type code at all but encodes the width,
// hence the fixed description.
90 if(dt
==5) return "=width/2";
// DataType 1 (character) types.
92 case 0x0100: n
="ASCII"; break;
93 case 0x0101: n
="ANSI"; break;
94 case 0x0102: n
="ANSiMation"; break;
95 case 0x0103: n
="RIP script"; break;
96 case 0x0104: n
="PCBoard"; break;
97 case 0x0105: n
="Avatar"; break;
98 case 0x0106: n
="HTML"; break;
99 case 0x0108: n
="TundraDraw"; break;
// DataType 2 (bitmap graphics) types.
100 case 0x0200: n
="GIF"; break;
101 case 0x0206: n
="BMP"; break;
102 case 0x020a: n
="PNG"; break;
103 case 0x020b: n
="JPEG"; break;
// DataType 6 (XBIN) and DataType 8 (executable).
104 case 0x0600: n
="XBIN"; break;
105 case 0x0800: n
="executable"; break;
107 // There are many more SAUCE file types defined, but it's not clear how
108 // many have actually been used.
// Reads the SAUCE comment lines from 'inf' and assembles them into
// si->comment. If the assembled text is empty after trimming, si->comment is
// destroyed instead. When c->extract_level is 2 or higher, the text is also
// written, as UTF-8, to an auxiliary output file named "comment.txt".
//
// The number of comment lines is taken from d->num_comments, and the mode
// from d->combine_comments.
//
113 // The SAUCE spec has insufficient detail about how comments are to be
114 // interpreted. And some ANSI editors don't obey the spec, anyway.
//
// How this function interprets them:
116 // * We have two modes, depending on the combine_comments flag.
117 // * We interpret 0x0a as a newline. Most other bytes are CP437 printable
//   characters.
119 // * If !combine_comments, trailing spaces and trailing NUL bytes are ignored for
//   each comment.
121 // * If combine_comments, same as above except that trailing spaces are
122 // respected for each comment except the last.
123 // * If !combine_comments, we add a newline after every comment except the
//   last.
125 // (Autodetecting which mode to use would be nice, and it's possible to make
126 // a pretty good guess, but it's not possible to get it right every time.)
127 static void sauce_read_comments(deark
*c
, struct sauce_private_ctx
*d
, dbuf
*inf
,
128 struct de_SAUCE_info
*si
)
135 de_ucstring
*tmpcomment
= NULL
;
// Nothing to read if the record declares no comment lines.
137 if(d
->num_comments
<1) goto done
;
// The comment block is expected to end 128 bytes before the end of the file.
// It consists of a 5-byte "COMNT" ID followed by num_comments lines of 64
// bytes each.
138 cmnt_blk_start
= inf
->len
- 128 - (5 + d
->num_comments
*64);
// A nonzero result from dbuf_memcmp() means the "COMNT" ID is not where the
// comment count says it should be.
140 if(dbuf_memcmp(inf
, cmnt_blk_start
, "COMNT", 5)) {
141 de_dbg(c
, "invalid SAUCE comment, not found at %d", (int)cmnt_blk_start
);
146 de_dbg(c
, "SAUCE comment block at %d", (int)cmnt_blk_start
);
// si->comment accumulates the full text; tmpcomment holds one decoded line at
// a time.
148 si
->comment
= ucstring_create(c
);
149 tmpcomment
= ucstring_create(c
);
152 for(k
=0; k
<d
->num_comments
; k
++) {
153 int respect_trailing_spaces
= 0;
// Line k starts after the 5-byte ID, at a stride of 64 bytes.
155 cmnt_pos
= cmnt_blk_start
+5+k
*64;
156 dbuf_read(inf
, buf
, cmnt_pos
, 64);
// In combine mode, trailing spaces are significant on every line but the last.
158 if(d
->combine_comments
&& k
!=(d
->num_comments
-1)) {
159 respect_trailing_spaces
= 1;
161 cmnt_len
= sauce_get_string_length(buf
, 64, respect_trailing_spaces
);
163 de_dbg(c
, "comment at %d, len=%d", (int)cmnt_pos
, (int)cmnt_len
);
// Decode this line by itself (so it can be logged), then append it to the
// full comment.
166 ucstring_empty(tmpcomment
);
167 sauce_bytes_to_ucstring(c
, buf
, cmnt_len
, tmpcomment
, d
->encoding_for_comments
);
168 ucstring_append_ucstring(si
->comment
, tmpcomment
);
// In non-combine mode, lines are separated by a newline (none after the last).
169 if(!d
->combine_comments
&& k
!=(d
->num_comments
-1)) {
170 ucstring_append_char(si
->comment
, 0x0a);
173 de_dbg(c
, "comment: \"%s\"", ucstring_getpsz(tmpcomment
));
174 de_dbg_indent(c
, -1);
// Trim trailing spaces and newlines from the assembled text. If nothing is
// left, discard the string.
177 sauce_strip_trailing_whitespace(si
->comment
);
178 if(ucstring_isempty(si
->comment
)) {
179 ucstring_destroy(si
->comment
);
// At extract level 2 or higher, also save the comment as a text file.
184 if(c
->extract_level
>=2) {
185 dbuf
*cmnt_outf
= NULL
;
187 cmnt_outf
= dbuf_create_output_file(c
, "comment.txt", NULL
, DE_CREATEFLAG_IS_AUX
);
188 ucstring_write_as_utf8(c
, si
->comment
, cmnt_outf
, 1);
189 dbuf_puts(cmnt_outf
, "\n");
190 dbuf_close(cmnt_outf
);
193 de_dbg_indent(c
, -1);
// tmpcomment was only scratch space.
196 ucstring_destroy(tmpcomment
);
// Parses the creation date field and stores it in si->creation_date.
//
// date_raw: the raw date bytes. The year is read from bytes 0-3, the month
//   from bytes 4-5, and the day of month from bytes 6-7.
// date_raw_len: the number of bytes in date_raw. The function does nothing
//   unless this is exactly 8.
//
// The resulting timestamp is also written to the debug output.
199 static void do_SAUCE_creation_date(deark
*c
, struct de_SAUCE_info
*si
,
200 const u8
*date_raw
, size_t date_raw_len
)
203 char timestamp_buf
[64];
206 if(date_raw_len
!=8) return;
208 // Convert to de_timestamp format
// Each component is copied into scanbuf and converted with de_atoi64().
// NOTE(review): the declaration and termination of scanbuf are not visible
// in this listing.
211 de_memcpy(scanbuf
, &date_raw
[0], 4);
213 yr
= de_atoi64(scanbuf
);
216 de_memcpy(scanbuf
, &date_raw
[4], 2);
218 mon
= de_atoi64(scanbuf
);
221 de_memcpy(scanbuf
, &date_raw
[6], 2);
223 mday
= de_atoi64(scanbuf
);
// The field holds only a date, so the time of day is set to 12:00:00 and the
// precision is marked as one day.
225 de_make_timestamp(&si
->creation_date
, yr
, mon
, mday
, 12, 0, 0);
226 si
->creation_date
.precision
= DE_TSPREC_1DAY
;
228 de_timestamp_to_string(&si
->creation_date
, timestamp_buf
, sizeof(timestamp_buf
), 0);
229 de_dbg(c
, "creation date: %s", timestamp_buf
);
// Reads the SAUCE record from 'f' into 'si', writing each decoded field to
// the debug output along the way.
232 // Caller allocates si using fmtutil_create_SAUCE().
233 // Caller must later free si using fmtutil_free_SAUCE().
//
// All field offsets below are relative to 'pos', the start of the record.
// NOTE(review): the assignment of 'pos' and the function's return statements
// are not visible in this listing. One caller treats a nonzero return value
// as "a record was read".
234 static int do_read_SAUCE(deark
*c
, dbuf
*f
, struct de_SAUCE_info
*si
)
241 de_ucstring
*tflags_descr
= NULL
;
243 struct sauce_private_ctx
*d
= NULL
;
// The record must start with the 7-byte signature "SAUCE00".
246 if(dbuf_memcmp(f
, pos
+0, "SAUCE00", 7)) {
// Set up the private context: text fields are decoded as CP437, and comments
// use the same encoding with the HYBRID subtype. The "sauce:combinecomments"
// option selects the comment-combining mode.
252 d
= de_malloc(c
, sizeof(struct sauce_private_ctx
));
253 d
->encoding_for_strings
= DE_ENCODING_CP437
;
254 d
->encoding_for_comments
= DE_EXTENC_MAKE(d
->encoding_for_strings
, DE_ENCSUBTYPE_HYBRID
);
255 d
->combine_comments
= de_get_ext_option_bool(c
, "sauce:combinecomments", 0);
// Title: 35 bytes at pos+7, with trailing padding removed.
258 dbuf_read(f
, tmpbuf
, pos
+7, 35);
259 tmpbuf_len
= sauce_get_string_length(tmpbuf
, 35, 0);
261 si
->title
= ucstring_create(c
);
262 sauce_bytes_to_ucstring(c
, tmpbuf
, tmpbuf_len
, si
->title
, d
->encoding_for_strings
);
263 de_dbg(c
, "title: \"%s\"", ucstring_getpsz_d(si
->title
));
// Artist: 20 bytes at pos+42.
267 dbuf_read(f
, tmpbuf
, pos
+42, 20);
268 tmpbuf_len
= sauce_get_string_length(tmpbuf
, 20, 0);
270 si
->artist
= ucstring_create(c
);
271 sauce_bytes_to_ucstring(c
, tmpbuf
, tmpbuf_len
, si
->artist
, d
->encoding_for_strings
);
272 de_dbg(c
, "artist: \"%s\"", ucstring_getpsz_d(si
->artist
));
// Organization: 20 bytes at pos+62.
276 dbuf_read(f
, tmpbuf
, pos
+62, 20);
277 tmpbuf_len
= sauce_get_string_length(tmpbuf
, 20, 0);
279 si
->organization
= ucstring_create(c
);
280 sauce_bytes_to_ucstring(c
, tmpbuf
, tmpbuf_len
, si
->organization
, d
->encoding_for_strings
);
281 de_dbg(c
, "organization: \"%s\"", ucstring_getpsz_d(si
->organization
));
// Creation date: 8 bytes at pos+82. It is parsed only if it passes the
// validity check.
285 dbuf_read(f
, tmpbuf
, pos
+82, 8);
286 if(sauce_is_valid_date_string(tmpbuf
, 8)) {
287 do_SAUCE_creation_date(c
, si
, tmpbuf
, 8);
// Original file size: 32-bit little-endian integer at pos+90.
290 si
->original_file_size
= dbuf_getu32le(f
, pos
+90);
291 de_dbg(c
, "original file size: %d", (int)si
->original_file_size
);
// DataType: one byte at pos+94.
293 si
->data_type
= dbuf_getbyte(f
, pos
+94);
294 name
= get_sauce_datatype_name(si
->data_type
);
295 de_dbg(c
, "data type: %d (%s)", (int)si
->data_type
, name
);
// FileType: one byte at pos+95. 't' combines DataType and FileType into a
// single code (256*DataType + FileType) used for the type tests below.
297 si
->file_type
= dbuf_getbyte(f
, pos
+95);
298 t
= 256*(unsigned int)si
->data_type
+ si
->file_type
;
299 name
= get_sauce_filetype_name(si
->data_type
, t
);
300 de_dbg(c
, "file type: %d (%s)", (int)si
->file_type
, name
);
// TInfo1 through TInfo4: four 16-bit little-endian fields at pos+96 through
// pos+103. Their meaning depends on the file type.
302 si
->tinfo1
= (u16
)dbuf_getu16le(f
, pos
+96);
303 si
->tinfo2
= (u16
)dbuf_getu16le(f
, pos
+98);
304 si
->tinfo3
= (u16
)dbuf_getu16le(f
, pos
+100);
305 si
->tinfo4
= (u16
)dbuf_getu16le(f
, pos
+102);
306 de_dbg(c
, "TInfo1: %u", (unsigned int)si
->tinfo1
);
307 de_dbg(c
, "TInfo2: %u", (unsigned int)si
->tinfo2
);
308 de_dbg(c
, "TInfo3: %u", (unsigned int)si
->tinfo3
);
309 de_dbg(c
, "TInfo4: %u", (unsigned int)si
->tinfo4
);
// For ASCII, ANSI, ANSiMation, PCBoard, Avatar, TundraDraw, and XBIN, TInfo1
// is the width in characters.
311 if(t
==0x0100 || t
==0x0101 || t
==0x0102 || t
==0x0104 || t
==0x0105 || t
==0x0108 || t
==0x0600) {
312 si
->width_in_chars
= (i64
)si
->tinfo1
;
313 de_dbg(c
, "width in chars: %d", (int)si
->width_in_chars
);
// For the same types except ANSiMation (0x0102), TInfo2 is the number of
// lines.
315 if(t
==0x0100 || t
==0x0101 || t
==0x0104 || t
==0x0105 || t
==0x0108 || t
==0x0600) {
316 si
->number_of_lines
= (i64
)si
->tinfo2
;
317 de_dbg(c
, "number of lines: %d", (int)si
->number_of_lines
);
// Number of comment lines: one byte at pos+104.
320 d
->num_comments
= (i64
)dbuf_getbyte(f
, pos
+104);
321 de_dbg(c
, "num comments: %d", (int)d
->num_comments
);
322 if(d
->num_comments
>0) {
323 sauce_read_comments(c
, d
, f
, si
);
// TFlags: one byte at pos+105.
326 si
->tflags
= dbuf_getbyte(f
, pos
+105);
// The individual flag bits are described only for ASCII, ANSI, ANSiMation,
// and DataType 5 (BinaryText).
328 tflags_descr
= ucstring_create(c
);
329 if(t
==0x0100 || t
==0x0101 || t
==0x0102 || si
->data_type
==5) {
// Bit 0: non-blink mode.
331 if(si
->tflags
&0x01) {
332 ucstring_append_flags_item(tflags_descr
, "non-blink mode");
// Bits 1-2: font width (1 = 8-pixel, 2 = 9-pixel).
334 if((si
->tflags
& 0x06)>>1 == 1) {
335 ucstring_append_flags_item(tflags_descr
, "8-pixel font");
337 else if((si
->tflags
& 0x06)>>1 == 2) {
338 ucstring_append_flags_item(tflags_descr
, "9-pixel font");
// Bits 3-4: pixel aspect ratio (1 = non-square, 2 = square).
340 if((si
->tflags
& 0x18)>>3 == 1) {
341 ucstring_append_flags_item(tflags_descr
, "non-square pixels");
343 else if((si
->tflags
& 0x18)>>3 == 2) {
344 ucstring_append_flags_item(tflags_descr
, "square pixels");
348 de_dbg(c
, "tflags: 0x%02x (%s)", (unsigned int)si
->tflags
,
349 ucstring_getpsz(tflags_descr
));
// A size of 0, or one larger than the file minus 128 bytes, cannot be right.
// In that case, substitute the size implied by the file length: everything
// except the trailing 128 bytes and the comment block (5-byte ID plus 64
// bytes per comment line).
352 if(si
->original_file_size
==0 || si
->original_file_size
>f
->len
-128) {
353 // If this field seems bad, try to correct it.
354 si
->original_file_size
= f
->len
-128-(5+d
->num_comments
*64);
359 ucstring_destroy(tflags_descr
);
364 // When running as a submodule, we assume the caller already detected the
365 // presence of SAUCE (probably using detect_SAUCE()), printed a header line
366 // (again probably using detect_SAUCE()), and indented as needed.
367 static void run_sauce_as_submodule(deark
*c
, de_module_params
*mparams
)
369 struct de_SAUCE_info
*si_local
= NULL
;
370 struct de_SAUCE_info
*si_to_use
;
372 if(mparams
&& mparams
->out_params
.obj1
) {
373 si_to_use
= (struct de_SAUCE_info
*)mparams
->out_params
.obj1
;
376 si_local
= fmtutil_create_SAUCE(c
);
377 si_to_use
= si_local
;
380 do_read_SAUCE(c
, c
->infile
, si_to_use
);
382 fmtutil_free_SAUCE(c
, si_local
);
// Entry point used when this module is not being run as a submodule.
// Runs SAUCE detection on c->infile, reads the record into a newly created
// de_SAUCE_info object, and frees that object before returning.
385 static void run_sauce_direct(deark
*c
, de_module_params
*mparams
)
387 struct de_SAUCE_info
*si
= NULL
;
388 struct de_SAUCE_detection_data sdd
;
391 fmtutil_detect_SAUCE(c
, c
->infile
, &sdd
, 0x1);
// NOTE(review): the condition that guards this error is not visible in this
// listing; presumably it tests the detection result in sdd.
// If the user explicitly requested this module, a missing record is an error.
393 if(c
->module_disposition
==DE_MODDISP_EXPLICIT
) {
394 de_err(c
, "No SAUCE record found");
399 si
= fmtutil_create_SAUCE(c
);
401 ret
= do_read_SAUCE(c
, c
->infile
, si
);
402 de_dbg_indent(c
, -1);
// If this module was chosen by autodetection, presumably no other module
// handled the file. Report the format named by the SAUCE record as
// unsupported.
403 if(ret
&& c
->module_disposition
==DE_MODDISP_AUTODETECT
) {
404 de_err(c
, "This file has a SAUCE metadata record that identifies it as "
405 "DataType %d, FileType %d, but it is not a supported format.",
406 (int)si
->data_type
, (int)si
->file_type
);
410 fmtutil_free_SAUCE(c
, si
);
413 static void de_run_sauce(deark
*c
, de_module_params
*mparams
)
415 if(c
->module_disposition
==DE_MODDISP_INTERNAL
) {
416 run_sauce_as_submodule(c
, mparams
);
419 run_sauce_direct(c
, mparams
);
// Identify callback for the SAUCE module.
// Records that SAUCE detection has been attempted, and runs the detection
// with its results stored in c->detection_data->sauce.
// NOTE(review): the return values (confidence levels) are not visible in this
// listing.
423 static int de_identify_sauce(deark
*c
)
425 c
->detection_data
->SAUCE_detection_attempted
= 1;
426 if(fmtutil_detect_SAUCE(c
, c
->infile
, &c
->detection_data
->sauce
, 0)) {
427 // This module should have a very low priority, but other modules can use
428 // the results of its detection.
// Module registration function: fills in 'mi' with this module's description,
// its run and identify callbacks, and its flags.
434 void de_module_sauce(deark
*c
, struct deark_module_info
*mi
)
437 mi
->desc
= "SAUCE metadata";
438 mi
->run_fn
= de_run_sauce
;
439 mi
->identify_fn
= de_identify_sauce
;
// The HIDDEN and SHAREDDETECTION flags are added to whatever flags are
// already set.
440 mi
->flags
|= DE_MODFLAG_HIDDEN
| DE_MODFLAG_SHAREDDETECTION
;