nrg: Improved support for v2
[deark.git] / modules / sauce.c
blob3ac74b7913962f135afc47ba404ca5347ad08b1b
1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
5 // SAUCE
6 // Special module that reads SAUCE metadata for other modules to use,
7 // and handles files with SAUCE records if they aren't otherwise handled.
8 // SAUCE = Standard Architecture for Universal Comment Extensions
10 #include <deark-config.h>
11 #include <deark-private.h>
12 #include <deark-fmtutil.h>
13 DE_DECLARE_MODULE(de_module_sauce);
15 struct sauce_private_ctx {
16 int combine_comments;
17 i64 num_comments;
18 de_ext_encoding encoding_for_strings;
19 de_ext_encoding encoding_for_comments;
22 static i64 sauce_get_string_length(const u8 *buf, i64 len, int respect_trailing_spaces)
24 i64 i;
25 i64 last_nonpadding_char_pos = -1;
27 for(i=len-1; i>=0; i--) {
28 // Spec says to use spaces for padding, and for nonexistent data.
29 // But some files use NUL bytes.
30 if((buf[i]==0x20 && !respect_trailing_spaces) || buf[i]==0x00) {
33 else {
34 last_nonpadding_char_pos = i;
35 break;
38 return last_nonpadding_char_pos+1;
41 static void sauce_strip_trailing_whitespace(de_ucstring *s)
43 while(s->len>=1 &&
44 (s->str[s->len-1]==' ' || s->str[s->len-1]==0x0a)) {
45 ucstring_truncate(s, s->len-1);
49 static void sauce_bytes_to_ucstring(deark *c, const u8 *buf, i64 len,
50 de_ucstring *s, de_ext_encoding ee)
52 ucstring_append_bytes(s, buf, len, 0, ee);
55 static int sauce_is_valid_date_string(const u8 *buf, i64 len)
57 i64 i;
59 for(i=0; i<len; i++) {
60 if(buf[i]>='0' && buf[i]<='9') continue;
61 // Spaces aren't allowed, but some files use them.
62 if(buf[i]==' ' && (i==4 || i==6)) continue;
63 return 0;
65 return 1;
68 static const char *get_sauce_datatype_name(u8 dt)
70 const char *n = "?";
72 switch(dt) {
73 case 0: n="undefined"; break;
74 case 1: n="character"; break;
75 case 2: n="bitmap graphics"; break;
76 case 3: n="vector graphics"; break;
77 case 4: n="audio"; break;
78 case 5: n="BinaryText"; break;
79 case 6: n="XBIN"; break;
80 case 7: n="archive"; break;
81 case 8: n="executable"; break;
83 return n;
86 static const char *get_sauce_filetype_name(u8 dt, unsigned int t)
88 const char *n = "?";
90 if(dt==5) return "=width/2";
91 switch(t) {
92 case 0x0100: n="ASCII"; break;
93 case 0x0101: n="ANSI"; break;
94 case 0x0102: n="ANSiMation"; break;
95 case 0x0103: n="RIP script"; break;
96 case 0x0104: n="PCBoard"; break;
97 case 0x0105: n="Avatar"; break;
98 case 0x0106: n="HTML"; break;
99 case 0x0108: n="TundraDraw"; break;
100 case 0x0200: n="GIF"; break;
101 case 0x0206: n="BMP"; break;
102 case 0x020a: n="PNG"; break;
103 case 0x020b: n="JPEG"; break;
104 case 0x0600: n="XBIN"; break;
105 case 0x0800: n="executable"; break;
107 // There are many more SAUCE file types defined, but it's not clear how
108 // many have actually been used.
110 return n;
113 // The SAUCE spec has insufficient detail about how comments are to be
114 // interpreted. And some ANSI editors don't obey the spec, anyway.
115 // Our behavior:
116 // * We have two modes, depending on the combine_comments flag.
117 // * We interpret 0x0a as a newline. Most other bytes are CP437 printable
118 // charaters.
119 // * If !combine_comments, trailing spaces and trailing NUL bytes are ignored for
120 // each comment.
121 // * If combine_comments, same as above except that trailing spaces are
122 // respected for each comment except the last.
123 // * If !combine_comments, we add a newline after every comment except the
124 // last.
125 // (Autodetecting which mode to use would be nice, and it's possible to make
126 // a pretty good guess, but it's not possible to get it right every time.)
127 static void sauce_read_comments(deark *c, struct sauce_private_ctx *d, dbuf *inf,
128 struct de_SAUCE_info *si)
130 i64 cmnt_blk_start;
131 i64 k;
132 i64 cmnt_pos;
133 i64 cmnt_len;
134 u8 buf[64];
135 de_ucstring *tmpcomment = NULL;
137 if(d->num_comments<1) goto done;
138 cmnt_blk_start = inf->len - 128 - (5 + d->num_comments*64);
140 if(dbuf_memcmp(inf, cmnt_blk_start, "COMNT", 5)) {
141 de_dbg(c, "invalid SAUCE comment, not found at %d", (int)cmnt_blk_start);
142 d->num_comments = 0;
143 goto done;
146 de_dbg(c, "SAUCE comment block at %d", (int)cmnt_blk_start);
148 si->comment = ucstring_create(c);
149 tmpcomment = ucstring_create(c);
151 de_dbg_indent(c, 1);
152 for(k=0; k<d->num_comments; k++) {
153 int respect_trailing_spaces = 0;
155 cmnt_pos = cmnt_blk_start+5+k*64;
156 dbuf_read(inf, buf, cmnt_pos, 64);
158 if(d->combine_comments && k!=(d->num_comments-1)) {
159 respect_trailing_spaces = 1;
161 cmnt_len = sauce_get_string_length(buf, 64, respect_trailing_spaces);
163 de_dbg(c, "comment at %d, len=%d", (int)cmnt_pos, (int)cmnt_len);
164 de_dbg_indent(c, 1);
166 ucstring_empty(tmpcomment);
167 sauce_bytes_to_ucstring(c, buf, cmnt_len, tmpcomment, d->encoding_for_comments);
168 ucstring_append_ucstring(si->comment, tmpcomment);
169 if(!d->combine_comments && k!=(d->num_comments-1)) {
170 ucstring_append_char(si->comment, 0x0a);
173 de_dbg(c, "comment: \"%s\"", ucstring_getpsz(tmpcomment));
174 de_dbg_indent(c, -1);
177 sauce_strip_trailing_whitespace(si->comment);
178 if(ucstring_isempty(si->comment)) {
179 ucstring_destroy(si->comment);
180 si->comment = NULL;
181 goto done;
184 if(c->extract_level>=2) {
185 dbuf *cmnt_outf = NULL;
187 cmnt_outf = dbuf_create_output_file(c, "comment.txt", NULL, DE_CREATEFLAG_IS_AUX);
188 ucstring_write_as_utf8(c, si->comment, cmnt_outf, 1);
189 dbuf_puts(cmnt_outf, "\n");
190 dbuf_close(cmnt_outf);
193 de_dbg_indent(c, -1);
195 done:
196 ucstring_destroy(tmpcomment);
199 static void do_SAUCE_creation_date(deark *c, struct de_SAUCE_info *si,
200 const u8 *date_raw, size_t date_raw_len)
202 i64 yr, mon, mday;
203 char timestamp_buf[64];
204 char scanbuf[16];
206 if(date_raw_len!=8) return;
208 // Convert to de_timestamp format
210 // year
211 de_memcpy(scanbuf, &date_raw[0], 4);
212 scanbuf[4] = '\0';
213 yr = de_atoi64(scanbuf);
215 // month
216 de_memcpy(scanbuf, &date_raw[4], 2);
217 scanbuf[2] = '\0';
218 mon = de_atoi64(scanbuf);
220 // day of month
221 de_memcpy(scanbuf, &date_raw[6], 2);
222 scanbuf[2] = '\0';
223 mday = de_atoi64(scanbuf);
225 de_make_timestamp(&si->creation_date, yr, mon, mday, 12, 0, 0);
226 si->creation_date.precision = DE_TSPREC_1DAY;
228 de_timestamp_to_string(&si->creation_date, timestamp_buf, sizeof(timestamp_buf), 0);
229 de_dbg(c, "creation date: %s", timestamp_buf);
232 // Caller allocates si using de_create_SAUCE().
233 // Caller must later free si using de_free_SAUCE().
234 static int do_read_SAUCE(deark *c, dbuf *f, struct de_SAUCE_info *si)
236 unsigned int t;
237 u8 tmpbuf[40];
238 i64 tmpbuf_len;
239 i64 pos;
240 const char *name;
241 de_ucstring *tflags_descr = NULL;
242 int retval = 0;
243 struct sauce_private_ctx *d = NULL;
245 pos = f->len - 128;
246 if(dbuf_memcmp(f, pos+0, "SAUCE00", 7)) {
247 goto done;
250 si->is_valid = 1;
252 d = de_malloc(c, sizeof(struct sauce_private_ctx));
253 d->encoding_for_strings = DE_ENCODING_CP437;
254 d->encoding_for_comments = DE_EXTENC_MAKE(d->encoding_for_strings, DE_ENCSUBTYPE_HYBRID);
255 d->combine_comments = de_get_ext_option_bool(c, "sauce:combinecomments", 0);
257 // Title
258 dbuf_read(f, tmpbuf, pos+7, 35);
259 tmpbuf_len = sauce_get_string_length(tmpbuf, 35, 0);
260 if(tmpbuf_len>0) {
261 si->title = ucstring_create(c);
262 sauce_bytes_to_ucstring(c, tmpbuf, tmpbuf_len, si->title, d->encoding_for_strings);
263 de_dbg(c, "title: \"%s\"", ucstring_getpsz_d(si->title));
266 // Artist / Creator
267 dbuf_read(f, tmpbuf, pos+42, 20);
268 tmpbuf_len = sauce_get_string_length(tmpbuf, 20, 0);
269 if(tmpbuf_len>0) {
270 si->artist = ucstring_create(c);
271 sauce_bytes_to_ucstring(c, tmpbuf, tmpbuf_len, si->artist, d->encoding_for_strings);
272 de_dbg(c, "artist: \"%s\"", ucstring_getpsz_d(si->artist));
275 // Organization
276 dbuf_read(f, tmpbuf, pos+62, 20);
277 tmpbuf_len = sauce_get_string_length(tmpbuf, 20, 0);
278 if(tmpbuf_len>0) {
279 si->organization = ucstring_create(c);
280 sauce_bytes_to_ucstring(c, tmpbuf, tmpbuf_len, si->organization, d->encoding_for_strings);
281 de_dbg(c, "organization: \"%s\"", ucstring_getpsz_d(si->organization));
284 // Creation date
285 dbuf_read(f, tmpbuf, pos+82, 8);
286 if(sauce_is_valid_date_string(tmpbuf, 8)) {
287 do_SAUCE_creation_date(c, si, tmpbuf, 8);
290 si->original_file_size = dbuf_getu32le(f, pos+90);
291 de_dbg(c, "original file size: %d", (int)si->original_file_size);
293 si->data_type = dbuf_getbyte(f, pos+94);
294 name = get_sauce_datatype_name(si->data_type);
295 de_dbg(c, "data type: %d (%s)", (int)si->data_type, name);
297 si->file_type = dbuf_getbyte(f, pos+95);
298 t = 256*(unsigned int)si->data_type + si->file_type;
299 name = get_sauce_filetype_name(si->data_type, t);
300 de_dbg(c, "file type: %d (%s)", (int)si->file_type, name);
302 si->tinfo1 = (u16)dbuf_getu16le(f, pos+96);
303 si->tinfo2 = (u16)dbuf_getu16le(f, pos+98);
304 si->tinfo3 = (u16)dbuf_getu16le(f, pos+100);
305 si->tinfo4 = (u16)dbuf_getu16le(f, pos+102);
306 de_dbg(c, "TInfo1: %u", (unsigned int)si->tinfo1);
307 de_dbg(c, "TInfo2: %u", (unsigned int)si->tinfo2);
308 de_dbg(c, "TInfo3: %u", (unsigned int)si->tinfo3);
309 de_dbg(c, "TInfo4: %u", (unsigned int)si->tinfo4);
311 if(t==0x0100 || t==0x0101 || t==0x0102 || t==0x0104 || t==0x0105 || t==0x0108 || t==0x0600) {
312 si->width_in_chars = (i64)si->tinfo1;
313 de_dbg(c, "width in chars: %d", (int)si->width_in_chars);
315 if(t==0x0100 || t==0x0101 || t==0x0104 || t==0x0105 || t==0x0108 || t==0x0600) {
316 si->number_of_lines = (i64)si->tinfo2;
317 de_dbg(c, "number of lines: %d", (int)si->number_of_lines);
320 d->num_comments = (i64)dbuf_getbyte(f, pos+104);
321 de_dbg(c, "num comments: %d", (int)d->num_comments);
322 if(d->num_comments>0) {
323 sauce_read_comments(c, d, f, si);
326 si->tflags = dbuf_getbyte(f, pos+105);
327 if(si->tflags!=0) {
328 tflags_descr = ucstring_create(c);
329 if(t==0x0100 || t==0x0101 || t==0x0102 || si->data_type==5) {
330 // ANSiFlags
331 if(si->tflags&0x01) {
332 ucstring_append_flags_item(tflags_descr, "non-blink mode");
334 if((si->tflags & 0x06)>>1 == 1) {
335 ucstring_append_flags_item(tflags_descr, "8-pixel font");
337 else if((si->tflags & 0x06)>>1 == 2) {
338 ucstring_append_flags_item(tflags_descr, "9-pixel font");
340 if((si->tflags & 0x18)>>3 == 1) {
341 ucstring_append_flags_item(tflags_descr, "non-square pixels");
343 else if((si->tflags & 0x18)>>3 == 2) {
344 ucstring_append_flags_item(tflags_descr, "square pixels");
348 de_dbg(c, "tflags: 0x%02x (%s)", (unsigned int)si->tflags,
349 ucstring_getpsz(tflags_descr));
352 if(si->original_file_size==0 || si->original_file_size>f->len-128) {
353 // If this field seems bad, try to correct it.
354 si->original_file_size = f->len-128-(5+d->num_comments*64);
357 retval = 1;
358 done:
359 ucstring_destroy(tflags_descr);
360 de_free(c, d);
361 return retval;
364 // When running as a submodule, we assume the caller already detected the
365 // presence of SAUCE (probably using detect_SAUCE()), printed a header line
366 // (again probably using detect_SAUCE()), and indented as needed.
367 static void run_sauce_as_submodule(deark *c, de_module_params *mparams)
369 struct de_SAUCE_info *si_local = NULL;
370 struct de_SAUCE_info *si_to_use;
372 if(mparams && mparams->out_params.obj1) {
373 si_to_use = (struct de_SAUCE_info*)mparams->out_params.obj1;
375 else {
376 si_local = fmtutil_create_SAUCE(c);
377 si_to_use = si_local;
380 do_read_SAUCE(c, c->infile, si_to_use);
382 fmtutil_free_SAUCE(c, si_local);
385 static void run_sauce_direct(deark *c, de_module_params *mparams)
387 struct de_SAUCE_info *si = NULL;
388 struct de_SAUCE_detection_data sdd;
389 int ret;
391 fmtutil_detect_SAUCE(c, c->infile, &sdd, 0x1);
392 if(!sdd.has_SAUCE) {
393 if(c->module_disposition==DE_MODDISP_EXPLICIT) {
394 de_err(c, "No SAUCE record found");
396 goto done;
399 si = fmtutil_create_SAUCE(c);
400 de_dbg_indent(c, 1);
401 ret = do_read_SAUCE(c, c->infile, si);
402 de_dbg_indent(c, -1);
403 if(ret && c->module_disposition==DE_MODDISP_AUTODETECT) {
404 de_err(c, "This file has a SAUCE metadata record that identifies it as "
405 "DataType %d, FileType %d, but it is not a supported format.",
406 (int)si->data_type, (int)si->file_type);
409 done:
410 fmtutil_free_SAUCE(c, si);
413 static void de_run_sauce(deark *c, de_module_params *mparams)
415 if(c->module_disposition==DE_MODDISP_INTERNAL) {
416 run_sauce_as_submodule(c, mparams);
418 else {
419 run_sauce_direct(c, mparams);
423 static int de_identify_sauce(deark *c)
425 c->detection_data->SAUCE_detection_attempted = 1;
426 if(fmtutil_detect_SAUCE(c, c->infile, &c->detection_data->sauce, 0)) {
427 // This module should have a very low priority, but other modules can use
428 // the results of its detection.
429 return 2;
431 return 0;
434 void de_module_sauce(deark *c, struct deark_module_info *mi)
436 mi->id = "sauce";
437 mi->desc = "SAUCE metadata";
438 mi->run_fn = de_run_sauce;
439 mi->identify_fn = de_identify_sauce;
440 mi->flags |= DE_MODFLAG_HIDDEN | DE_MODFLAG_SHAREDDETECTION;