1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // Microsoft Windows Write (.wri) format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_wri
);
11 #define WRI_STG_METAFILE 0x88
12 #define WRI_STG_BITMAP 0xe3
13 #define WRI_STG_OLE 0xe4
15 struct picctx_struct
{
16 unsigned int mm
; // WRI_STG_*
19 unsigned int ole_objectType
;
23 struct text_styles_struct
{
28 i64 thisparapos
, thisparalen
;
29 i64 bfprop_offset
; // file-level offset
34 int xpos
; // Current length of this line in the source code
35 int has_content
; // Have we emitted a non-space char in this paragraph?
39 struct text_styles_struct text_styles_wanted
; // Styles for the next char to be emitted
40 struct text_styles_struct text_styles_current
; // Effective current styles
43 typedef struct localctx_struct
{
53 i64 pnFntb
, pnSep
, pnSetb
, pnPgtb
, pnFfntb
;
57 struct de_encconv_state es
;
60 static void do_emit_raw_sz(deark
*c
, lctx
*d
, struct para_info
*pinfo
, const char *sz
);
61 static void do_emit_ucstring(deark
*c
, lctx
*d
, struct para_info
*pinfo
,
63 static void end_para(deark
*c
, lctx
*d
, struct para_info
*pinfo
);
65 static void default_text_styles(struct text_styles_struct
*ts
)
67 de_zeromem(ts
, sizeof(struct text_styles_struct
));
70 static int text_styles_differ(const struct text_styles_struct
*ts1
,
71 const struct text_styles_struct
*ts2
)
73 if(ts1
->tab_style
!= ts2
->tab_style
) return 1;
77 static int do_header(deark
*c
, lctx
*d
, i64 pos
)
79 de_dbg(c
, "header at %d", (int)pos
);
82 d
->fcMac
= de_getu32le(pos
+7*2);
83 de_dbg(c
, "fcMac: %d", (int)d
->fcMac
);
84 d
->pnChar
= (d
->fcMac
+ 127) / 128;
85 d
->pnChar_offs
= d
->pnChar
* 128;
86 de_dbg(c
, "pnChar: page %d (offset %d)", (int)d
->pnChar
, (int)d
->pnChar_offs
);
88 d
->pnPara
= de_getu16le(pos
+9*2);
89 d
->pnPara_offs
= d
->pnPara
* 128;
90 de_dbg(c
, "pnPara: page %d (offset %d)", (int)d
->pnPara
, (int)d
->pnPara_offs
);
92 d
->pnFntb
= de_getu16le(pos
+10*2);
93 de_dbg(c
, "pnFntb: page %d", (int)d
->pnFntb
);
95 d
->pnSep
= de_getu16le(pos
+11*2);
96 de_dbg(c
, "pnSep: page %d", (int)d
->pnSep
);
98 d
->pnSetb
= de_getu16le(pos
+12*2);
99 de_dbg(c
, "pnSetb: page %d", (int)d
->pnSetb
);
101 d
->pnPgtb
= de_getu16le(pos
+13*2);
102 de_dbg(c
, "pnPgtb: page %d", (int)d
->pnPgtb
);
104 d
->pnFfntb
= de_getu16le(pos
+14*2);
105 de_dbg(c
, "pnFfntb: page %d", (int)d
->pnFfntb
);
107 d
->pnMac
= de_getu16le(pos
+48*2);
108 de_dbg(c
, "pnMac: %d pages", (int)d
->pnMac
);
110 d
->pnPara_npages
= d
->pnFntb
- d
->pnPara
;
112 de_dbg_indent(c
, -1);
// Map the objectType value read from an OLE picture header to a short
// printable name, for debug output. Unrecognized values map to "?".
static const char *get_objecttype1_name(unsigned int t)
{
	if(t==1) return "static";
	if(t==2) return "embedded";
	if(t==3) return "link";
	return "?";
}
128 // Read the usually-40-byte picture header.
129 // The header has 3 variants (metafile, bitmap, OLE), which have some common
130 // fields, and some different fields. So, this function is kind of ugly.
131 static void do_picture_header(deark
*c
, lctx
*d
, struct para_info
*pinfo
,
132 struct picctx_struct
*picctx
)
134 i64 pos1
= pinfo
->thisparapos
;
138 // (The initial "mm" / "storage type" field has already been read.)
140 if(picctx
->mm
==WRI_STG_METAFILE
) {
142 n1
= de_getu16le_p(&pos
);
143 n2
= de_getu16le_p(&pos
);
144 de_dbg(c
, "xExt,yExt: %d"DE_CHAR_TIMES
"%d twips", (int)n1
, (int)n2
);
147 if(picctx
->mm
==WRI_STG_OLE
) {
148 // This field seems important, but we don't use it, because the OLE
149 // FormatID field is sufficient.
150 picctx
->ole_objectType
= (unsigned int)de_getu16le(pos1
+6);
151 de_dbg(c
, "objectType: %u (%s)", picctx
->ole_objectType
,
152 get_objecttype1_name(picctx
->ole_objectType
));
156 n1
= de_getu16le_p(&pos
);
157 de_dbg(c
, "dxaOffset: %d twips", (int)n1
);
158 n1
= de_getu16le_p(&pos
);
159 n2
= de_getu16le_p(&pos
);
160 de_dbg(c
, "dxaSize,dyaSize: %d"DE_CHAR_TIMES
"%d twips", (int)n1
, (int)n2
);
162 if(picctx
->mm
==WRI_STG_BITMAP
) {
163 de_dbg(c
, "[DDB header at %"I64_FMT
"]", pos1
+16);
166 if(picctx
->mm
==WRI_STG_OLE
) {
167 picctx
->ole_dwDataSize
= de_getu32le(pos1
+16);
168 de_dbg(c
, "dwDataSize: %d", (int)picctx
->ole_dwDataSize
);
169 n1
= de_getu32le(pos1
+24);
170 de_dbg(c
, "dwObjNum: 0x%08x", (unsigned int)n1
);
173 picctx
->cbHeader
= de_getu16le(pos1
+30);
174 de_dbg(c
, "header size: %d", (int)picctx
->cbHeader
);
176 if(picctx
->mm
==WRI_STG_METAFILE
|| picctx
->mm
==WRI_STG_BITMAP
) {
177 picctx
->cbSize
= de_getu32le(pos1
+32);
178 de_dbg(c
, "data size: %d", (int)picctx
->cbSize
);
182 n1
= de_getu16le_p(&pos
);
183 n2
= de_getu16le_p(&pos
);
184 de_dbg(c
, "scaling factor x,y: %d,%d", (int)n1
, (int)n2
);
187 static void do_picture_metafile(deark
*c
, lctx
*d
, struct para_info
*pinfo
,
188 struct picctx_struct
*picctx
)
190 i64 pos
= pinfo
->thisparapos
;
192 if(picctx
->cbHeader
+picctx
->cbSize
> pinfo
->thisparalen
) goto done
;
193 de_dbg(c
, "metafile data at %"I64_FMT
, pos
+picctx
->cbHeader
);
194 dbuf_create_file_from_slice(c
->infile
, pos
+picctx
->cbHeader
, picctx
->cbSize
,
200 static void do_picture_bitmap(deark
*c
, lctx
*d
, struct para_info
*pinfo
,
201 struct picctx_struct
*picctx
)
207 if(picctx
->cbHeader
+ picctx
->cbSize
> pinfo
->thisparalen
) goto done
;
208 hdrpos
= pinfo
->thisparapos
+ 16;
209 bitspos
= pinfo
->thisparapos
+ picctx
->cbHeader
;
210 bitssize
= picctx
->cbSize
;
211 de_dbg(c
, "processing DDB, header at %"I64_FMT
", pixels at %"I64_FMT
,
214 // Most commonly, the DDB bits immediately follow the header. But in this
215 // format, they are separated, with some non-DDB data in between.
216 // We'll construct a temporary DDB, in which they are contiguous, to pass
217 // to the ddb module.
218 tmpf
= dbuf_create_membuf(c
, 14+bitssize
, 0);
219 dbuf_copy(c
->infile
, hdrpos
, 14, tmpf
);
220 dbuf_copy(c
->infile
, bitspos
, bitssize
, tmpf
);
223 de_run_module_by_id_on_slice2(c
, "ddb", "N", tmpf
, 0, tmpf
->len
);
224 de_dbg_indent(c
, -1);
230 static const char *get_picture_storage_type_name(unsigned int t
)
234 case WRI_STG_METAFILE
: name
="metafile"; break;
235 case WRI_STG_BITMAP
: name
="bitmap"; break;
236 case WRI_STG_OLE
: name
="OLE object"; break;
237 default: name
="?"; break;
242 static void do_picture_ole(deark
*c
, lctx
*d
, struct para_info
*pinfo
,
243 struct picctx_struct
*picctx
)
245 i64 pos
= pinfo
->thisparapos
;
247 de_module_params
*mparams
= NULL
;
249 pos
+= picctx
->cbHeader
;
251 mparams
= de_malloc(c
, sizeof(de_module_params
));
252 mparams
->in_params
.input_encoding
= d
->input_encoding
;
254 ole_len
= de_min_int(picctx
->ole_dwDataSize
, pinfo
->thisparapos
+pinfo
->thisparalen
-pos
);
255 de_dbg(c
, "OLE1 data at %"I64_FMT
", len=%"I64_FMT
, pos
, ole_len
);
257 de_run_module_by_id_on_slice(c
, "ole1", mparams
, c
->infile
, pos
, ole_len
);
258 de_dbg_indent(c
, -1);
263 static int get_next_output_file_id(deark
*c
)
265 return c
->file_count
;
268 static void do_picture(deark
*c
, lctx
*d
, struct para_info
*pinfo
)
270 int orig_file_count
, curr_file_count
;
271 struct picctx_struct
*picctx
= NULL
;
272 i64 pos
= pinfo
->thisparapos
;
274 picctx
= de_malloc(c
, sizeof(struct picctx_struct
));
275 if(pinfo
->thisparalen
<2) goto done
;
276 picctx
->mm
= (unsigned int)de_getu16le(pos
);
277 de_dbg(c
, "picture storage type: 0x%04x (%s)", picctx
->mm
,
278 get_picture_storage_type_name(picctx
->mm
));
280 orig_file_count
= get_next_output_file_id(c
);
282 do_picture_header(c
, d
, pinfo
, picctx
);
285 case WRI_STG_METAFILE
:
286 do_picture_metafile(c
, d
, pinfo
, picctx
);
289 do_picture_bitmap(c
, d
, pinfo
, picctx
);
292 do_picture_ole(c
, d
, pinfo
, picctx
);
295 de_err(c
, "Picture storage type 0x%04x not supported", picctx
->mm
);
299 // We want to include the image file ID numbers in the HTML document,
300 // so that the user can figure out which image goes where.
301 // To deduce the ID number, we watch the global file ID counter.
302 // It's totally a hack, but unfortunately our high level functions that
303 // create an output file (e.g. de_convert_and_write_image_bilevel) do
304 // not have a way to return the ID number of the file they created. It
305 // would be a lot of trouble to create such a mechanism.
307 do_emit_raw_sz(c
, d
, pinfo
, "<p class=r>");
309 ucstring_empty(d
->tmpstr
);
310 ucstring_append_sz(d
->tmpstr
, "object", DE_ENCODING_LATIN1
);
312 curr_file_count
= get_next_output_file_id(c
);
313 if(curr_file_count
== orig_file_count
+1) {
314 ucstring_printf(d
->tmpstr
, DE_ENCODING_LATIN1
, " %d", orig_file_count
);
316 else if(curr_file_count
== orig_file_count
) {
317 ucstring_append_sz(d
->tmpstr
, " (not extracted)", DE_ENCODING_LATIN1
);
320 ucstring_printf(d
->tmpstr
, DE_ENCODING_UTF8
, "s %d" "\xe2\x80\x93" "%d",
321 orig_file_count
, curr_file_count
-1);
323 do_emit_ucstring(c
, d
, pinfo
, d
->tmpstr
);
324 end_para(c
, d
, pinfo
);
331 static void ensure_in_para(deark
*c
, lctx
*d
, struct para_info
*pinfo
)
333 if(pinfo
->in_para
) return;
334 do_emit_raw_sz(c
, d
, pinfo
, "<p");
335 switch(pinfo
->justification
) {
336 case 1: do_emit_raw_sz(c
, d
, pinfo
, " class=tc"); break;
337 case 2: do_emit_raw_sz(c
, d
, pinfo
, " class=tr"); break;
338 case 3: do_emit_raw_sz(c
, d
, pinfo
, " class=tj"); break;
340 do_emit_raw_sz(c
, d
, pinfo
, ">");
344 // Emit a data codepoint, inside a paragraph.
345 static void do_emit_codepoint(deark
*c
, lctx
*d
, struct para_info
*pinfo
, i32 outcp
)
349 if(!pinfo
->in_para
) {
350 ensure_in_para(c
, d
, pinfo
);
353 styles_changed
= text_styles_differ(&pinfo
->text_styles_current
, &pinfo
->text_styles_wanted
);
355 if(pinfo
->in_span
&& styles_changed
) {
356 do_emit_raw_sz(c
, d
, pinfo
, "</span>");
360 if(pinfo
->text_styles_wanted
.tab_style
) {
361 do_emit_raw_sz(c
, d
, pinfo
, "<span class=c>");
364 pinfo
->text_styles_current
= pinfo
->text_styles_wanted
; // struct copy
367 de_write_codepoint_to_html(c
, d
->html_outf
, outcp
);
369 // FIXME: We'd like to know how many characters (not bytes) were written,
370 // but we don't currently have a good way to do that in the case where the
371 // codepoint was written as an HTML entity.
375 pinfo
->has_content
= 1;
379 // Same as calling do_emit_codepoint() on each character.
380 static void do_emit_ucstring(deark
*c
, lctx
*d
, struct para_info
*pinfo
,
386 for(k
=0; k
<s
->len
; k
++) {
387 do_emit_codepoint(c
, d
, pinfo
, s
->str
[k
]);
391 // Emit a raw string. Does not force a paragraph to be open.
392 // Updates pinfo->xpos (assumes 1 byte per char).
393 // For xpos, handles the case where sz ends with a newline, but does not
394 // handle internal newlines.
395 static void do_emit_raw_sz(deark
*c
, lctx
*d
, struct para_info
*pinfo
, const char *sz
)
397 size_t sz_len
= de_strlen(sz
);
399 dbuf_write(d
->html_outf
, (const u8
*)sz
, (i64
)sz_len
);
400 if(sz
[sz_len
-1]=='\n') {
404 pinfo
->xpos
+= (int)sz_len
;
408 static void end_para(deark
*c
, lctx
*d
, struct para_info
*pinfo
)
410 if(!pinfo
->in_para
) return;
413 do_emit_raw_sz(c
, d
, pinfo
, "</span>");
417 if(!pinfo
->has_content
) {
418 // No empty paragraphs allowed. HTML will collapse them, but Write does not.
419 do_emit_codepoint(c
, d
, pinfo
, 0xa0);
421 do_emit_raw_sz(c
, d
, pinfo
, "</p>\n");
423 default_text_styles(&pinfo
->text_styles_current
);
426 static void do_text_paragraph(deark
*c
, lctx
*d
, struct para_info
*pinfo
)
430 if(!d
->html_outf
) return;
432 if((pinfo
->papflags
& 0x06)!=0) {
433 // TODO: Decode headers and footers somehow.
434 do_emit_raw_sz(c
, d
, pinfo
, "<p class=r>");
435 do_emit_raw_sz(c
, d
, pinfo
, (pinfo
->papflags
&0x01)?"footer":"header");
436 do_emit_raw_sz(c
, d
, pinfo
, " definition</p>\n");
442 pinfo
->space_count
= 0;
443 pinfo
->has_content
= 0;
445 default_text_styles(&pinfo
->text_styles_wanted
);
446 default_text_styles(&pinfo
->text_styles_current
);
448 for(i
=0; i
<pinfo
->thisparalen
; i
++) {
451 incp
= de_getbyte(pinfo
->thisparapos
+i
);
452 if(incp
==0x0d && i
<pinfo
->thisparalen
-1) {
453 if(de_getbyte(pinfo
->thisparapos
+i
+1)==0x0a) {
456 ensure_in_para(c
, d
, pinfo
);
457 end_para(c
, d
, pinfo
);
462 if(incp
!=32 && pinfo
->space_count
>0) {
463 int nonbreaking_count
, breaking_count
;
465 if(!pinfo
->in_para
&& pinfo
->space_count
==1) {
466 // If the paragraph starts with a single space, make it nonbreaking.
467 nonbreaking_count
= 1;
471 // Else make all spaces but the last one nonbreaking
472 nonbreaking_count
= pinfo
->space_count
-1;
476 ensure_in_para(c
, d
, pinfo
);
478 for(k
=0; k
<nonbreaking_count
; k
++) {
479 do_emit_codepoint(c
, d
, pinfo
, 0xa0);
482 if(breaking_count
>0) {
484 // We don't do proper word wrapping of the HTML source, but
485 // maybe this is better than nothing.
486 do_emit_raw_sz(c
, d
, pinfo
, "\n");
489 do_emit_codepoint(c
, d
, pinfo
, 32);
493 pinfo
->space_count
=0;
499 // TODO: Decide if we ought to support multi-code-unit encodings
501 outcp
= de_char_to_unicode_ex((i32
)incp
, &d
->es
);
502 do_emit_codepoint(c
, d
, pinfo
, outcp
);
507 pinfo
->text_styles_wanted
.tab_style
= 1;
508 do_emit_codepoint(c
, d
, pinfo
, 0x2192);
509 pinfo
->text_styles_wanted
.tab_style
= 0;
513 ensure_in_para(c
, d
, pinfo
);
514 do_emit_raw_sz(c
, d
, pinfo
, "<br>\n");
515 pinfo
->has_content
= 1;
517 case 12: // page break
518 end_para(c
, d
, pinfo
);
519 do_emit_raw_sz(c
, d
, pinfo
, "<hr>\n");
524 pinfo
->space_count
++;
527 do_emit_codepoint(c
, d
, pinfo
, 0xfffd);
532 end_para(c
, d
, pinfo
);
535 static void do_paragraph(deark
*c
, lctx
*d
, struct para_info
*pinfo
)
537 if(pinfo
->papflags
&0x10) {
538 de_dbg(c
, "picture at %d, len=%d", (int)pinfo
->thisparapos
,
539 (int)pinfo
->thisparalen
);
541 do_picture(c
, d
, pinfo
);
542 de_dbg_indent(c
, -1);
545 de_dbg(c
, "text paragraph at %d, len=%d", (int)pinfo
->thisparapos
,
546 (int)pinfo
->thisparalen
);
547 do_text_paragraph(c
, d
, pinfo
);
551 static void do_para_fprop(deark
*c
, lctx
*d
, struct para_info
*pinfo
,
552 i64 bfprop
, u8 is_dup
)
556 // bfprop is a pointer into the 123 bytes of data starting
557 // at pos+4. The maximum sensible value is at most 122.
559 // It appears that the length prefix does not include itself,
560 // contrary to what one source says.
561 fprop_dlen
= (i64
)de_getbyte(pinfo
->bfprop_offset
);
562 if(!is_dup
) de_dbg(c
, "fprop dlen: %d", (int)fprop_dlen
);
566 pinfo
->justification
= de_getbyte(pinfo
->bfprop_offset
+ 1 + 1) & 0x03;
567 if(!is_dup
&& pinfo
->justification
!=0) {
568 de_dbg(c
, "justification: %d", (int)pinfo
->justification
);
573 pinfo
->papflags
= de_getbyte(pinfo
->bfprop_offset
+ 1 + 16);
575 de_ucstring
*flagstr
= ucstring_create(c
);
576 if(pinfo
->papflags
&0x06) {
577 ucstring_append_flags_item(flagstr
, (pinfo
->papflags
&0x01)?"footer":"header");
578 ucstring_append_flags_item(flagstr
, (pinfo
->papflags
&0x08)?"print on first page":
579 "do not print on first page");
581 if(pinfo
->papflags
&0x10) ucstring_append_flags_item(flagstr
, "picture");
582 de_dbg(c
, "paragraph flags: 0x%02x (%s)", (unsigned int)pinfo
->papflags
,
583 ucstring_getpsz(flagstr
));
584 ucstring_destroy(flagstr
);
589 static void do_para_info_page(deark
*c
, lctx
*d
, i64 pos
)
594 i64 fod_array_startpos
;
598 de_zeromem(fprop_seen
, sizeof(fprop_seen
));
599 de_dbg(c
, "paragraph info page at %d", (int)pos
);
602 cfod
= (i64
)de_getbyte(pos
+127);
603 de_dbg(c
, "number of FODs on this page: %d", (int)cfod
);
605 // There are up to 123 bytes available for the FOD array, and each FOD is
606 // 6 bytes. So I assume the maximum possible is 20.
609 fcFirst
= de_getu32le(pos
);
610 de_dbg(c
, "fcFirst: %d", (int)fcFirst
);
612 fod_array_startpos
= pos
+ 4;
614 prevtextpos
= fcFirst
;
616 for(i
=0; i
<cfod
; i
++) {
617 struct para_info
*pinfo
= NULL
;
618 i64 fcLim_orig
, fcLim_adj
;
620 i64 fodpos
= fod_array_startpos
+ 6*i
;
622 pinfo
= de_malloc(c
, sizeof(struct para_info
));
624 de_dbg(c
, "FOD[%d] at %d", (int)i
, (int)fodpos
);
627 fcLim_orig
= de_getu32le(fodpos
);
628 fcLim_adj
= fcLim_orig
;
629 if(fcLim_adj
> d
->fcMac
) fcLim_adj
= d
->fcMac
;
630 pinfo
->thisparapos
= prevtextpos
;
631 pinfo
->thisparalen
= fcLim_adj
- prevtextpos
;
632 de_dbg(c
, "fcLim: %d (paragraph from %d to %d)", (int)fcLim_orig
,
633 (int)pinfo
->thisparapos
, (int)(fcLim_adj
-1));
634 prevtextpos
= fcLim_adj
;
636 bfprop
= de_getu16le(fodpos
+4);
638 de_dbg(c
, "bfprop: %d (none)", (int)bfprop
);
641 pinfo
->bfprop_offset
= fod_array_startpos
+ bfprop
;
643 de_dbg(c
, "bfprop: %d (+ %d = %d)", (int)bfprop
,
644 (int)fod_array_startpos
, (int)pinfo
->bfprop_offset
);
648 if(fprop_seen
[bfprop
]) {
649 // An FPROP can be referenced multiple times. Only print the
650 // debug info for it once.
651 de_dbg(c
, "[already decoded FPROP at %d on this paragraph info page]", (int)bfprop
);
653 do_para_fprop(c
, d
, pinfo
, bfprop
, fprop_seen
[bfprop
]);
654 fprop_seen
[bfprop
] = 1;
656 de_dbg_indent(c
, -1);
659 do_paragraph(c
, d
, pinfo
);
663 de_dbg_indent(c
, -1);
666 de_dbg_indent(c
, -1);
669 static void do_para_info(deark
*c
, lctx
*d
)
673 if(d
->pnPara_npages
<1) return;
674 de_dbg(c
, "paragraph info at %d, len=%d page(s)", (int)d
->pnPara_offs
, (int)d
->pnPara_npages
);
677 for(i
=0; i
<d
->pnPara_npages
; i
++) {
678 do_para_info_page(c
, d
, d
->pnPara_offs
+ 128*i
);
680 de_dbg_indent(c
, -1);
683 static void do_html_begin(deark
*c
, lctx
*d
)
686 if(d
->html_outf
) return;
687 d
->html_outf
= dbuf_create_output_file(c
, "html", NULL
, 0);
688 dbuf_enable_wbuffer(d
->html_outf
);
690 if(c
->write_bom
&& !c
->ascii_html
) dbuf_write_uchar_as_utf8(f
, 0xfeff);
691 dbuf_puts(f
, "<!DOCTYPE html>\n");
692 dbuf_puts(f
, "<html>\n");
693 dbuf_puts(f
, "<head>\n");
694 dbuf_printf(f
, "<meta charset=\"%s\">\n", c
->ascii_html
?"US-ASCII":"UTF-8");
695 dbuf_puts(f
, "<title></title>\n");
697 dbuf_puts(f
, "<style type=\"text/css\">\n");
698 dbuf_puts(f
, " body { color: #000; background-color: #fff }\n");
699 dbuf_puts(f
, " p { margin-top: 0; margin-bottom: 0 }\n");
700 dbuf_puts(f
, " .c { color: #ccc }\n"); // Visible control characters
702 // Replacement object
703 dbuf_puts(f
, " .r { padding: 0.5ex; color: #800; background-color: #eee;\n");
704 dbuf_puts(f
, " font-style: italic; border: 0.34ex dotted #800 }\n");
706 dbuf_puts(f
, " .tc { text-align: center }\n");
707 dbuf_puts(f
, " .tr { text-align: right }\n");
708 dbuf_puts(f
, " .tj { text-align: justify }\n");
709 dbuf_puts(f
, "</style>\n");
711 dbuf_puts(f
, "</head>\n");
712 dbuf_puts(f
, "<body>\n");
715 static void do_html_end(deark
*c
, lctx
*d
)
717 if(!d
->html_outf
) return;
718 dbuf_puts(d
->html_outf
, "</body>\n</html>\n");
719 dbuf_close(d
->html_outf
);
723 static void de_run_wri(deark
*c
, de_module_params
*mparams
)
728 d
= de_malloc(c
, sizeof(lctx
));
730 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_WINDOWS1252
);
731 d
->extract_text
= de_get_ext_option_bool(c
, "wri:extracttext", 1);
732 d
->extract_ole
= de_get_ext_option_bool(c
, "wri:extractole",
733 (c
->extract_level
>=2)?1:0);
735 de_encconv_init(&d
->es
, d
->input_encoding
);
737 d
->tmpstr
= ucstring_create(c
);
740 if(!do_header(c
, d
, pos
)) goto done
;
741 if(d
->extract_text
) {
750 ucstring_destroy(d
->tmpstr
);
755 static int de_identify_wri(deark
*c
)
760 if((buf
[0]==0x31 || buf
[0]==0x32) &&
761 !de_memcmp(&buf
[1], "\xbe\x00\x00\x00\xab", 5))
764 pnMac
= de_getu16le(48*2);
765 if(pnMac
==0) return 0; // Apparently MSWord, not Write
771 static void de_help_wri(deark
*c
)
773 de_msg(c
, "-opt wri:extracttext=0 : Do not extract text");
774 de_msg(c
, "-opt wri:extractole : Extract unidentified OLE objects");
777 void de_module_wri(deark
*c
, struct deark_module_info
*mi
)
780 mi
->desc
= "Microsoft Write";
781 mi
->run_fn
= de_run_wri
;
782 mi
->identify_fn
= de_identify_wri
;
783 mi
->help_fn
= de_help_wri
;