3 * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
6 * MS Word to "text" functions
14 #include "DeskLib:Hourglass.h"
/*
 * Sizes for the output text buffers: the visible callers allocate
 * INITIAL_SIZE bytes for a new buffer and enlarge a full buffer in steps
 * of EXTENTION_SIZE bytes.
 */
20 #define INITIAL_SIZE 40
21 #define EXTENTION_SIZE 20
/*
 * OUTPUT_LINE() aligns the current line to the window and then starts a
 * new output list; RESET_LINE() only starts a new output list. Both expand
 * in place and rely on the caller's locals (pDiag, pAnchor, lWidthMax,
 * ucAlignment).
 * NOTE(review): the continuation lines between the numbered lines are not
 * visible in this listing - confirm the complete macro bodies upstream.
 */
24 /* Macros to make sure all such statements will be identical */
25 #define OUTPUT_LINE() \
27 vAlign2Window(pDiag, pAnchor, lWidthMax, ucAlignment);\
28 TRACE_MSG("after vAlign2Window");\
29 pAnchor = pStartNewOutput(pAnchor, NULL);\
33 #define RESET_LINE() \
35 pAnchor = pStartNewOutput(pAnchor, NULL);\
40 /* Length of the document in characters */
41 static ULONG ulDocumentLength
;
42 /* Number of characters processed so far */
43 static ULONG ulCharCounter
;
44 static int iCurrPct
, iPrevPct
;
46 /* The document is in the format belonging to this version of Word */
47 static int iWordVersion
= -1;
48 /* Special treatment for files from Word 4/5/6 on an Apple Macintosh */
49 static BOOL bOldMacFile
= FALSE
;
50 /* Section Information */
51 static const section_block_type
*pSection
= NULL
;
52 static const section_block_type
*pSectionNext
= NULL
;
53 /* All the (command line) options */
54 static options_type tOptions
;
55 /* Needed for reading a complete table row */
56 static const row_block_type
*pRowInfo
= NULL
;
57 static BOOL bStartRow
= FALSE
;
58 static BOOL bEndRowNorm
= FALSE
;
59 static BOOL bEndRowFast
= FALSE
;
60 static BOOL bIsTableRow
= FALSE
;
61 /* Index of the next style and font information */
62 static USHORT usIstdNext
= ISTD_NORMAL
;
63 /* Needed for finding the start of a style */
64 static const style_block_type
*pStyleInfo
= NULL
;
65 static style_block_type tStyleNext
;
66 static BOOL bStartStyle
= FALSE
;
67 static BOOL bStartStyleNext
= FALSE
;
68 /* Needed for finding the start of a font */
69 static const font_block_type
*pFontInfo
= NULL
;
70 static font_block_type tFontNext
;
71 static BOOL bStartFont
= FALSE
;
72 static BOOL bStartFontNext
= FALSE
;
73 /* Needed for finding an image */
74 static ULONG ulFileOffsetImage
= FC_INVALID
;
78 * vUpdateCounters - Update the counters for the hourglass
85 iCurrPct
= (int)((ulCharCounter
* 100) / ulDocumentLength
);
86 if (iCurrPct
!= iPrevPct
) {
87 Hourglass_Percentage(iCurrPct
);
91 } /* end of vUpdateCounters */
94 * bOutputContainsText - see if the output contains more than white space
97 bOutputContainsText(const output_type
*pAnchor
)
99 const output_type
*pCurr
;
102 fail(pAnchor
== NULL
);
104 for (pCurr
= pAnchor
; pCurr
!= NULL
; pCurr
= pCurr
->pNext
) {
105 fail(pCurr
->lStringWidth
< 0);
106 for (tIndex
= 0; tIndex
< pCurr
->tNextFree
; tIndex
++) {
107 if (isspace((int)(UCHAR
)pCurr
->szStorage
[tIndex
])) {
111 if (pCurr
->szStorage
[tIndex
] == FILLER_CHAR
) {
119 } /* end of bOutputContainsText */
122 * lTotalStringWidth - compute the total width of the output string
125 lTotalStringWidth(const output_type
*pAnchor
)
127 const output_type
*pCurr
;
131 for (pCurr
= pAnchor
; pCurr
!= NULL
; pCurr
= pCurr
->pNext
) {
132 DBG_DEC_C(pCurr
->lStringWidth
< 0, pCurr
->lStringWidth
);
133 fail(pCurr
->lStringWidth
< 0);
134 lTotal
+= pCurr
->lStringWidth
;
137 } /* end of lTotalStringWidth */
140 * vStoreByte - store one byte
143 vStoreByte(UCHAR ucChar
, output_type
*pOutput
)
145 fail(pOutput
== NULL
);
148 pOutput
->szStorage
[pOutput
->tNextFree
] = '\0';
152 while (pOutput
->tNextFree
+ 2 > pOutput
->tStorageSize
) {
153 pOutput
->tStorageSize
+= EXTENTION_SIZE
;
154 pOutput
->szStorage
= xrealloc(pOutput
->szStorage
,
155 pOutput
->tStorageSize
);
157 pOutput
->szStorage
[pOutput
->tNextFree
] = (char)ucChar
;
158 pOutput
->szStorage
[pOutput
->tNextFree
+ 1] = '\0';
159 pOutput
->tNextFree
++;
160 } /* end of vStoreByte */
163 * vStoreChar - store a character as one or more bytes
166 vStoreChar(ULONG ulChar
, BOOL bChangeAllowed
, output_type
*pOutput
)
171 fail(pOutput
== NULL
);
173 if (tOptions
.eEncoding
== encoding_utf_8
&& bChangeAllowed
) {
174 DBG_HEX_C(ulChar
> 0xffff, ulChar
);
175 fail(ulChar
> 0xffff);
176 tLen
= tUcs2Utf8(ulChar
, szResult
, sizeof(szResult
));
177 for (tIndex
= 0; tIndex
< tLen
; tIndex
++) {
178 vStoreByte((UCHAR
)szResult
[tIndex
], pOutput
);
181 DBG_HEX_C(ulChar
> 0xff, ulChar
);
183 vStoreByte((UCHAR
)ulChar
, pOutput
);
186 pOutput
->lStringWidth
+= lComputeStringWidth(
187 pOutput
->szStorage
+ pOutput
->tNextFree
- tLen
,
190 pOutput
->usFontSize
);
191 } /* end of vStoreChar */
194 * vStoreCharacter - store one character
197 vStoreCharacter(ULONG ulChar
, output_type
*pOutput
)
199 vStoreChar(ulChar
, TRUE
, pOutput
);
200 } /* end of vStoreCharacter */
203 * vStoreString - store a string
206 vStoreString(const char *szString
, size_t tStringLength
, output_type
*pOutput
)
210 fail(szString
== NULL
|| pOutput
== NULL
);
212 for (tIndex
= 0; tIndex
< tStringLength
; tIndex
++) {
213 vStoreCharacter((ULONG
)(UCHAR
)szString
[tIndex
], pOutput
);
215 } /* end of vStoreString */
218 * vStoreNumberAsDecimal - store a number as a decimal number
221 vStoreNumberAsDecimal(UINT uiNumber
, output_type
*pOutput
)
224 char szString
[3 * sizeof(UINT
) + 1];
227 fail(pOutput
== NULL
);
229 tLen
= (size_t)sprintf(szString
, "%u", uiNumber
);
230 vStoreString(szString
, tLen
, pOutput
);
231 } /* end of vStoreNumberAsDecimal */
234 * vStoreNumberAsRoman - store a number as a roman numerical
237 vStoreNumberAsRoman(UINT uiNumber
, output_type
*pOutput
)
243 fail(pOutput
== NULL
);
245 tLen
= tNumber2Roman(uiNumber
, FALSE
, szString
);
246 vStoreString(szString
, tLen
, pOutput
);
247 } /* end of vStoreNumberAsRoman */
/*
 * vStoreStyle: for XML conversion the style is handed to vSetHeaders()
 * through its usIstd; on the other visible path the style is turned into
 * a string by tStyle2Window() and that string is stored in pOutput.
 * NOTE(review): numbered lines are missing from this listing (local
 * declarations, the remaining arguments of tStyle2Window and the branch
 * structure); confirm against the complete source.
 */
250 * vStoreStyle - store a style
253 vStoreStyle(diagram_type
*pDiag
, output_type
*pOutput
,
254 const style_block_type
*pStyle
)
260 fail(pOutput
== NULL
);
261 fail(pStyle
== NULL
);
/* XML output: the style only selects the headers */
263 if (tOptions
.eConversionType
== conversion_xml
) {
264 vSetHeaders(pDiag
, pStyle
->usIstd
);
/* The style as text: build the string, then store it */
266 tLen
= tStyle2Window(szString
, sizeof(szString
),
268 vStoreString(szString
, tLen
, pOutput
);
270 } /* end of vStoreStyle */
/*
 * vPutIndentation: sets the left indentation of the diagram and, for list
 * items, stores the list number or mark in front of the text.
 * Visible behaviour: the number is formatted according to ucNFC (decimal,
 * roman, alphabetic, ordinal with st/nd/rd/th suffix, two-digit outline
 * number), followed by at most three bytes of szListChar and one space;
 * the width of that string is subtracted from the left indentation, which
 * never goes below zero.
 * NOTE(review): numbered lines are missing from this listing (the switch
 * head, break statements, some case labels, returns and the conditions on
 * bNoMarks and bFirstLine); confirm against the complete source.
 */
273 * vPutIndentation - output the specified amount of indentation
276 vPutIndentation(diagram_type
*pDiag
, output_type
*pOutput
,
277 BOOL bNoMarks
, BOOL bFirstLine
,
278 UINT uiListNumber
, UCHAR ucNFC
, const char *szListChar
,
279 long lLeftIndentation
, long lLeftIndentation1
)
282 size_t tIndex
, tNextFree
;
286 fail(pOutput
== NULL
);
287 fail(szListChar
== NULL
);
288 fail(lLeftIndentation
< 0);
290 if (tOptions
.eConversionType
== conversion_xml
) {
291 /* XML does its own indentation at rendering time */
297 lLeftIndentation
+= lLeftIndentation1
;
299 if (lLeftIndentation
< 0) {
300 lLeftIndentation
= 0;
302 vSetLeftIndentation(pDiag
, lLeftIndentation
);
305 if (lLeftIndentation
<= 0) {
306 DBG_HEX_C(ucNFC
!= 0x00, ucNFC
);
307 vSetLeftIndentation(pDiag
, 0);
312 if (tOptions
.eEncoding
== encoding_utf_8
) {
313 fail(strlen(szListChar
) > 3);
315 DBG_HEX_C(iscntrl((int)szListChar
[0]), szListChar
[0]);
316 fail(iscntrl((int)szListChar
[0]));
317 fail(szListChar
[1] != '\0');
322 case LIST_ARABIC_NUM
:
323 case LIST_NUMBER_TXT
:
324 tNextFree
= (size_t)sprintf(szLine
, "%u", uiListNumber
);
326 case LIST_UPPER_ROMAN
:
327 case LIST_LOWER_ROMAN
:
328 tNextFree
= tNumber2Roman(uiListNumber
,
329 ucNFC
== LIST_UPPER_ROMAN
, szLine
);
331 case LIST_UPPER_ALPHA
:
332 case LIST_LOWER_ALPHA
:
333 tNextFree
= tNumber2Alpha(uiListNumber
,
334 ucNFC
== LIST_UPPER_ALPHA
, szLine
);
336 case LIST_ORDINAL_NUM
:
337 case LIST_ORDINAL_TXT
:
338 if (uiListNumber
% 10 == 1 && uiListNumber
!= 11) {
340 (size_t)sprintf(szLine
, "%ust", uiListNumber
);
341 } else if (uiListNumber
% 10 == 2 && uiListNumber
!= 12) {
343 (size_t)sprintf(szLine
, "%und", uiListNumber
);
344 } else if (uiListNumber
% 10 == 3 && uiListNumber
!= 13) {
346 (size_t)sprintf(szLine
, "%urd", uiListNumber
);
349 (size_t)sprintf(szLine
, "%uth", uiListNumber
);
352 case LIST_OUTLINE_NUM
:
353 tNextFree
= (size_t)sprintf(szLine
, "%02u", uiListNumber
);
363 tNextFree
= (size_t)sprintf(szLine
, "%u", uiListNumber
);
/* Append the list character(s) and a space, then make room for the mark */
366 tNextFree
+= (size_t)sprintf(szLine
+ tNextFree
, "%.3s", szListChar
);
367 szLine
[tNextFree
++] = ' ';
368 szLine
[tNextFree
] = '\0';
369 lWidth
= lComputeStringWidth(szLine
, tNextFree
,
370 pOutput
->tFontRef
, pOutput
->usFontSize
);
371 lLeftIndentation
-= lWidth
;
372 if (lLeftIndentation
< 0) {
373 lLeftIndentation
= 0;
375 vSetLeftIndentation(pDiag
, lLeftIndentation
);
376 for (tIndex
= 0; tIndex
< tNextFree
; tIndex
++) {
377 vStoreChar((ULONG
)(UCHAR
)szLine
[tIndex
], FALSE
, pOutput
);
379 } /* end of vPutIndentation */
382 * vPutSeparatorLine - output a separator line
384 * A separator line is a horizontal line two inches long.
385 * Two inches equals 144000 millipoints.
388 vPutSeparatorLine(output_type
*pOutput
)
391 int iCounter
, iChars
;
394 fail(pOutput
== NULL
);
396 szOne
[0] = OUR_EM_DASH
;
398 lCharWidth
= lComputeStringWidth(szOne
, 1,
399 pOutput
->tFontRef
, pOutput
->usFontSize
);
400 NO_DBG_DEC(lCharWidth
);
401 iChars
= (int)((144000 + lCharWidth
/ 2) / lCharWidth
);
403 for (iCounter
= 0; iCounter
< iChars
; iCounter
++) {
404 vStoreCharacter((ULONG
)(UCHAR
)OUR_EM_DASH
, pOutput
);
406 } /* end of vPutSeparatorLine */
409 * pStartNextOutput - start the next output record
411 * returns a pointer to the next record
414 pStartNextOutput(output_type
*pCurrent
)
418 TRACE_MSG("pStartNextOutput");
420 if (pCurrent
->tNextFree
== 0) {
421 /* The current record is empty, re-use */
422 fail(pCurrent
->szStorage
[0] != '\0');
423 fail(pCurrent
->lStringWidth
!= 0);
426 /* The current record is in use, make a new one */
427 pNew
= xmalloc(sizeof(*pNew
));
428 pCurrent
->pNext
= pNew
;
429 pNew
->tStorageSize
= INITIAL_SIZE
;
430 pNew
->szStorage
= xmalloc(pNew
->tStorageSize
);
431 pNew
->szStorage
[0] = '\0';
433 pNew
->lStringWidth
= 0;
434 pNew
->ucFontColor
= FONT_COLOR_DEFAULT
;
435 pNew
->usFontStyle
= FONT_REGULAR
;
436 pNew
->tFontRef
= (drawfile_fontref
)0;
437 pNew
->usFontSize
= DEFAULT_FONT_SIZE
;
438 pNew
->pPrev
= pCurrent
;
441 } /* end of pStartNextOutput */
447 pStartNewOutput(output_type
*pAnchor
, output_type
*pLeftOver
)
449 output_type
*pCurr
, *pNext
;
450 USHORT usFontStyle
, usFontSize
;
451 drawfile_fontref tFontRef
;
454 TRACE_MSG("pStartNewOutput");
456 ucFontColor
= FONT_COLOR_DEFAULT
;
457 usFontStyle
= FONT_REGULAR
;
458 tFontRef
= (drawfile_fontref
)0;
459 usFontSize
= DEFAULT_FONT_SIZE
;
460 /* Free the old output space */
462 while (pCurr
!= NULL
) {
463 TRACE_MSG("Free the old output space");
464 pNext
= pCurr
->pNext
;
465 pCurr
->szStorage
= xfree(pCurr
->szStorage
);
466 if (pCurr
->pNext
== NULL
) {
467 ucFontColor
= pCurr
->ucFontColor
;
468 usFontStyle
= pCurr
->usFontStyle
;
469 tFontRef
= pCurr
->tFontRef
;
470 usFontSize
= pCurr
->usFontSize
;
472 pCurr
= xfree(pCurr
);
475 if (pLeftOver
== NULL
) {
476 /* Create new output space */
477 TRACE_MSG("Create new output space");
478 pLeftOver
= xmalloc(sizeof(*pLeftOver
));
479 pLeftOver
->tStorageSize
= INITIAL_SIZE
;
480 NO_DBG_DEC(pLeftOver
->tStorageSize
);
481 TRACE_MSG("before 2nd xmalloc");
482 pLeftOver
->szStorage
= xmalloc(pLeftOver
->tStorageSize
);
483 TRACE_MSG("after 2nd xmalloc");
484 pLeftOver
->szStorage
[0] = '\0';
485 pLeftOver
->tNextFree
= 0;
486 pLeftOver
->lStringWidth
= 0;
487 pLeftOver
->ucFontColor
= ucFontColor
;
488 pLeftOver
->usFontStyle
= usFontStyle
;
489 pLeftOver
->tFontRef
= tFontRef
;
490 pLeftOver
->usFontSize
= usFontSize
;
491 pLeftOver
->pPrev
= NULL
;
492 pLeftOver
->pNext
= NULL
;
494 fail(!bCheckDoubleLinkedList(pLeftOver
));
496 } /* end of pStartNewOutput */
/*
 * ulGetChar: reads characters of list eListID with usNextChar() and
 * translates them with ulTranslateCharacters(). Along the way it updates
 * the file-level flags bStartRow, bEndRowNorm and bStartStyle by comparing
 * the file offset of the character with the offsets kept in pRowInfo and
 * pStyleInfo, sets bEndRowFast from the row information of the character,
 * and records the image offset that belongs to a PICTURE character in
 * ulFileOffsetImage.
 * NOTE(review): numbered lines are missing from this listing (the loop
 * construct, return statements, preprocessor lines and some arguments of
 * ulTranslateCharacters); confirm against the complete source.
 */
499 * ulGetChar - get the next character from the specified list
501 * returns the next character of EOF
504 ulGetChar(FILE *pFile
, list_id_enum eListID
)
506 const font_block_type
*pCurr
;
507 ULONG ulChar
, ulFileOffset
, ulCharPos
;
508 row_info_enum eRowInfo
;
509 USHORT usChar
, usPropMod
;
/* Read one raw character together with its offsets and property modifier */
517 usChar
= usNextChar(pFile
, eListID
,
518 &ulFileOffset
, &ulCharPos
, &usPropMod
);
519 if (usChar
== (USHORT
)EOF
) {
525 eRowInfo
= ePropMod2RowInfo(usPropMod
, iWordVersion
);
528 bStartRow
= eRowInfo
== found_a_cell
||
530 ulFileOffset
== pRowInfo
->ulFileOffsetStart
&&
531 eRowInfo
!= found_not_a_cell
);
533 bStartRow
= pRowInfo
!= NULL
&&
534 ulFileOffset
== pRowInfo
->ulFileOffsetStart
;
536 NO_DBG_HEX_C(bStartRow
, pRowInfo
->ulFileOffsetStart
);
540 bEndRow
= eRowInfo
== found_end_of_row
||
542 ulFileOffset
== pRowInfo
->ulFileOffsetEnd
&&
543 eRowInfo
!= found_not_end_of_row
);
545 bEndRowNorm
= pRowInfo
!= NULL
&&
546 ulFileOffset
== pRowInfo
->ulFileOffsetEnd
;
548 NO_DBG_HEX_C(bEndRowNorm
, pRowInfo
->ulFileOffsetEnd
);
551 bEndRowFast
= eRowInfo
== found_end_of_row
;
552 NO_DBG_HEX_C(bEndRowFast
, pRowInfo
->ulFileOffsetEnd
);
556 bStartStyle
= pStyleInfo
!= NULL
&&
557 ulFileOffset
== pStyleInfo
->ulFileOffset
;
558 NO_DBG_HEX_C(bStartStyle
, ulFileOffset
);
560 if (pCurr
!= NULL
&& ulFileOffset
== pCurr
->ulFileOffset
) {
562 NO_DBG_HEX(ulFileOffset
);
564 pCurr
= pGetNextFontInfoListItem(pCurr
);
567 /* Skip embedded characters */
568 if (usChar
== START_EMBEDDED
) {
572 if (usChar
== END_IGNORE
|| usChar
== END_EMBEDDED
) {
579 ulChar
= ulTranslateCharacters(usChar
,
582 tOptions
.eConversionType
,
585 if (ulChar
== IGNORE_CHARACTER
) {
588 if (ulChar
== PICTURE
) {
589 ulFileOffsetImage
= ulGetPictInfoListItem(ulFileOffset
);
591 ulFileOffsetImage
= FC_INVALID
;
/* The style and font for the next paragraph come from usIstdNext */
593 if (ulChar
== PAR_END
) {
594 /* End of paragraph seen, prepare for the next */
595 vFillStyleFromStylesheet(usIstdNext
, &tStyleNext
);
596 vCorrectStyleValues(&tStyleNext
);
597 bStartStyleNext
= TRUE
;
598 vFillFontFromStylesheet(usIstdNext
, &tFontNext
);
599 vCorrectFontValues(&tFontNext
);
600 bStartFontNext
= TRUE
;
602 if (ulChar
== PAGE_BREAK
) {
603 /* Might be the start of a new section */
604 pSectionNext
= pGetSectionInfo(pSection
, ulCharPos
);
608 } /* end of ulGetChar */
/*
 * lGetWidthMax: converts the paragraph break value (a number of
 * characters) to millipoints with lChar2MilliPoints(). Values below
 * MIN_SCREEN_WIDTH or above MAX_SCREEN_WIDTH are replaced by that limit
 * before the conversion. A negative value is reported as a failure.
 * NOTE(review): the result for iParagraphBreak == 0 is on a line that is
 * missing from this listing; confirm against the complete source.
 */
611 * lGetWidthMax - get the maximum line width from the paragraph break value
613 * Returns the maximum line width in millipoints
616 lGetWidthMax(int iParagraphBreak
)
618 fail(iParagraphBreak
< 0);
620 if (iParagraphBreak
== 0) {
623 if (iParagraphBreak
< MIN_SCREEN_WIDTH
) {
624 return lChar2MilliPoints(MIN_SCREEN_WIDTH
);
626 if (iParagraphBreak
> MAX_SCREEN_WIDTH
) {
627 return lChar2MilliPoints(MAX_SCREEN_WIDTH
);
629 return lChar2MilliPoints(iParagraphBreak
);
630 } /* end of lGetWidthMax */
/*
 * bWordDecryptor: the main conversion routine. The visible lines show
 * this flow: the Word version is determined by iInitDocument(), the
 * options are read, header/footer and footnote text are prepared and
 * vPrologue2() is called; characters are then fetched one by one with
 * ulGetChar() and stored in the output list that starts at pAnchor, with
 * font, style, list, table, image, footnote/endnote and tab handling on
 * the way; lines that become too wide are split with pSplitList() and
 * written with vJustify2Window().
 * NOTE(review): many numbered lines are missing from this listing (loop
 * and switch constructs, conditions, return statements, call arguments),
 * so the comments added here describe only what the visible lines show;
 * confirm details against the complete source.
 */
633 * bWordDecryptor - turn Word to something more useful
635 * returns TRUE when succesful, otherwise FALSE
638 bWordDecryptor(FILE *pFile
, long lFilesize
, diagram_type
*pDiag
)
640 imagedata_type tImage
;
641 const style_block_type
*pStyleTmp
;
642 const font_block_type
*pFontTmp
;
643 const char *szListChar
;
644 output_type
*pAnchor
, *pOutput
, *pLeftOver
;
646 long lBeforeIndentation
, lAfterIndentation
;
647 long lLeftIndentation
, lLeftIndentation1
, lRightIndentation
;
648 long lWidthCurr
, lWidthMax
, lDefaultTabWidth
, lHalfSpaceWidth
, lTmp
;
649 list_id_enum eListID
;
650 image_info_enum eRes
;
651 UINT uiFootnoteNumber
, uiEndnoteNumber
, uiTmp
;
653 BOOL bWasTableRow
, bTableFontClosed
, bWasEndOfParagraph
;
654 BOOL bInList
, bWasInList
, bNoMarks
, bFirstLine
;
655 BOOL bAllCapitals
, bHiddenText
, bMarkDelText
, bSuccess
;
657 USHORT usFontStyle
, usFontStyleMinimal
, usFontSize
, usTmp
;
658 UCHAR ucFontNumber
, ucFontColor
;
659 UCHAR ucNFC
, ucAlignment
;
661 fail(pFile
== NULL
|| lFilesize
<= 0 || pDiag
== NULL
);
663 TRACE_MSG("bWordDecryptor");
/* A negative version is tested for right below; its handling is not visible */
665 iWordVersion
= iInitDocument(pFile
, lFilesize
);
666 if (iWordVersion
< 0) {
667 DBG_DEC(iWordVersion
);
671 vGetOptions(&tOptions
);
672 bOldMacFile
= bIsOldMacFile();
673 vPrepareHdrFtrText(pFile
);
674 vPrepareFootnoteText(pFile
);
676 vPrologue2(pDiag
, iWordVersion
);
679 #if defined(__riscos)
683 ulDocumentLength
= ulGetDocumentLength();
684 #endif /* __riscos */
685 pSection
= pGetSectionInfo(NULL
, 0);
686 pSectionNext
= pSection
;
687 lDefaultTabWidth
= lGetDefaultTabWidth();
688 DBG_DEC_C(lDefaultTabWidth
!= 36000, lDefaultTabWidth
);
689 pRowInfo
= pGetNextRowInfoListItem();
690 DBG_HEX_C(pRowInfo
!= NULL
, pRowInfo
->ulFileOffsetStart
);
691 DBG_HEX_C(pRowInfo
!= NULL
, pRowInfo
->ulFileOffsetEnd
);
692 DBG_MSG_C(pRowInfo
== NULL
, "No rows at all");
697 bWasTableRow
= FALSE
;
699 pStyleInfo
= pGetNextTextStyle(NULL
);
704 usIstdNext
= ISTD_NORMAL
;
706 pFontInfo
= pGetNextFontInfoListItem(NULL
);
707 DBG_HEX_C(pFontInfo
!= NULL
, pFontInfo
->ulFileOffset
);
708 DBG_MSG_C(pFontInfo
== NULL
, "No fonts at all");
711 usFontStyleMinimal
= FONT_REGULAR
;
712 usFontStyle
= FONT_REGULAR
;
713 usFontSize
= DEFAULT_FONT_SIZE
;
714 ucFontColor
= FONT_COLOR_DEFAULT
;
715 pAnchor
= pStartNewOutput(pAnchor
, NULL
);
717 pOutput
->ucFontColor
= ucFontColor
;
718 pOutput
->usFontStyle
= usFontStyle
;
719 pOutput
->tFontRef
= tOpenFont(ucFontNumber
, usFontStyle
, usFontSize
);
720 pOutput
->usFontSize
= usFontSize
;
721 bTableFontClosed
= TRUE
;
722 lBeforeIndentation
= 0;
723 lAfterIndentation
= 0;
724 lLeftIndentation
= 0;
725 lLeftIndentation1
= 0;
726 lRightIndentation
= 0;
727 bWasEndOfParagraph
= TRUE
;
730 ucNFC
= LIST_BULLETS
;
731 if (pStyleInfo
!= NULL
) {
732 szListChar
= pStyleInfo
->szListChar
;
733 pStyleTmp
= pStyleInfo
;
735 if (tStyleNext
.szListChar
[0] == '\0') {
736 vGetBulletValue(tOptions
.eConversionType
,
737 tOptions
.eEncoding
, tStyleNext
.szListChar
, 4);
739 szListChar
= tStyleNext
.szListChar
;
740 pStyleTmp
= &tStyleNext
;
743 ucAlignment
= ALIGNMENT_LEFT
;
744 bAllCapitals
= FALSE
;
746 bMarkDelText
= FALSE
;
747 lWidthMax
= lGetWidthMax(tOptions
.iParagraphBreak
);
748 NO_DBG_DEC(lWidthMax
);
752 uiFootnoteNumber
= 0;
/* Fetch the next character of the list that is currently being read */
756 ulChar
= ulGetChar(pFile
, eListID
);
757 if (ulChar
== (ULONG
)EOF
) {
758 if (bOutputContainsText(pAnchor
)) {
765 if (tOptions
.eConversionType
!=
767 eListID
= footnote_list
;
768 if (uiFootnoteNumber
!= 0) {
769 vPutSeparatorLine(pAnchor
);
771 uiFootnoteNumber
= 0;
775 /* No break or return */
777 eListID
= endnote_list
;
778 if (uiEndnoteNumber
!= 0) {
779 vPutSeparatorLine(pAnchor
);
785 eListID
= textbox_list
;
786 if (bExistsTextBox()) {
787 vPutSeparatorLine(pAnchor
);
792 eListID
= hdrtextbox_list
;
793 if (bExistsHdrTextBox()) {
794 vPutSeparatorLine(pAnchor
);
798 case hdrtextbox_list
:
800 eListID
= end_of_lists
;
803 if (eListID
== end_of_lists
) {
809 if (ulChar
== UNKNOWN_NOTE_CHAR
) {
812 ulChar
= FOOTNOTE_CHAR
;
815 ulChar
= ENDNOTE_CHAR
;
823 /* Begin of a tablerow found */
824 if (bOutputContainsText(pAnchor
)) {
829 fail(pAnchor
!= pOutput
);
830 if (bTableFontClosed
) {
831 /* Start special table font */
834 * Compensate for the fact that Word uses
835 * proportional fonts for its tables and we
836 * only one fixed-width font
/* Five sixths of the font size, rounded, then clamped to the table font limits */
838 uiTmp
= ((UINT
)usFontSize
* 5 + 3) / 6;
839 if (uiTmp
< MIN_TABLEFONT_SIZE
) {
840 uiTmp
= MIN_TABLEFONT_SIZE
;
841 } else if (uiTmp
> MAX_TABLEFONT_SIZE
) {
842 uiTmp
= MAX_TABLEFONT_SIZE
;
844 pOutput
->usFontSize
= (USHORT
)uiTmp
;
846 tOpenTableFont(pOutput
->usFontSize
);
847 pOutput
->usFontStyle
= FONT_REGULAR
;
848 pOutput
->ucFontColor
= FONT_COLOR_BLACK
;
849 bTableFontClosed
= FALSE
;
858 ulChar
!= HARD_RETURN
&&
859 ulChar
!= PAGE_BREAK
&&
860 ulChar
!= COLUMN_FEED
) {
862 * The end of a table should be followed by an
863 * empty line, like the end of a paragraph
866 vEndOfParagraph(pDiag
,
869 (long)pOutput
->usFontSize
* 600);
876 /* Ignore when in a table */
879 if (bOutputContainsText(pAnchor
)) {
884 if (ulChar
== PAGE_BREAK
) {
885 vEndOfPage(pDiag
, lAfterIndentation
,
886 pSection
!= pSectionNext
);
888 vEndOfParagraph(pDiag
,
898 if (bStartFont
|| (bStartFontNext
&& ulChar
!= PAR_END
)) {
899 /* Begin of a font found */
901 /* bStartFont takes priority */
902 fail(pFontInfo
== NULL
);
903 pFontTmp
= pFontInfo
;
905 pFontTmp
= &tFontNext
;
907 bAllCapitals
= bIsCapitals(pFontTmp
->usFontStyle
);
908 bHiddenText
= bIsHidden(pFontTmp
->usFontStyle
);
909 bMarkDelText
= bIsMarkDel(pFontTmp
->usFontStyle
);
910 usTmp
= pFontTmp
->usFontStyle
&
911 (FONT_BOLD
|FONT_ITALIC
|FONT_UNDERLINE
|
912 FONT_STRIKE
|FONT_MARKDEL
|
913 FONT_SUPERSCRIPT
|FONT_SUBSCRIPT
);
915 (usFontSize
!= pFontTmp
->usFontSize
||
916 ucFontNumber
!= pFontTmp
->ucFontNumber
||
917 usFontStyleMinimal
!= usTmp
||
918 ucFontColor
!= pFontTmp
->ucFontColor
)) {
919 pOutput
= pStartNextOutput(pOutput
);
921 pOutput
->ucFontColor
= pFontTmp
->ucFontColor
;
922 pOutput
->usFontStyle
= pFontTmp
->usFontStyle
;
923 pOutput
->usFontSize
= pFontTmp
->usFontSize
;
924 pOutput
->tFontRef
= tOpenFont(
925 pFontTmp
->ucFontNumber
,
926 pFontTmp
->usFontStyle
,
927 pFontTmp
->usFontSize
);
928 fail(!bCheckDoubleLinkedList(pAnchor
));
930 ucFontNumber
= pFontTmp
->ucFontNumber
;
931 usFontSize
= pFontTmp
->usFontSize
;
932 ucFontColor
= pFontTmp
->ucFontColor
;
933 usFontStyle
= pFontTmp
->usFontStyle
;
934 usFontStyleMinimal
= usTmp
;
936 /* Get the next font info */
937 pFontInfo
= pGetNextFontInfoListItem(pFontInfo
);
938 NO_DBG_HEX_C(pFontInfo
!= NULL
,
939 pFontInfo
->ulFileOffset
);
940 DBG_MSG_C(pFontInfo
== NULL
, "No more fonts");
943 bStartFontNext
= FALSE
;
946 if (bStartStyle
|| (bStartStyleNext
&& ulChar
!= PAR_END
)) {
948 /* Begin of a style found */
950 /* bStartStyle takes priority */
951 fail(pStyleInfo
== NULL
);
952 pStyleTmp
= pStyleInfo
;
954 pStyleTmp
= &tStyleNext
;
957 vStoreStyle(pDiag
, pOutput
, pStyleTmp
);
959 usIstdNext
= pStyleTmp
->usIstdNext
;
961 lTwips2MilliPoints(pStyleTmp
->usBeforeIndent
);
963 lTwips2MilliPoints(pStyleTmp
->usAfterIndent
);
965 lTwips2MilliPoints(pStyleTmp
->sLeftIndent
);
967 lTwips2MilliPoints(pStyleTmp
->sLeftIndent1
);
969 lTwips2MilliPoints(pStyleTmp
->sRightIndent
);
970 bInList
= bStyleImpliesList(pStyleTmp
, iWordVersion
);
971 bNoMarks
= !bInList
|| pStyleTmp
->bNumPause
;
972 ucNFC
= pStyleTmp
->ucNFC
;
973 szListChar
= pStyleTmp
->szListChar
;
974 ucAlignment
= pStyleTmp
->ucAlignment
;
975 if (bInList
&& !bWasInList
) {
976 /* Start of a list */
978 vStartOfList(pDiag
, ucNFC
,
979 bWasTableRow
&& !bIsTableRow
);
981 if (!bInList
&& bWasInList
) {
985 bWasInList
= bInList
;
987 pStyleInfo
= pGetNextTextStyle(pStyleInfo
);
988 NO_DBG_HEX_C(pStyleInfo
!= NULL
,
989 pStyleInfo
->ulFileOffset
);
990 DBG_MSG_C(pStyleInfo
== NULL
,
994 bStartStyleNext
= FALSE
;
997 if (bWasEndOfParagraph
) {
998 vStartOfParagraph1(pDiag
, lBeforeIndentation
);
1002 lTotalStringWidth(pAnchor
) == 0) {
1004 usListNumber
= usGetListValue(iListSeqNumber
,
1008 if (bInList
&& bFirstLine
) {
1009 vStartOfListItem(pDiag
, bNoMarks
);
1011 vPutIndentation(pDiag
, pAnchor
, bNoMarks
, bFirstLine
,
1012 usListNumber
, ucNFC
, szListChar
,
1013 lLeftIndentation
, lLeftIndentation1
);
1015 /* One number or mark per paragraph will do */
1019 if (bWasEndOfParagraph
) {
1020 vStartOfParagraph2(pDiag
);
1021 bWasEndOfParagraph
= FALSE
;
1026 (void)memset(&tImage
, 0, sizeof(tImage
));
1027 eRes
= eExamineImage(pFile
, ulFileOffsetImage
, &tImage
);
1029 case image_no_information
:
1032 case image_minimal_information
:
1033 case image_full_information
:
1035 if (bOutputContainsText(pAnchor
)) {
1041 bSuccess
= bTranslateImage(pDiag
, pFile
,
1042 eRes
== image_minimal_information
,
1043 ulFileOffsetImage
, &tImage
);
1051 vStoreString("[pic]", 5, pOutput
);
1056 if (tOptions
.eConversionType
== conversion_xml
) {
1057 vStoreCharacter((ULONG
)FOOTNOTE_OR_ENDNOTE
,
1061 vStoreCharacter((ULONG
)'[', pOutput
);
1062 vStoreNumberAsDecimal(uiFootnoteNumber
, pOutput
);
1063 vStoreCharacter((ULONG
)']', pOutput
);
1067 vStoreCharacter((ULONG
)'[', pOutput
);
1068 vStoreNumberAsRoman(uiEndnoteNumber
, pOutput
);
1069 vStoreCharacter((ULONG
)']', pOutput
);
1071 case UNKNOWN_NOTE_CHAR
:
1072 vStoreString("[?]", 3, pOutput
);
1076 vStoreCharacter((ULONG
)'\n', pOutput
);
1079 if (bOutputContainsText(pAnchor
)) {
1082 vMove2NextLine(pDiag
,
1083 pOutput
->tFontRef
, pOutput
->usFontSize
);
1086 vEndOfParagraph(pDiag
,
1088 pOutput
->usFontSize
,
1090 bWasEndOfParagraph
= TRUE
;
1094 vStoreCharacter((ULONG
)'\n', pOutput
);
1097 if (bOutputContainsText(pAnchor
)) {
1100 vMove2NextLine(pDiag
,
1101 pOutput
->tFontRef
, pOutput
->usFontSize
);
1107 pSection
= pSectionNext
;
1109 case TABLE_SEPARATOR
:
1111 vStoreCharacter(ulChar
, pOutput
);
1114 vStoreCharacter((ULONG
)' ', pOutput
);
1115 vStoreCharacter((ULONG
)TABLE_SEPARATOR_CHAR
, pOutput
);
1119 tOptions
.eConversionType
== conversion_xml
) {
1120 vStoreCharacter((ULONG
)' ', pOutput
);
1123 if (tOptions
.iParagraphBreak
== 0 &&
1124 (tOptions
.eConversionType
== conversion_text
||
1125 tOptions
.eConversionType
== conversion_fmt_text
)) {
1126 /* No logical lines, so no tab expansion */
1127 vStoreCharacter(TAB
, pOutput
);
1130 lHalfSpaceWidth
= (lComputeSpaceWidth(
1132 pOutput
->usFontSize
) + 1) / 2;
1133 lTmp
= lTotalStringWidth(pAnchor
);
1134 lTmp
+= lDrawUnits2MilliPoints(pDiag
->lXleft
);
1135 lTmp
/= lDefaultTabWidth
;
1137 vStoreCharacter((ULONG
)FILLER_CHAR
, pOutput
);
1138 lWidthCurr
= lTotalStringWidth(pAnchor
);
1140 lDrawUnits2MilliPoints(pDiag
->lXleft
);
1141 } while (lTmp
== lWidthCurr
/ lDefaultTabWidth
&&
1142 lWidthCurr
< lWidthMax
+ lRightIndentation
);
1145 if (bHiddenText
&& tOptions
.bHideHiddenText
) {
1148 if (bMarkDelText
&& tOptions
.bRemoveRemovedText
) {
1151 if (ulChar
== UNICODE_ELLIPSIS
&&
1152 tOptions
.eEncoding
!= encoding_utf_8
) {
1153 vStoreString("...", 3, pOutput
);
1156 ulChar
= ulToUpper(ulChar
);
1158 vStoreCharacter(ulChar
, pOutput
);
1163 if (bWasTableRow
&& !bIsTableRow
) {
1164 /* End of a table */
1166 /* Resume normal font */
1167 NO_DBG_MSG("End of table font");
1169 bTableFontClosed
= TRUE
;
1170 pOutput
->ucFontColor
= ucFontColor
;
1171 pOutput
->usFontStyle
= usFontStyle
;
1172 pOutput
->usFontSize
= usFontSize
;
1173 pOutput
->tFontRef
= tOpenFont(
1174 ucFontNumber
, usFontStyle
, usFontSize
);
1176 bWasTableRow
= bIsTableRow
;
1179 fail(pAnchor
!= pOutput
);
1180 if (!bEndRowNorm
&& !bEndRowFast
) {
1183 /* End of a table row */
1185 fail(pRowInfo
== NULL
);
1186 vTableRow2Window(pDiag
, pAnchor
, pRowInfo
,
1187 tOptions
.eConversionType
,
1188 tOptions
.iParagraphBreak
);
1193 pAnchor
= pStartNewOutput(pAnchor
, NULL
);
1196 pRowInfo
= pGetNextRowInfoListItem();
1198 bIsTableRow
= FALSE
;
1199 bEndRowNorm
= FALSE
;
1200 bEndRowFast
= FALSE
;
1201 NO_DBG_HEX_C(pRowInfo
!= NULL
,
1202 pRowInfo
->ulFileOffsetStart
);
1203 NO_DBG_HEX_C(pRowInfo
!= NULL
,
1204 pRowInfo
->ulFileOffsetEnd
);
1207 lWidthCurr
= lTotalStringWidth(pAnchor
);
1208 lWidthCurr
+= lDrawUnits2MilliPoints(pDiag
->lXleft
);
1209 if (lWidthCurr
< lWidthMax
+ lRightIndentation
) {
/* Split the list, write the first part and continue with the leftover */
1212 pLeftOver
= pSplitList(pAnchor
);
1213 vJustify2Window(pDiag
, pAnchor
,
1214 lWidthMax
, lRightIndentation
, ucAlignment
);
1215 pAnchor
= pStartNewOutput(pAnchor
, pLeftOver
);
1216 for (pOutput
= pAnchor
;
1217 pOutput
->pNext
!= NULL
;
1218 pOutput
= pOutput
->pNext
)
1220 fail(pOutput
== NULL
);
1221 if (lTotalStringWidth(pAnchor
) > 0) {
1222 vSetLeftIndentation(pDiag
, lLeftIndentation
);
/* Replace the list by one fresh record, then free that record as well */
1226 pAnchor
= pStartNewOutput(pAnchor
, NULL
);
1227 pAnchor
->szStorage
= xfree(pAnchor
->szStorage
);
1228 pAnchor
= xfree(pAnchor
);
1233 } /* end of bWordDecryptor */
1236 * lLastStringWidth - compute the width of the last part of the output string
1239 lLastStringWidth(const output_type
*pAnchor
)
1241 const output_type
*pCurr
, *pStart
;
1244 for (pCurr
= pAnchor
; pCurr
!= NULL
; pCurr
= pCurr
->pNext
) {
1245 if (pCurr
->tNextFree
== 1 &&
1246 (pCurr
->szStorage
[0] == PAR_END
||
1247 pCurr
->szStorage
[0] == HARD_RETURN
)) {
1248 /* Found a separator. Start after the separator */
1249 pStart
= pCurr
->pNext
;
1252 if (pStart
== NULL
) {
1253 /* No separators. Use the whole output string */
1256 return lTotalStringWidth(pStart
);
1257 } /* end of lLastStringWidth */
/*
 * pHdrFtrDecryptor: builds an output list from the header/footer text
 * between the character positions ulCharPosStart and ulCharPosNext.
 * Visible flow: the reader is positioned with usToHdrFtrPosition() and
 * advanced with usNextChar() on hdrftr_list until EOF or ulCharPosNext;
 * embedded parts are skipped, other characters are translated with
 * ulTranslateCharacters() and stored; when the last part of the string
 * reaches lWidthMax + lRightIndentation the list is split and a record
 * holding one HARD_RETURN is placed between the two parts.
 * NOTE(review): numbered lines are missing from this listing (the switch
 * construct, return statements, some call arguments); confirm against the
 * complete source.
 */
1260 * pHdrFtrDecryptor - turn a header/footer list element to something useful
1263 pHdrFtrDecryptor(FILE *pFile
, ULONG ulCharPosStart
, ULONG ulCharPosNext
)
1265 output_type
*pAnchor
, *pOutput
, *pLeftOver
;
1266 ULONG ulChar
, ulFileOffset
, ulCharPos
;
1267 long lWidthCurr
, lWidthMax
;
1268 long lRightIndentation
;
1273 fail(iWordVersion
< 0);
1274 fail(tOptions
.eConversionType
== conversion_unknown
);
1275 fail(tOptions
.eEncoding
== 0);
1277 if (ulCharPosStart
== ulCharPosNext
) {
1278 /* There are no bytes to decrypt */
1282 lRightIndentation
= 0;
1283 ucAlignment
= ALIGNMENT_LEFT
;
1285 lWidthMax
= lGetWidthMax(tOptions
.iParagraphBreak
);
/* Start with a new list; there is no old list to free */
1286 pAnchor
= pStartNewOutput(NULL
, NULL
);
1288 pOutput
->tFontRef
= tOpenFont(0, FONT_REGULAR
, DEFAULT_FONT_SIZE
);
1289 usChar
= usToHdrFtrPosition(pFile
, ulCharPosStart
);
1290 ulCharPos
= ulCharPosStart
;
1291 ulFileOffset
= ulCharPos2FileOffset(ulCharPos
);
1292 while (usChar
!= (USHORT
)EOF
&& ulCharPos
!= ulCharPosNext
) {
1293 /* Skip embedded characters */
1294 if (usChar
== START_EMBEDDED
) {
1296 } else if (usChar
== END_IGNORE
|| usChar
== END_EMBEDDED
) {
1299 /* Translate character */
1300 if (bSkip
|| usChar
== END_IGNORE
|| usChar
== END_EMBEDDED
) {
1301 ulChar
= IGNORE_CHARACTER
;
1303 ulChar
= ulTranslateCharacters(usChar
,
1306 tOptions
.eConversionType
,
1310 /* Process character */
1311 if (ulChar
!= IGNORE_CHARACTER
) {
1314 vStoreString("[pic]", 5, pOutput
);
1320 /* To the next substring */
1321 pOutput
= pStartNextOutput(pOutput
);
1323 pOutput
->tFontRef
= tOpenFont(0,
1324 FONT_REGULAR
, DEFAULT_FONT_SIZE
);
1325 /* A substring with just one character */
1326 if (ulChar
== HARD_RETURN
) {
1327 vStoreCharacter(HARD_RETURN
, pOutput
);
1329 vStoreCharacter(PAR_END
, pOutput
);
1331 /* To the next substring */
1332 pOutput
= pStartNextOutput(pOutput
);
1334 pOutput
->tFontRef
= tOpenFont(0,
1335 FONT_REGULAR
, DEFAULT_FONT_SIZE
);
1336 fail(!bCheckDoubleLinkedList(pAnchor
));
1338 case TABLE_SEPARATOR
:
1339 vStoreCharacter((ULONG
)' ', pOutput
);
1340 vStoreCharacter((ULONG
)TABLE_SEPARATOR_CHAR
,
1344 vStoreCharacter((ULONG
)FILLER_CHAR
, pOutput
);
1347 vStoreCharacter(ulChar
, pOutput
);
1351 lWidthCurr
= lLastStringWidth(pAnchor
);
1352 if (lWidthCurr
>= lWidthMax
+ lRightIndentation
) {
/* Too wide: cut the list and insert a HARD_RETURN record at the cut */
1353 pLeftOver
= pSplitList(pAnchor
);
1354 for (pOutput
= pAnchor
;
1355 pOutput
->pNext
!= NULL
;
1356 pOutput
= pOutput
->pNext
)
1358 fail(pOutput
== NULL
);
1359 /* To the next substring */
1360 pOutput
= pStartNextOutput(pOutput
);
1361 /* A substring with just one HARD_RETURN */
1362 vStoreCharacter(HARD_RETURN
, pOutput
);
1363 /* Put the leftover piece(s) at the end */
1364 pOutput
->pNext
= pLeftOver
;
1365 if (pLeftOver
!= NULL
) {
1366 pLeftOver
->pPrev
= pOutput
;
1368 fail(!bCheckDoubleLinkedList(pAnchor
));
1369 for (pOutput
= pAnchor
;
1370 pOutput
->pNext
!= NULL
;
1371 pOutput
= pOutput
->pNext
)
1373 fail(pOutput
== NULL
);
1375 usChar
= usNextChar(pFile
, hdrftr_list
,
1376 &ulFileOffset
, &ulCharPos
, NULL
);
1379 if (bOutputContainsText(pAnchor
)) {
/* Replace the list by one fresh record, then free that record as well */
1382 pAnchor
= pStartNewOutput(pAnchor
, NULL
);
1383 pAnchor
->szStorage
= xfree(pAnchor
->szStorage
);
1384 pAnchor
= xfree(pAnchor
);
1386 } /* end of pHdrFtrDecryptor */
/*
 * szFootnoteDecryptor (the original header line below still says
 * pFootnoteDecryptor): collects the footnote text between the character
 * positions ulCharPosStart and ulCharPosNext in a text buffer.
 * Visible flow: the reader is positioned with usToFootnotePosition() and
 * advanced with usNextChar() on footnote_list; leading footnote marks,
 * paragraph ends and spaces are skipped; embedded parts and footnote marks
 * are ignored; a picture becomes "[pic]"; other characters are converted
 * with tUcs2Utf8() and appended; trailing spaces are removed at the end.
 * NOTE(review): numbered lines are missing from this listing (switch
 * cases, return statements, some declarations and call arguments), and
 * the visible comment says this is only implemented for XML output;
 * confirm details against the complete source.
 */
1389 * pFootnoteDecryptor - turn a footnote text list element into text
1392 szFootnoteDecryptor(FILE *pFile
, ULONG ulCharPosStart
, ULONG ulCharPosNext
)
1395 ULONG ulChar
, ulFileOffset
, ulCharPos
;
1397 size_t tLen
, tIndex
, tNextFree
, tStorageSize
;
1401 fail(iWordVersion
< 0);
1402 fail(tOptions
.eConversionType
== conversion_unknown
);
1403 fail(tOptions
.eEncoding
== 0);
1405 if (ulCharPosStart
== ulCharPosNext
) {
1406 /* There are no bytes to decrypt */
1410 if (tOptions
.eConversionType
!= conversion_xml
) {
1411 /* Only implemented for XML output */
1417 /* Initialise the text buffer */
/* The buffer starts at INITIAL_SIZE bytes and is enlarged further down */
1418 tStorageSize
= INITIAL_SIZE
;
1419 szText
= xmalloc(tStorageSize
);
1421 szText
[tNextFree
] = '\0';
1423 /* Goto the start */
1424 usChar
= usToFootnotePosition(pFile
, ulCharPosStart
);
1425 ulCharPos
= ulCharPosStart
;
1426 ulFileOffset
= ulCharPos2FileOffset(ulCharPos
);
1427 /* Skip the unwanted starting characters */
1428 while (usChar
!= (USHORT
)EOF
&& ulCharPos
!= ulCharPosNext
&&
1429 (usChar
== FOOTNOTE_OR_ENDNOTE
||
1430 usChar
== PAR_END
||
1432 usChar
== (USHORT
)' ')) {
1433 usChar
= usNextChar(pFile
, footnote_list
,
1434 &ulFileOffset
, &ulCharPos
, NULL
);
1436 /* Process the footnote text */
1437 while (usChar
!= (USHORT
)EOF
&& ulCharPos
!= ulCharPosNext
) {
1438 /* Skip embedded characters */
1439 if (usChar
== START_EMBEDDED
) {
1441 } else if (usChar
== END_IGNORE
|| usChar
== END_EMBEDDED
) {
1444 /* Translate character */
1446 usChar
== END_IGNORE
||
1447 usChar
== END_EMBEDDED
||
1448 usChar
== FOOTNOTE_OR_ENDNOTE
) {
1449 ulChar
= IGNORE_CHARACTER
;
1451 ulChar
= ulTranslateCharacters(usChar
,
1454 tOptions
.eConversionType
,
1458 /* Process character */
1459 if (ulChar
== PICTURE
) {
1461 strcpy(szResult
, "[pic]");
1462 } else if (ulChar
== IGNORE_CHARACTER
) {
1471 ulChar
= (ULONG
)PAR_END
;
1474 ulChar
= (ULONG
)' ';
1479 tLen
= tUcs2Utf8(ulChar
, szResult
, sizeof(szResult
));
1481 /* Add the results to the text */
/* Grow by EXTENTION_SIZE when the result plus the terminator does not fit */
1482 if (tNextFree
+ tLen
+ 1 > tStorageSize
) {
1483 tStorageSize
+= EXTENTION_SIZE
;
1484 szText
= xrealloc(szText
, tStorageSize
);
1486 for (tIndex
= 0; tIndex
< tLen
; tIndex
++) {
1487 szText
[tNextFree
++] = szResult
[tIndex
];
1489 szText
[tNextFree
] = '\0';
1490 /* Next character */
1491 usChar
= usNextChar(pFile
, footnote_list
,
1492 &ulFileOffset
, &ulCharPos
, NULL
);
1494 /* Remove redundant spaces */
/* Trailing spaces are overwritten with the terminator, working backwards */
1495 while (tNextFree
!= 0 && szText
[tNextFree
- 1] == ' ') {
1496 szText
[tNextFree
- 1] = '\0';
1499 if (tNextFree
== 0) {
1501 szText
= xfree(szText
);
1505 } /* end of szFootnoteDecryptor */