crashtesting: assert on reimport of docx export of ooo102874-2.doc
[LibreOffice.git] / sw / source / filter / html / swhtml.hxx
blob1f50803633dc4407d23cfd7575576daf6aa26d7c
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
19 #ifndef INCLUDED_SW_SOURCE_FILTER_HTML_SWHTML_HXX
20 #define INCLUDED_SW_SOURCE_FILTER_HTML_SWHTML_HXX
22 #include <config_java.h>
24 #include <sfx2/sfxhtml.hxx>
25 #include <svl/listener.hxx>
26 #include <svl/macitem.hxx>
27 #include <svtools/htmltokn.h>
28 #include <editeng/svxenum.hxx>
29 #include <rtl/ref.hxx>
30 #include <rtl/ustrbuf.hxx>
31 #include <deletelistener.hxx>
32 #include <fmtftn.hxx>
33 #include <fltshell.hxx>
34 #include <swtypes.hxx>
35 #include <txtftn.hxx>
36 #include <com/sun/star/drawing/XShape.hpp>
37 #include <com/sun/star/form/XFormComponent.hpp>
38 #include <com/sun/star/beans/XPropertySet.hpp>
40 #include <memory>
41 #include <utility>
42 #include <vector>
43 #include <deque>
44 #include <stack>
45 #include <set>
// Forward declarations of types referenced by the declarations below.
class SfxMedium;
class SfxViewFrame;
class SdrObject;
class SvxMacroTableDtor;
class SwDoc;
class SwPaM;
class SwViewShell;
class SwStartNode;
class SwFormatColl;
class SwField;
class SwHTMLForm_Impl;
class SwApplet_Impl;
struct SwHTMLFootEndNote_Impl;
class HTMLTableCnts;
struct SwPending;
class SvxCSS1PropertyInfo;
struct ImplSVEvent;
65 constexpr tools::Long HTML_CJK_PARSPACE = o3tl::toTwips(25, o3tl::Length::mm10); // 2.5mm
66 constexpr tools::Long HTML_CTL_PARSPACE = o3tl::toTwips(25, o3tl::Length::mm10); // 2.5mm
68 constexpr tools::Long HTML_DFLT_IMG_WIDTH = o3tl::toTwips(2, o3tl::Length::cm); // 2cm
69 constexpr tools::Long HTML_DFLT_IMG_HEIGHT = o3tl::toTwips(1, o3tl::Length::cm); // 1cm
71 // some things you often need
72 extern HTMLOptionEnum<SvxAdjust> const aHTMLPAlignTable[];
73 extern HTMLOptionEnum<sal_Int16> const aHTMLImgHAlignTable[];
74 extern HTMLOptionEnum<sal_Int16> const aHTMLImgVAlignTable[];
// attribute stack:

class HTMLAttr;
// Double-ended queue of HTMLAttr pointers.
using HTMLAttrs = std::deque<HTMLAttr *>;
81 // Table of attributes: The order here is important: The attributes in the
82 // beginning of the table will set first in EndAllAttrs.
83 struct HTMLAttrTable
85 HTMLAttr* pKeep; // frame attributes
86 HTMLAttr* pBox;
87 HTMLAttr* pBrush;
88 HTMLAttr* pBreak;
89 HTMLAttr* pPageDesc;
91 HTMLAttr* pFirstLineIndent; // paragraph attributes
92 HTMLAttr* pTextLeftMargin;
93 HTMLAttr* pRightMargin;
94 HTMLAttr* pULSpace;
95 HTMLAttr* pLineSpacing;
96 HTMLAttr* pAdjust;
97 HTMLAttr* pDropCap;
98 HTMLAttr* pSplit;
99 HTMLAttr* pWidows;
100 HTMLAttr* pOrphans;
101 HTMLAttr* pDirection;
103 HTMLAttr* pCharFormats; // text attributes
104 HTMLAttr* pINetFormat;
106 HTMLAttr* pBold; // character attributes
107 HTMLAttr* pBoldCJK;
108 HTMLAttr* pBoldCTL;
109 HTMLAttr* pItalic;
110 HTMLAttr* pItalicCJK;
111 HTMLAttr* pItalicCTL;
112 HTMLAttr* pStrike;
113 HTMLAttr* pUnderline;
114 HTMLAttr* pBlink;
115 HTMLAttr* pFont;
116 HTMLAttr* pFontCJK;
117 HTMLAttr* pFontCTL;
118 HTMLAttr* pFontHeight;
119 HTMLAttr* pFontHeightCJK;
120 HTMLAttr* pFontHeightCTL;
121 HTMLAttr* pFontColor;
122 HTMLAttr* pEscapement;
123 HTMLAttr* pCaseMap;
124 HTMLAttr* pKerning; // (only for SPACER)
125 HTMLAttr* pCharBrush; // character background
126 HTMLAttr* pLanguage;
127 HTMLAttr* pLanguageCJK;
128 HTMLAttr* pLanguageCTL;
129 HTMLAttr* pCharBox;
132 class HTMLAttr
134 friend class SwHTMLParser;
135 friend class CellSaveStruct;
137 SwNodeIndex m_nStartPara;
138 SwNodeIndex m_nEndPara;
139 sal_Int32 m_nStartContent;
140 sal_Int32 m_nEndContent;
141 bool m_bInsAtStart : 1;
142 bool m_bLikePara : 1; // set attribute above the whole paragraph
143 bool m_bValid : 1; // is the attribute valid?
145 std::unique_ptr<SfxPoolItem> m_pItem;
146 std::shared_ptr<HTMLAttrTable> m_xAttrTab;
147 HTMLAttr *m_pNext; // still to close attributes with different values
148 HTMLAttr *m_pPrev; // already closed but not set attributes
149 HTMLAttr **m_ppHead; // list head
151 HTMLAttr( const SwPosition& rPos, const SfxPoolItem& rItem,
152 HTMLAttr **pHd, std::shared_ptr<HTMLAttrTable> xAttrTab );
154 HTMLAttr( const HTMLAttr &rAttr, const SwNode &rEndPara,
155 sal_Int32 nEndCnt, HTMLAttr **pHd, std::shared_ptr<HTMLAttrTable> xAttrTab );
157 public:
159 ~HTMLAttr();
161 HTMLAttr *Clone( const SwNode& rEndPara, sal_Int32 nEndCnt ) const;
162 void Reset( const SwNode& rSttPara, sal_Int32 nSttCnt,
163 HTMLAttr **pHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab );
164 inline void SetStart( const SwPosition& rPos );
166 SwNodeOffset GetStartParagraphIdx() const { return m_nStartPara.GetIndex(); }
167 SwNodeOffset GetEndParagraphIdx() const { return m_nEndPara.GetIndex(); }
169 const SwNodeIndex& GetStartParagraph() const { return m_nStartPara; }
170 const SwNodeIndex& GetEndParagraph() const { return m_nEndPara; }
172 sal_Int32 GetStartContent() const { return m_nStartContent; }
173 sal_Int32 GetEndContent() const { return m_nEndContent; }
175 bool IsLikePara() const { return m_bLikePara; }
176 void SetLikePara() { m_bLikePara = true; }
178 SfxPoolItem& GetItem() { return *m_pItem; }
179 const SfxPoolItem& GetItem() const { return *m_pItem; }
181 HTMLAttr *GetNext() const { return m_pNext; }
182 void InsertNext( HTMLAttr *pNxt ) { m_pNext = pNxt; }
184 HTMLAttr *GetPrev() const { return m_pPrev; }
185 void InsertPrev( HTMLAttr *pPrv );
186 void ClearPrev() { m_pPrev = nullptr; }
188 void SetHead(HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab)
190 m_ppHead = ppHd;
191 m_xAttrTab = rAttrTab;
194 // During setting attributes from styles it can happen that these
195 // shouldn't be set anymore. To delete them would be very expensive, because
196 // you don't know all the places where they are linked in. Therefore they're
197 // made invalid and deleted at the next call of SetAttr_().
198 void Invalidate() { m_bValid = false; }
class HTMLAttrContext_SaveDoc;

// How paragraph spacing is treated when a paragraph is appended.
// Unscoped on purpose: the enumerators are used unqualified
// (e.g. as the default argument AM_NORMAL).
enum SwHTMLAppendMode {
     AM_NORMAL,         // no paragraph spacing handling
     AM_NOSPACE,        // set spacing hard to 0cm
     AM_SPACE,          // set spacing hard to 0.5cm
     AM_SOFTNOSPACE,    // don't set spacing, but save 0cm
     AM_NONE            // no append
};
211 class HTMLAttrContext
213 HTMLAttrs m_aAttrs; // the attributes created in the context
215 OUString m_aClass; // context class
217 std::unique_ptr<HTMLAttrContext_SaveDoc> m_pSaveDocContext;
218 std::unique_ptr<SfxItemSet> m_pFrameItemSet;
220 HtmlTokenId m_nToken; // the token of the context
222 sal_uInt16 m_nTextFormatColl; // a style created in the context or zero
224 sal_uInt16 m_nLeftMargin; // a changed left border
225 sal_uInt16 m_nRightMargin; // a changed right border
226 sal_uInt16 m_nFirstLineIndent; // a changed first line indent
228 sal_uInt16 m_nUpperSpace;
229 sal_uInt16 m_nLowerSpace;
231 SwHTMLAppendMode m_eAppend;
233 bool m_bLRSpaceChanged : 1; // left/right border, changed indent?
234 bool m_bULSpaceChanged : 1; // top/bottom border changed?
235 bool m_bDefaultTextFormatColl : 1;// nTextFormatColl is only default
236 bool m_bSpansSection : 1; // the context opens a SwSection
237 bool m_bPopStack : 1; // delete above stack elements
238 bool m_bFinishPREListingXMP : 1;
239 bool m_bRestartPRE : 1;
240 bool m_bRestartXMP : 1;
241 bool m_bRestartListing : 1;
242 bool m_bHeaderOrFooter : 1;
244 bool m_bVisible = true;
246 public:
247 void ClearSaveDocContext();
249 HTMLAttrContext( HtmlTokenId nTokn, sal_uInt16 nPoolId, OUString aClass,
250 bool bDfltColl=false );
251 explicit HTMLAttrContext( HtmlTokenId nTokn );
252 ~HTMLAttrContext();
254 HtmlTokenId GetToken() const { return m_nToken; }
256 sal_uInt16 GetTextFormatColl() const { return m_bDefaultTextFormatColl ? 0 : m_nTextFormatColl; }
257 sal_uInt16 GetDefaultTextFormatColl() const { return m_bDefaultTextFormatColl ? m_nTextFormatColl : 0; }
259 const OUString& GetClass() const { return m_aClass; }
261 inline void SetMargins( sal_uInt16 nLeft, sal_uInt16 nRight, short nIndent );
263 bool IsLRSpaceChanged() const { return m_bLRSpaceChanged; }
264 inline void GetMargins( sal_uInt16& nLeft, sal_uInt16& nRight,
265 short &nIndent ) const;
267 inline void SetULSpace( sal_uInt16 nUpper, sal_uInt16 nLower );
268 bool IsULSpaceChanged() const { return m_bULSpaceChanged; }
269 inline void GetULSpace( sal_uInt16& rUpper, sal_uInt16& rLower ) const;
271 bool HasAttrs() const { return !m_aAttrs.empty(); }
272 const HTMLAttrs& GetAttrs() const { return m_aAttrs; }
273 HTMLAttrs& GetAttrs() { return m_aAttrs; }
275 void SetSpansSection( bool bSet ) { m_bSpansSection = bSet; }
276 bool GetSpansSection() const { return m_bSpansSection; }
278 void SetPopStack( bool bSet ) { m_bPopStack = bSet; }
279 bool GetPopStack() const { return m_bPopStack; }
281 bool HasSaveDocContext() const { return m_pSaveDocContext!=nullptr; }
282 HTMLAttrContext_SaveDoc *GetSaveDocContext( bool bCreate=false );
284 const SfxItemSet *GetFrameItemSet() const { return m_pFrameItemSet.get(); }
285 SfxItemSet *GetFrameItemSet( SwDoc *pCreateDoc );
287 void SetFinishPREListingXMP( bool bSet ) { m_bFinishPREListingXMP = bSet; }
288 bool IsFinishPREListingXMP() const { return m_bFinishPREListingXMP; }
290 void SetRestartPRE( bool bSet ) { m_bRestartPRE = bSet; }
291 bool IsRestartPRE() const { return m_bRestartPRE; }
293 void SetRestartXMP( bool bSet ) { m_bRestartXMP = bSet; }
294 bool IsRestartXMP() const { return m_bRestartXMP; }
296 void SetRestartListing( bool bSet ) { m_bRestartListing = bSet; }
297 bool IsRestartListing() const { return m_bRestartListing; }
299 void SetHeaderOrFooter( bool bSet ) { m_bHeaderOrFooter = bSet; }
300 bool IsHeaderOrFooter() const { return m_bHeaderOrFooter; }
302 void SetAppendMode( SwHTMLAppendMode eMode ) { m_eAppend = eMode; }
303 SwHTMLAppendMode GetAppendMode() const { return m_eAppend; }
305 void SetVisible(bool bVisible) { m_bVisible = bVisible; }
306 bool IsVisible() const { return m_bVisible; }
309 typedef std::vector<std::unique_ptr<HTMLAttrContext>> HTMLAttrContexts;
311 class HTMLTable;
312 class SwCSS1Parser;
313 class SwHTMLNumRuleInfo;
315 typedef std::vector<std::unique_ptr<ImageMap>> ImageMaps;
// Bit flags passed to SwHTMLParser::SaveDocContext().
enum class HtmlContextFlags {
    ProtectStack    = 0x0001,
    StripPara       = 0x0002,
    KeepNumrule     = 0x0004,
    HeaderDist      = 0x0008,
    FooterDist      = 0x0010,
    KeepAttrs       = 0x0020,
    MultiColMask    = StripPara | KeepNumrule | KeepAttrs // for headers, footers or footnotes
};
326 namespace o3tl
328 template<> struct typed_flags<HtmlContextFlags> : is_typed_flags<HtmlContextFlags, 0x03f> {};
// Bit flags passed to SwHTMLParser::SetFrameFormatAttrs().
enum class HtmlFrameFormatFlags {
    Box                 = 0x0001,
    Background          = 0x0002,
    Padding             = 0x0004,
    Direction           = 0x0008,
};
337 namespace o3tl
339 template<> struct typed_flags<HtmlFrameFormatFlags> : is_typed_flags<HtmlFrameFormatFlags, 0x0f> {};
342 class SwHTMLFrameFormatListener : public SvtListener
344 SwFrameFormat* m_pFrameFormat;
345 public:
346 SwHTMLFrameFormatListener(SwFrameFormat* pFrameFormat);
347 SwFrameFormat* GetFrameFormat() { return m_pFrameFormat; }
348 virtual void Notify(const SfxHint&) override;
351 class SwHTMLParser : public SfxHTMLParser, public SvtListener
353 friend class SectionSaveStruct;
354 friend class CellSaveStruct;
355 friend class CaptionSaveStruct;
358 Progress bar
360 std::unique_ptr<ImportProgress> m_xProgress;
362 OUString m_aPathToFile;
363 OUString m_sBaseURL;
364 OUString m_aBasicLib;
365 OUString m_aBasicModule;
366 OUString m_aScriptSource; // content of the current script block
367 OUString m_aScriptType; // type of read script (StarBasic/VB/JAVA)
368 OUString m_aScriptURL; // script URL
369 OUString m_aStyleSource; // content of current style sheet
370 OUString m_aContents; // text of current marquee, field and so
371 OUStringBuffer m_sTitle;
372 OUString m_aUnknownToken; // a started unknown token
373 OUString m_aBulletGrfs[MAXLEVEL];
374 OUString m_sJmpMark;
376 std::vector<sal_uInt16> m_aBaseFontStack; // stack for <BASEFONT>
377 // Bit 0-2: font size (1-7)
378 std::vector<sal_uInt16> m_aFontStack; // stack for <FONT>, <BIG>, <SMALL>
379 // Bit 0-2: font size (1-7)
380 // Bit 15: font colour was set
382 HTMLAttrs m_aSetAttrTab;// "closed", not set attributes
383 HTMLAttrs m_aParaAttrs; // temporary paragraph attributes
384 std::shared_ptr<HTMLAttrTable> m_xAttrTab; // "open" attributes
385 HTMLAttrContexts m_aContexts;// the current context of attribute/token
386 std::vector<std::unique_ptr<SwHTMLFrameFormatListener>> m_aMoveFlyFrames;// Fly-Frames, the anchor is moved
387 std::deque<sal_Int32> m_aMoveFlyCnts;// and the Content-Positions
388 //stray SwTableBoxes which need to be deleted to avoid leaking, but hold
389 //onto them until parsing is done
390 std::vector<std::unique_ptr<SwTableBox>> m_aOrphanedTableBoxes;
392 std::unique_ptr<SwApplet_Impl> m_pAppletImpl; // current applet
394 std::unique_ptr<SwCSS1Parser> m_pCSS1Parser; // Style-Sheet-Parser
395 std::unique_ptr<SwHTMLNumRuleInfo> m_pNumRuleInfo;
396 std::vector<SwPending> m_vPendingStack;
398 rtl::Reference<SwDoc> m_xDoc;
399 SwPaM *m_pPam; // SwPosition should be enough, or ??
400 SwViewShell *m_pActionViewShell; // SwViewShell, where StartAction was called
401 SwNodeIndex *m_pSttNdIdx;
403 std::vector<HTMLTable*> m_aTables;
404 std::shared_ptr<HTMLTable> m_xTable; // current "outermost" table
405 SwHTMLForm_Impl* m_pFormImpl; // current form
406 rtl::Reference<SdrTextObj> m_pMarquee; // current marquee
407 std::unique_ptr<SwField> m_xField; // current field
408 ImageMap *m_pImageMap; // current image map
409 std::unique_ptr<ImageMaps> m_pImageMaps; ///< all Image-Maps that have been read
410 std::unique_ptr<SwHTMLFootEndNote_Impl> m_pFootEndNoteImpl;
412 Size m_aHTMLPageSize; // page size of HTML template
414 sal_uInt32 m_aFontHeights[7]; // font heights 1-7
415 ImplSVEvent * m_nEventId;
417 sal_uInt16 m_nBaseFontStMin;
418 sal_uInt16 m_nFontStMin;
419 sal_uInt16 m_nDefListDeep;
420 sal_uInt16 m_nFontStHeadStart; // elements in font stack at <Hn>
421 sal_uInt16 m_nSBModuleCnt; // counter for basic modules
422 sal_uInt16 m_nMissingImgMaps; // How many image maps are still missing?
423 size_t m_nParaCnt;
424 size_t m_nContextStMin; // lower limit of PopContext
425 size_t m_nContextStAttrMin; // lower limit of attributes
426 sal_uInt16 m_nSelectEntryCnt; // Number of entries in the actual listbox
427 HtmlTokenId m_nOpenParaToken; // opened paragraph element
429 enum class JumpToMarks { NONE, Mark, Table, Region, Graphic };
430 JumpToMarks m_eJumpTo;
432 #ifdef DBG_UTIL
433 sal_uInt16 m_nContinue; // depth of Continue calls
434 #endif
436 SvxAdjust m_eParaAdjust; // adjustment of current paragraph
437 HTMLScriptLanguage m_eScriptLang; // current script language
439 bool m_bOldIsHTMLMode : 1; // Was it a HTML document?
441 bool m_bDocInitialized : 1; // document resp. shell was initialize
442 // flag to prevent double init via recursion
443 bool m_bViewCreated : 1; // the view was already created (asynchronous)
444 bool m_bSetModEnabled : 1;
446 bool m_bInFloatingFrame : 1; // We are in a floating frame
447 bool m_bInField : 1;
448 bool m_bKeepUnknown : 1; // handle unknown/not supported tokens
449 // 8
450 bool m_bCallNextToken : 1; // In tables: call NextToken in any case
451 bool m_bIgnoreRawData : 1; // ignore content of script/style
452 bool m_bLBEntrySelected : 1; // Is the current option selected?
453 bool m_bTAIgnoreNewPara : 1; // ignore next LF in text area?
454 bool m_bFixMarqueeWidth : 1; // Change size of marquee?
456 bool m_bUpperSpace : 1; // top paragraph spacing is needed
457 bool m_bNoParSpace : 1;
458 // 16
460 bool m_bInNoEmbed : 1; // we are in a NOEMBED area
462 bool m_bInTitle : 1; // we are in title
464 bool m_bChkJumpMark : 1; // maybe jump to predetermined mark
465 bool m_bUpdateDocStat : 1;
466 bool m_bFixSelectWidth : 1; // Set new width of select?
467 bool m_bTextArea : 1;
468 // 24
469 bool m_bSelect : 1;
470 bool m_bInFootEndNoteAnchor : 1;
471 bool m_bInFootEndNoteSymbol : 1;
472 bool m_bIgnoreHTMLComments : 1;
473 bool m_bRemoveHidden : 1; // the filter implementation might set the hidden flag
475 bool m_bBodySeen : 1;
476 bool m_bReadingHeaderOrFooter : 1;
477 bool m_bNotifyMacroEventRead : 1;
478 bool m_isInTableStructure;
480 int m_nTableDepth;
481 int m_nFloatingFrames;
482 int m_nListItems;
484 /// the names corresponding to the DOCINFO field subtypes INFO[1-4]
485 OUString m_InfoNames[4];
487 SfxViewFrame* m_pTempViewFrame;
489 bool m_bXHTML = false;
490 bool m_bReqIF = false;
493 * Non-owning pointers to already inserted OLE nodes, matching opened
494 * <object> XHTML elements.
496 std::stack<SwOLENode*> m_aEmbeds;
498 std::set<OUString> m_aAllowedRTFOLEMimeTypes;
500 /// This is the URL of the outer <object> data if it's not OLE2 or an image.
501 OUString m_aEmbedURL;
503 std::unique_ptr<SfxItemSet> m_pTargetCharAttrs;
505 void DeleteFormImpl();
507 void DocumentDetected();
508 void Show();
509 void ShowStatline();
510 SwViewShell *CallStartAction( SwViewShell *pVSh = nullptr, bool bChkPtr = true );
511 SwViewShell *CallEndAction( bool bChkAction = false, bool bChkPtr = true );
512 SwViewShell *CheckActionViewShell();
514 DECL_LINK( AsyncCallback, void*, void );
516 // set attribute on document
517 void SetAttr_( bool bChkEnd, bool bBeforeTable, std::deque<std::unique_ptr<HTMLAttr>> *pPostIts );
518 void SetAttr( bool bChkEnd = true, bool bBeforeTable = false,
519 std::deque<std::unique_ptr<HTMLAttr>> *pPostIts = nullptr )
521 if( !m_aSetAttrTab.empty() || !m_aMoveFlyFrames.empty() )
522 SetAttr_( bChkEnd, bBeforeTable, pPostIts );
525 HTMLAttr **GetAttrTabEntry( sal_uInt16 nWhich );
527 // create a new text node on PaM position
528 bool AppendTextNode( SwHTMLAppendMode eMode=AM_NORMAL, bool bUpdateNum=true );
529 void AddParSpace();
531 // start/end an attribute
532 // ppDepAttr indicated an attribute table entry, which attribute has to be
533 // set, before the attribute is closed
534 void NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTab, HTMLAttr **ppAttr, const SfxPoolItem& rItem);
535 bool EndAttr( HTMLAttr *pAttr, bool bChkEmpty=true );
536 void DeleteAttr( HTMLAttr* pAttr );
538 void EndContextAttrs( HTMLAttrContext *pContext );
539 void SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab);
540 void SplitAttrTab( const SwPosition& rNewPos );
541 void SplitAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab, bool bMoveEndBack);
542 void RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab);
543 void InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart );
544 void InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs );
546 bool DoPositioning( SfxItemSet &rItemSet,
547 SvxCSS1PropertyInfo &rPropInfo,
548 HTMLAttrContext *pContext );
549 bool CreateContainer( std::u16string_view rClass, SfxItemSet &rItemSet,
550 SvxCSS1PropertyInfo &rPropInfo,
551 HTMLAttrContext *pContext );
552 bool EndSection( bool bLFStripped=false );
554 void InsertAttrs( SfxItemSet &rItemSet, SvxCSS1PropertyInfo const &rPropInfo,
555 HTMLAttrContext *pContext, bool bCharLvl=false );
556 void InsertAttr( HTMLAttr **ppAttr, const SfxPoolItem & rItem,
557 HTMLAttrContext *pCntxt );
558 void SplitPREListingXMP( HTMLAttrContext *pCntxt );
559 void FixHeaderFooterDistance( bool bHeader, const SwPosition *pOldPos );
561 void EndContext( HTMLAttrContext *pContext );
562 void ClearContext( HTMLAttrContext *pContext );
564 const SwFormatColl *GetCurrFormatColl() const;
566 SwTwips GetCurrentBrowseWidth();
568 SwHTMLNumRuleInfo& GetNumInfo() { return *m_pNumRuleInfo; }
569 // add parameter <bCountedInList>
570 void SetNodeNum( sal_uInt8 nLevel );
572 // Manage paragraph styles
574 // set the style resp. its attributes on the stack
575 void SetTextCollAttrs( HTMLAttrContext *pContext = nullptr );
577 void InsertParaAttrs( const SfxItemSet& rItemSet );
579 // Manage attribute context
581 // save current context
582 void PushContext(std::unique_ptr<HTMLAttrContext>& rCntxt)
584 m_aContexts.push_back(std::move(rCntxt));
587 // Fetch top/specified context but not outside the context with token
588 // nLimit. If bRemove set then remove it.
589 std::unique_ptr<HTMLAttrContext> PopContext(HtmlTokenId nToken = HtmlTokenId::NONE);
591 void GetMarginsFromContext( sal_uInt16 &nLeft, sal_uInt16 &nRight, short& nIndent,
592 bool bIgnoreCurrent=false ) const;
593 void GetMarginsFromContextWithNumberBullet( sal_uInt16 &nLeft, sal_uInt16 &nRight,
594 short& nIndent ) const;
595 void GetULSpaceFromContext( sal_uInt16 &rUpper, sal_uInt16 &rLower ) const;
597 void MovePageDescAttrs( SwNode *pSrcNd, SwNodeOffset nDestIdx, bool bFormatBreak );
599 // Handling of tags at paragraph level
601 // <P> and <H1> to <H6>
602 void NewPara();
603 void EndPara( bool bReal = false );
604 void NewHeading( HtmlTokenId nToken );
605 void EndHeading();
607 // <ADDRESS>, <BLOCKQUOTE> and <PRE>
608 void NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nPoolId );
609 void EndTextFormatColl( HtmlTokenId nToken );
611 // <DIV> and <CENTER>
612 void NewDivision( HtmlTokenId nToken );
613 void EndDivision();
615 // insert/close Fly-Frames
616 void InsertFlyFrame( const SfxItemSet& rItemSet, HTMLAttrContext *pCntxt,
617 const OUString& rId );
619 void SaveDocContext( HTMLAttrContext *pCntxt, HtmlContextFlags nFlags,
620 const SwPosition *pNewPos );
621 void RestoreDocContext( HTMLAttrContext *pCntxt );
623 // end all opened <DIV> areas
624 bool EndSections( bool bLFStripped );
626 // <MULTICOL>
627 void NewMultiCol( sal_uInt16 columnsFromCss=0 );
629 // <MARQUEE>
630 void NewMarquee( HTMLTable *pCurTable=nullptr );
631 void EndMarquee();
632 void InsertMarqueeText();
634 // Handling of lists
636 // order list <OL> and unordered list <UL> with <LI>
637 void NewNumberBulletList( HtmlTokenId nToken );
638 void EndNumberBulletList( HtmlTokenId nToken = HtmlTokenId::NONE );
639 void NewNumberBulletListItem( HtmlTokenId nToken );
640 void EndNumberBulletListItem( HtmlTokenId nToken, bool bSetColl);
642 // definitions lists <DL> with <DD>, <DT>
643 void NewDefList();
644 void EndDefList();
645 void NewDefListItem( HtmlTokenId nToken );
646 void EndDefListItem( HtmlTokenId nToken = HtmlTokenId::NONE );
648 // Handling of tags on character level
650 // handle tags like <B>, <I> and so, which enable/disable a certain
651 // attribute or like <SPAN> get attributes from styles
652 void NewStdAttr( HtmlTokenId nToken );
653 void NewStdAttr( HtmlTokenId nToken,
654 HTMLAttr **ppAttr, const SfxPoolItem & rItem,
655 HTMLAttr **ppAttr2=nullptr, const SfxPoolItem *pItem2=nullptr,
656 HTMLAttr **ppAttr3=nullptr, const SfxPoolItem *pItem3=nullptr );
657 void EndTag( HtmlTokenId nToken );
659 // handle font attributes
660 void NewBasefontAttr(); // for <BASEFONT>
661 void EndBasefontAttr();
662 void NewFontAttr( HtmlTokenId nToken ); // for <FONT>, <BIG> and <SMALL>
663 void EndFontAttr( HtmlTokenId nToken );
665 // tags realized via character styles
666 void NewCharFormat( HtmlTokenId nToken );
668 void DeleteSection(SwStartNode* pSttNd);
670 // <SDFIELD>
671 public:
672 static SvxNumType GetNumType( std::u16string_view rStr, SvxNumType eDfltType );
673 private:
674 void NewField();
675 void EndField();
676 void InsertFieldText();
678 // <SPACER>
679 void InsertSpacer();
681 // Inserting graphics, plug-ins and applets
683 // search image maps and link with graphic nodes
684 ImageMap *FindImageMap( std::u16string_view rURL ) const;
685 void ConnectImageMaps();
687 // find anchor of Fly-Frames and set corresponding attributes
688 // in Attrset (htmlgrin.cxx)
689 void SetAnchorAndAdjustment( sal_Int16 eVertOri,
690 sal_Int16 eHoriOri,
691 const SvxCSS1PropertyInfo &rPropInfo,
692 SfxItemSet& rFrameSet );
693 void SetAnchorAndAdjustment( sal_Int16 eVertOri,
694 sal_Int16 eHoriOri,
695 SfxItemSet& rFrameSet,
696 bool bDontAppend=false );
697 void SetAnchorAndAdjustment( const SvxCSS1PropertyInfo &rPropInfo,
698 SfxItemSet &rFrameItemSet );
700 static void SetFrameFormatAttrs( SfxItemSet &rItemSet,
701 HtmlFrameFormatFlags nFlags, SfxItemSet &rFrameItemSet );
703 // create frames and register auto bound frames
704 void RegisterFlyFrame( SwFrameFormat *pFlyFrame );
706 // Adjust the size of the Fly-Frames to requirements and conditions
707 // (not for graphics, therefore htmlplug.cxx)
708 static void SetFixSize( const Size& rPixSize, const Size& rTwipDfltSize,
709 bool bPercentWidth, bool bPercentHeight,
710 SvxCSS1PropertyInfo const &rPropInfo,
711 SfxItemSet& rFlyItemSet );
712 static void SetVarSize( SvxCSS1PropertyInfo const &rPropInfo,
713 SfxItemSet& rFlyItemSet, SwTwips nDfltWidth=MINLAY,
714 sal_uInt8 nDefaultPercentWidth=0 );
715 static void SetSpace( const Size& rPixSpace, SfxItemSet &rItemSet,
716 SvxCSS1PropertyInfo &rPropInfo, SfxItemSet& rFlyItemSet );
718 sal_uInt16 IncGrfsThatResizeTable();
720 void GetDefaultScriptType( ScriptType& rType,
721 OUString& rTypeStr ) const;
723 // the actual insert methods for <IMG>, <EMBED>, <APPLET> and <PARAM>
724 void InsertImage(); // htmlgrin.cxx
725 bool InsertEmbed(); // htmlplug.cxx
727 #if HAVE_FEATURE_JAVA
728 void NewObject(); // htmlplug.cxx
729 #endif
730 void EndObject(); // link CommandLine with applet (htmlplug.cxx)
731 #if HAVE_FEATURE_JAVA
732 void InsertApplet(); // htmlplug.cxx
733 #endif
734 void EndApplet(); // link CommandLine with applet (htmlplug.cxx)
735 void InsertParam(); // htmlplug.cxx
737 void InsertFloatingFrame();
739 // parse <BODY>-tag: set background graphic and background colour (htmlgrin.cxx)
740 void InsertBodyOptions();
742 // Inserting links and bookmarks (htmlgrin.cxx)
744 // parse <A>-tag: insert a link resp. bookmark
745 void NewAnchor();
746 void EndAnchor();
748 // insert bookmark
749 void InsertBookmark( const OUString& rName );
751 void InsertCommentText( std::string_view pTag );
752 void InsertComment( const OUString& rName, std::string_view pTag = {} );
754 // Has the current paragraph bookmarks?
755 bool HasCurrentParaBookmarks( bool bIgnoreStack=false ) const;
757 // Inserting script/basic elements
759 // parse the last read basic module (htmlbas.cxx)
760 void NewScript();
761 void EndScript();
763 void AddScriptSource();
765 // insert event in SFX configuration (htmlbas.cxx)
766 void InsertBasicDocEvent( const OUString& aEventName, const OUString& rName,
767 ScriptType eScrType, const OUString& rScrType );
769 // Inserting styles
771 // <STYLE>
772 void NewStyle();
773 void EndStyle();
775 static inline bool HasStyleOptions( std::u16string_view rStyle, std::u16string_view rId,
776 std::u16string_view rClass, const OUString *pLang=nullptr,
777 const OUString *pDir=nullptr );
778 bool ParseStyleOptions( const OUString &rStyle, const OUString &rId,
779 const OUString &rClass, SfxItemSet &rItemSet,
780 SvxCSS1PropertyInfo &rPropInfo,
781 const OUString *pLang=nullptr, const OUString *pDir=nullptr );
783 // Inserting Controls and Forms (htmlform.cxx)
785 // Insert draw object into document
786 void InsertDrawObject( SdrObject* pNewDrawObj, const Size& rSpace,
787 sal_Int16 eVertOri,
788 sal_Int16 eHoriOri,
789 SfxItemSet& rCSS1ItemSet,
790 SvxCSS1PropertyInfo& rCSS1PropInfo );
791 css::uno::Reference< css::drawing::XShape > InsertControl(
792 const css::uno::Reference< css::form::XFormComponent > & rFormComp,
793 const css::uno::Reference< css::beans::XPropertySet > & rFCompPropSet,
794 const Size& rSize,
795 sal_Int16 eVertOri,
796 sal_Int16 eHoriOri,
797 SfxItemSet& rCSS1ItemSet,
798 SvxCSS1PropertyInfo& rCSS1PropInfo,
799 const SvxMacroTableDtor& rMacroTable,
800 const std::vector<OUString>& rUnoMacroTable,
801 const std::vector<OUString>& rUnoMacroParamTable,
802 bool bSetPropSet = true,
803 bool bHidden = false );
804 void SetControlSize( const css::uno::Reference< css::drawing::XShape > & rShape, const Size& rTextSz,
805 bool bMinWidth, bool bMinHeight );
807 public:
808 static void ResizeDrawObject( SdrObject* pObj, SwTwips nWidth );
809 private:
810 static void RegisterDrawObjectToTable( HTMLTable *pCurTable, SdrObject* pObj,
811 sal_uInt8 nWidth );
813 void NewForm( bool bAppend=true );
814 void EndForm( bool bAppend=true );
816 // Insert methods for <INPUT>, <TEXTAREA> and <SELECT>
817 void InsertInput();
819 void NewTextArea();
820 void InsertTextAreaText( HtmlTokenId nToken );
821 void EndTextArea();
823 void NewSelect();
824 void InsertSelectOption();
825 void InsertSelectText();
826 void EndSelect();
828 // Inserting tables (htmltab.cxx)
829 public:
831 // Insert box content after the given node
832 const SwStartNode *InsertTableSection( const SwStartNode *pPrevStNd );
834 // Insert box content at the end of the table containing the PaM
835 // and move the PaM into the cell
836 const SwStartNode *InsertTableSection( sal_uInt16 nPoolId );
838 // Insert methods for various table tags
839 std::unique_ptr<HTMLTableCnts> InsertTableContents( bool bHead );
841 private:
842 // Create a section for the temporary storage of the table caption
843 SwStartNode *InsertTempTableCaptionSection();
845 void BuildTableCell( HTMLTable *pTable, bool bReadOptions, bool bHead );
846 void BuildTableRow( HTMLTable *pTable, bool bReadOptions,
847 SvxAdjust eGrpAdjust, sal_Int16 eVertOri );
848 void BuildTableSection( HTMLTable *pTable, bool bReadOptions, bool bHead );
849 void BuildTableColGroup( HTMLTable *pTable, bool bReadOptions );
850 void BuildTableCaption( HTMLTable *pTable );
851 std::shared_ptr<HTMLTable> BuildTable(SvxAdjust eCellAdjust,
852 bool bIsParentHead = false,
853 bool bHasParentSection=true,
854 bool bHasToFlow = false);
856 // misc ...
858 void ParseMoreMetaOptions();
860 bool FileDownload( const OUString& rURL, OUString& rStr );
861 void InsertLink();
863 void InsertIDOption();
864 void InsertLineBreak();
865 void InsertHorzRule();
867 void FillEndNoteInfo( std::u16string_view aContent );
868 void FillFootNoteInfo( std::u16string_view aContent );
869 void InsertFootEndNote( const OUString& rName, bool bEndNote, bool bFixed );
870 void FinishFootEndNote();
871 void InsertFootEndNoteText();
872 SwNodeIndex *GetFootEndNoteSection( const OUString& rName );
874 sal_Int32 StripTrailingLF();
876 // Remove empty paragraph at the PaM position
877 void StripTrailingPara();
878 // If removing an empty node would corrupt the document
879 bool CanRemoveNode(SwNodeOffset nNodeIdx) const;
881 // Are there fly frames in the current paragraph?
882 bool HasCurrentParaFlys( bool bNoSurroundOnly = false,
883 bool bSurroundOnly = false ) const;
885 class TableDepthGuard
887 private:
888 SwHTMLParser& m_rParser;
889 public:
890 TableDepthGuard(SwHTMLParser& rParser)
891 : m_rParser(rParser)
893 ++m_rParser.m_nTableDepth;
895 bool TooDeep() const { return m_rParser.m_nTableDepth > 1024; }
896 ~TableDepthGuard()
898 --m_rParser.m_nTableDepth;
902 public: // used in tables
904 // Create brush item (with new) or 0
905 SvxBrushItem* CreateBrushItem( const Color *pColor,
906 const OUString &rImageURL,
907 const OUString &rStyle,
908 const OUString &rId,
909 const OUString &rClass );
911 protected:
912 // Executed for each token recognized by CallParser
913 virtual void NextToken( HtmlTokenId nToken ) override;
914 virtual ~SwHTMLParser() override;
916 // If the document is removed, remove the parser as well
917 virtual void Notify(const SfxHint&) override;
919 virtual void AddMetaUserDefined( OUString const & i_rMetaName ) override;
921 public:
923 SwHTMLParser( SwDoc* pD, SwPaM & rCursor, SvStream& rIn,
924 OUString aFileName,
925 OUString aBaseURL,
926 bool bReadNewDoc,
927 SfxMedium* pMed, bool bReadUTF8,
928 bool bIgnoreHTMLComments,
929 const OUString& rNamespace);
931 virtual SvParserState CallParser() override;
933 static sal_uInt16 ToTwips( sal_uInt16 nPixel );
935 // for reading asynchronously from SvStream
936 virtual void Continue( HtmlTokenId nToken ) override;
938 virtual bool ParseMetaOptions( const css::uno::Reference<css::document::XDocumentProperties>&,
939 SvKeyValueIterator* ) override;
942 void RegisterHTMLTable(HTMLTable* pNew)
944 m_aTables.push_back(pNew);
947 void DeregisterHTMLTable(HTMLTable* pOld);
949 SwDoc* GetDoc() const;
951 bool IsReqIF() const;
953 bool IsReadingHeaderOrFooter() const { return m_bReadingHeaderOrFooter; }
955 void NotifyMacroEventRead();
957 /// Strips query and fragment from a URL path if base URL is a file:// one.
958 static OUString StripQueryFromPath(std::u16string_view rBase, const OUString& rPath);
// Polymorphic base for data attached to a SwPending entry; the virtual
// destructor allows deletion through a base-class pointer.
struct SwPendingData
{
    virtual ~SwPendingData() = default;
};
966 struct SwPending
968 HtmlTokenId nToken;
969 std::unique_ptr<SwPendingData> pData;
971 SwPending( HtmlTokenId nTkn )
972 : nToken( nTkn )
976 inline void HTMLAttr::SetStart( const SwPosition& rPos )
978 m_nStartPara = rPos.GetNode();
979 m_nStartContent = rPos.GetContentIndex();
980 m_nEndPara = m_nStartPara;
981 m_nEndContent = m_nStartContent;
984 inline void HTMLAttrContext::SetMargins( sal_uInt16 nLeft, sal_uInt16 nRight,
985 short nIndent )
987 m_nLeftMargin = nLeft;
988 m_nRightMargin = nRight;
989 m_nFirstLineIndent = nIndent;
990 m_bLRSpaceChanged = true;
993 inline void HTMLAttrContext::GetMargins( sal_uInt16& nLeft,
994 sal_uInt16& nRight,
995 short& nIndent ) const
997 if( m_bLRSpaceChanged )
999 nLeft = m_nLeftMargin;
1000 nRight = m_nRightMargin;
1001 nIndent = m_nFirstLineIndent;
1005 inline void HTMLAttrContext::SetULSpace( sal_uInt16 nUpper, sal_uInt16 nLower )
1007 m_nUpperSpace = nUpper;
1008 m_nLowerSpace = nLower;
1009 m_bULSpaceChanged = true;
1012 inline void HTMLAttrContext::GetULSpace( sal_uInt16& rUpper,
1013 sal_uInt16& rLower ) const
1015 if( m_bULSpaceChanged )
1017 rUpper = m_nUpperSpace;
1018 rLower = m_nLowerSpace;
1022 inline bool SwHTMLParser::HasStyleOptions( std::u16string_view rStyle,
1023 std::u16string_view rId,
1024 std::u16string_view rClass,
1025 const OUString *pLang,
1026 const OUString *pDir )
1028 return !rStyle.empty() || !rId.empty() || !rClass.empty() ||
1029 (pLang && !pLang->isEmpty()) || (pDir && !pDir->isEmpty());
1032 class SwTextFootnote;
1034 class SwHTMLTextFootnote
1036 private:
1037 OUString m_sName;
1038 SwTextFootnote* m_pTextFootnote;
1039 std::unique_ptr<SvtDeleteListener> m_xDeleteListener;
1040 public:
1041 SwHTMLTextFootnote(OUString rName, SwTextFootnote* pInTextFootnote)
1042 : m_sName(std::move(rName))
1043 , m_pTextFootnote(pInTextFootnote)
1044 , m_xDeleteListener(new SvtDeleteListener(static_cast<SwFormatFootnote&>(pInTextFootnote->GetAttr()).GetNotifier()))
1047 const OUString& GetName() const
1049 return m_sName;
1051 const SwNodeIndex* GetStartNode() const
1053 if (m_xDeleteListener->WasDeleted())
1054 return nullptr;
1055 return m_pTextFootnote->GetStartNode();
1059 struct SwHTMLFootEndNote_Impl
1061 std::vector<SwHTMLTextFootnote> aTextFootnotes;
1063 OUString sName;
1064 OUString sContent; // information for the last footnote
1065 bool bEndNote;
1066 bool bFixed;
1069 #endif
1071 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */