Version 7.6.3.2-android, tag libreoffice-7.6.3.2-android
[LibreOffice.git] / sw / source / filter / html / swhtml.cxx
blob1f295218cee6597c714d2671b226e17dfb242a6e
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <algorithm>
23 #include <memory>
24 #include <config_java.h>
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <stdlib.h>
36 #endif
37 #include <hintids.hxx>
39 #include <utility>
40 #include <vcl/errinf.hxx>
41 #include <svl/stritem.hxx>
42 #include <vcl/imap.hxx>
43 #include <svtools/htmltokn.h>
44 #include <svtools/htmlkywd.hxx>
45 #include <svtools/ctrltool.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <sfx2/event.hxx>
49 #include <sfx2/docfile.hxx>
51 #include <sfx2/linkmgr.hxx>
52 #include <editeng/kernitem.hxx>
53 #include <editeng/boxitem.hxx>
54 #include <editeng/fhgtitem.hxx>
55 #include <editeng/formatbreakitem.hxx>
56 #include <editeng/postitem.hxx>
57 #include <editeng/wghtitem.hxx>
58 #include <editeng/crossedoutitem.hxx>
59 #include <editeng/udlnitem.hxx>
60 #include <editeng/escapementitem.hxx>
61 #include <editeng/blinkitem.hxx>
62 #include <editeng/ulspitem.hxx>
63 #include <editeng/colritem.hxx>
64 #include <editeng/fontitem.hxx>
65 #include <editeng/adjustitem.hxx>
66 #include <editeng/lrspitem.hxx>
67 #include <editeng/protitem.hxx>
68 #include <editeng/flstitem.hxx>
69 #include <svx/unobrushitemhelper.hxx>
71 #include <frmatr.hxx>
72 #include <charatr.hxx>
73 #include <fmtfld.hxx>
74 #include <fmtpdsc.hxx>
75 #include <fmtanchr.hxx>
76 #include <fmtsrnd.hxx>
77 #include <fmtfsize.hxx>
78 #include <fmtclds.hxx>
79 #include <fchrfmt.hxx>
80 #include <fmtinfmt.hxx>
81 #include <fmtfollowtextflow.hxx>
82 #include <fmtornt.hxx>
83 #include <doc.hxx>
84 #include <IDocumentUndoRedo.hxx>
85 #include <IDocumentSettingAccess.hxx>
86 #include <IDocumentLayoutAccess.hxx>
87 #include <IDocumentLinksAdministration.hxx>
88 #include <IDocumentRedlineAccess.hxx>
89 #include <IDocumentFieldsAccess.hxx>
90 #include <IDocumentStylePoolAccess.hxx>
91 #include <IDocumentStatistics.hxx>
92 #include <IDocumentState.hxx>
93 #include <pam.hxx>
94 #include <ndtxt.hxx>
95 #include <mdiexp.hxx>
96 #include <poolfmt.hxx>
97 #include <pagedesc.hxx>
98 #include <IMark.hxx>
99 #include <docsh.hxx>
100 #include <editsh.hxx>
101 #include <docufld.hxx>
102 #include "swcss1.hxx"
103 #include <fltini.hxx>
104 #include <htmltbl.hxx>
105 #include "htmlnum.hxx"
106 #include "swhtml.hxx"
107 #include "wrthtml.hxx"
108 #include <linkenum.hxx>
109 #include <breakit.hxx>
110 #include <SwAppletImpl.hxx>
111 #include <swdll.hxx>
112 #include <txatbase.hxx>
114 #include <sfx2/viewfrm.hxx>
115 #include <svx/svdobj.hxx>
116 #include <officecfg/Office/Writer.hxx>
117 #include <comphelper/sequenceashashmap.hxx>
118 #include <comphelper/sequence.hxx>
119 #include <officecfg/Office/Common.hxx>
121 #include <swerror.h>
122 #include <ndole.hxx>
123 #include <unoframe.hxx>
124 #include "css1atr.hxx"
125 #include <frameformats.hxx>
// NOTE(review): presumably a mask for HTML font size values; its use is not
// visible in this part of the file - confirm before relying on it.
127 #define FONTSIZE_MASK 7
// NOTE(review): escapement settings, presumably for superscript/subscript
// text; the SUPER/SUB values simply alias DFLT_ESC_SUPER / DFLT_ESC_SUB.
129 #define HTML_ESC_PROP 80
130 #define HTML_ESC_SUPER DFLT_ESC_SUPER
131 #define HTML_ESC_SUB DFLT_ESC_SUB
// Numeric codes for the <SPACER TYPE=...> option values; these are the
// values stored in aHTMLSpacerTypeTable below.
133 #define HTML_SPTYPE_BLOCK 1
134 #define HTML_SPTYPE_HORI 2
135 #define HTML_SPTYPE_VERT 3
137 using editeng::SvxBorderLine;
138 using namespace ::com::sun::star;
140 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
141 HTMLOptionEnum<SvxAdjust> const aHTMLPAlignTable[] =
143 { OOO_STRING_SVTOOLS_HTML_AL_left, SvxAdjust::Left },
144 { OOO_STRING_SVTOOLS_HTML_AL_center, SvxAdjust::Center },
145 { OOO_STRING_SVTOOLS_HTML_AL_middle, SvxAdjust::Center }, // Netscape
146 { OOO_STRING_SVTOOLS_HTML_AL_right, SvxAdjust::Right },
147 { OOO_STRING_SVTOOLS_HTML_AL_justify, SvxAdjust::Block },
148 { OOO_STRING_SVTOOLS_HTML_AL_char, SvxAdjust::Left },
149 { nullptr, SvxAdjust(0) }
152 // <SPACER TYPE=...>
153 HTMLOptionEnum<sal_uInt16> const aHTMLSpacerTypeTable[] =
155 { OOO_STRING_SVTOOLS_HTML_SPTYPE_block, HTML_SPTYPE_BLOCK },
156 { OOO_STRING_SVTOOLS_HTML_SPTYPE_horizontal, HTML_SPTYPE_HORI },
157 { OOO_STRING_SVTOOLS_HTML_SPTYPE_vertical, HTML_SPTYPE_VERT },
158 { nullptr, 0 }
161 HTMLReader::HTMLReader()
163 m_bTemplateBrowseMode = true;
166 OUString HTMLReader::GetTemplateName(SwDoc& rDoc) const
168 if (!rDoc.getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE))
169 // HTML import into Writer, avoid loading the Writer/Web template.
170 return OUString();
172 static const OUStringLiteral sTemplateWithoutExt(u"internal/html");
173 SvtPathOptions aPathOpt;
175 // first search for OpenDocument Writer/Web template
176 // OpenDocument Writer/Web template (extension .oth)
177 OUString sTemplate( sTemplateWithoutExt + ".oth" );
178 if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
179 return sTemplate;
181 // no OpenDocument Writer/Web template found.
182 // search for OpenOffice.org Writer/Web template
183 sTemplate = sTemplateWithoutExt + ".stw";
184 if (aPathOpt.SearchFile( sTemplate, SvtPathOptions::Paths::Template ))
185 return sTemplate;
187 OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
189 return OUString();
192 bool HTMLReader::SetStrmStgPtr()
194 OSL_ENSURE( m_pMedium, "Where is the medium??" );
196 if( m_pMedium->IsRemote() || !m_pMedium->IsStorage() )
198 m_pStream = m_pMedium->GetInStream();
199 return true;
201 return false;
205 // Call for the general Reader-Interface
206 ErrCode HTMLReader::Read( SwDoc &rDoc, const OUString& rBaseURL, SwPaM &rPam, const OUString & rName )
208 SetupFilterOptions();
210 if( !m_pStream )
212 OSL_ENSURE( m_pStream, "HTML-Read without stream" );
213 return ERR_SWG_READ_ERROR;
216 if( !m_bInsertMode )
218 Reader::ResetFrameFormats( rDoc );
220 // Set the HTML page style, when it isn't a HTML document,
221 // otherwise it's already set.
222 if( !rDoc.getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE) && m_aNamespace != "reqif-xhtml" )
224 rDoc.getIDocumentContentOperations().InsertPoolItem( rPam, SwFormatPageDesc(
225 rDoc.getIDocumentStylePoolAccess().GetPageDescFromPool( RES_POOLPAGE_HTML, false )) );
229 // so nobody steals the document!
230 rtl::Reference<SwDoc> xHoldAlive(&rDoc);
231 ErrCode nRet = ERRCODE_NONE;
232 tools::SvRef<SwHTMLParser> xParser = new SwHTMLParser( &rDoc, rPam, *m_pStream,
233 rName, rBaseURL, !m_bInsertMode, m_pMedium,
234 IsReadUTF8(),
235 m_bIgnoreHTMLComments, m_aNamespace );
237 SvParserState eState = xParser->CallParser();
239 if( SvParserState::Pending == eState )
240 m_pStream->ResetError();
241 else if( SvParserState::Accepted != eState )
243 const OUString sErr(OUString::number(static_cast<sal_Int32>(xParser->GetLineNr()))
244 + "," + OUString::number(static_cast<sal_Int32>(xParser->GetLinePos())));
246 // use the stream as transport for error number
247 nRet = *new StringErrorInfo( ERR_FORMAT_ROWCOL, sErr,
248 DialogMask::ButtonsOk | DialogMask::MessageError );
251 return nRet;
254 SwHTMLParser::SwHTMLParser( SwDoc* pD, SwPaM& rCursor, SvStream& rIn,
255 OUString aPath,
256 OUString aBaseURL,
257 bool bReadNewDoc,
258 SfxMedium* pMed, bool bReadUTF8,
259 bool bNoHTMLComments,
260 const OUString& rNamespace )
261 : SfxHTMLParser( rIn, bReadNewDoc, pMed ),
262 m_aPathToFile(std::move( aPath )),
263 m_sBaseURL(std::move( aBaseURL )),
264 m_xAttrTab(std::make_shared<HTMLAttrTable>()),
265 m_pNumRuleInfo( new SwHTMLNumRuleInfo ),
266 m_xDoc( pD ),
267 m_pActionViewShell( nullptr ),
268 m_pSttNdIdx( nullptr ),
269 m_pFormImpl( nullptr ),
270 m_pImageMap( nullptr ),
271 m_nBaseFontStMin( 0 ),
272 m_nFontStMin( 0 ),
273 m_nDefListDeep( 0 ),
274 m_nFontStHeadStart( 0 ),
275 m_nSBModuleCnt( 0 ),
276 m_nMissingImgMaps( 0 ),
277 m_nParaCnt( 5 ),
278 // #i83625#
279 m_nContextStMin( 0 ),
280 m_nContextStAttrMin( 0 ),
281 m_nSelectEntryCnt( 0 ),
282 m_nOpenParaToken( HtmlTokenId::NONE ),
283 m_eJumpTo( JumpToMarks::NONE ),
284 #ifdef DBG_UTIL
285 m_nContinue( 0 ),
286 #endif
287 m_eParaAdjust( SvxAdjust::End ),
288 m_bDocInitialized( false ),
289 m_bSetModEnabled( false ),
290 m_bInFloatingFrame( false ),
291 m_bInField( false ),
292 m_bKeepUnknown( false ),
293 m_bCallNextToken( false ),
294 m_bIgnoreRawData( false ),
295 m_bLBEntrySelected ( false ),
296 m_bTAIgnoreNewPara ( false ),
297 m_bFixMarqueeWidth ( false ),
298 m_bNoParSpace( false ),
299 m_bInNoEmbed( false ),
300 m_bInTitle( false ),
301 m_bUpdateDocStat( false ),
302 m_bFixSelectWidth( false ),
303 m_bTextArea( false ),
304 m_bSelect( false ),
305 m_bInFootEndNoteAnchor( false ),
306 m_bInFootEndNoteSymbol( false ),
307 m_bIgnoreHTMLComments( bNoHTMLComments ),
308 m_bRemoveHidden( false ),
309 m_bBodySeen( false ),
310 m_bReadingHeaderOrFooter( false ),
311 m_bNotifyMacroEventRead( false ),
312 m_isInTableStructure(false),
313 m_nTableDepth( 0 ),
314 m_nFloatingFrames( 0 ),
315 m_nListItems( 0 ),
316 m_pTempViewFrame(nullptr)
318 // If requested explicitly, then force ignoring of comments (don't create postits for them).
319 if (!bFuzzing)
321 if (officecfg::Office::Writer::Filter::Import::HTML::IgnoreComments::get())
322 m_bIgnoreHTMLComments = true;
323 m_bKeepUnknown = officecfg::Office::Common::Filter::HTML::Import::UnknownTag::get();
326 m_nEventId = nullptr;
327 m_bUpperSpace = m_bViewCreated = m_bChkJumpMark = false;
329 m_eScriptLang = HTMLScriptLanguage::Unknown;
331 rCursor.DeleteMark();
332 m_pPam = &rCursor; // re-use existing cursor: avoids spurious ~SwContentIndexReg assert
333 memset(m_xAttrTab.get(), 0, sizeof(HTMLAttrTable));
335 // Read the font sizes 1-7 from the INI file
336 if (!bFuzzing)
338 m_aFontHeights[0] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_1::get() * 20;
339 m_aFontHeights[1] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_2::get() * 20;
340 m_aFontHeights[2] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_3::get() * 20;
341 m_aFontHeights[3] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_4::get() * 20;
342 m_aFontHeights[4] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_5::get() * 20;
343 m_aFontHeights[5] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_6::get() * 20;
344 m_aFontHeights[6] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_7::get() * 20;
346 else
348 m_aFontHeights[0] = m_aFontHeights[1] = m_aFontHeights[2] = m_aFontHeights[3] =
349 m_aFontHeights[4] = m_aFontHeights[5] = m_aFontHeights[6] = 12 * 20;
352 if(bReadNewDoc)
354 //CJK has different defaults, so a different object should be used for this
355 //RES_CHARTR_CJK_FONTSIZE is a valid value
356 SvxFontHeightItem aFontHeight(m_aFontHeights[2], 100, RES_CHRATR_FONTSIZE);
357 m_xDoc->SetDefault( aFontHeight );
358 SvxFontHeightItem aFontHeightCJK(m_aFontHeights[2], 100, RES_CHRATR_CJK_FONTSIZE);
359 m_xDoc->SetDefault( aFontHeightCJK );
360 SvxFontHeightItem aFontHeightCTL(m_aFontHeights[2], 100, RES_CHRATR_CTL_FONTSIZE);
361 m_xDoc->SetDefault( aFontHeightCTL );
363 // #i18732# - adjust default of option 'FollowTextFlow'
364 // TODO: not sure what the appropriate default for HTML should be?
365 m_xDoc->SetDefault( SwFormatFollowTextFlow(true) );
368 // Change to HTML mode during the import, so that the right styles are created
369 m_bOldIsHTMLMode = m_xDoc->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE);
370 m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, true);
372 m_pCSS1Parser.reset(new SwCSS1Parser(m_xDoc.get(), *this, m_aFontHeights, m_sBaseURL, IsNewDoc()));
373 if (!bFuzzing)
374 m_pCSS1Parser->SetIgnoreFontFamily( officecfg::Office::Common::Filter::HTML::Import::FontSetting::get() );
376 if( bReadUTF8 )
378 SetSrcEncoding( RTL_TEXTENCODING_UTF8 );
380 else
382 SwDocShell *pDocSh = m_xDoc->GetDocShell();
383 SvKeyValueIterator *pHeaderAttrs =
384 pDocSh->GetHeaderAttributes();
385 if( pHeaderAttrs )
386 SetEncodingByHTTPHeader( pHeaderAttrs );
388 m_pCSS1Parser->SetDfltEncoding( osl_getThreadTextEncoding() );
390 SwDocShell* pDocSh = m_xDoc->GetDocShell();
391 if( pDocSh )
393 m_bViewCreated = true; // not, load synchronous
395 // a jump mark is present
397 if( pMed )
399 m_sJmpMark = pMed->GetURLObject().GetMark();
400 if( !m_sJmpMark.isEmpty() )
402 m_eJumpTo = JumpToMarks::Mark;
403 sal_Int32 nLastPos = m_sJmpMark.lastIndexOf( cMarkSeparator );
404 sal_Int32 nPos = nLastPos != -1 ? nLastPos : 0;
406 OUString sCmp;
407 if (nPos)
409 sCmp = m_sJmpMark.copy(nPos + 1).replaceAll(" ", "");
412 if( !sCmp.isEmpty() )
414 sCmp = sCmp.toAsciiLowerCase();
415 if( sCmp == "region" )
416 m_eJumpTo = JumpToMarks::Region;
417 else if( sCmp == "table" )
418 m_eJumpTo = JumpToMarks::Table;
419 else if( sCmp == "graphic" )
420 m_eJumpTo = JumpToMarks::Graphic;
421 else if( sCmp == "outline" ||
422 sCmp == "text" ||
423 sCmp == "frame" )
424 m_eJumpTo = JumpToMarks::NONE; // this is nothing valid!
425 else
426 // otherwise this is a normal (book)mark
427 nPos = -1;
429 else
430 nPos = -1;
432 if( nPos != -1 )
433 m_sJmpMark = m_sJmpMark.copy( 0, nPos );
434 if( m_sJmpMark.isEmpty() )
435 m_eJumpTo = JumpToMarks::NONE;
440 if (!rNamespace.isEmpty())
442 SetNamespace(rNamespace);
443 m_bXHTML = true;
444 if (rNamespace == "reqif-xhtml")
445 m_bReqIF = true;
448 // Extract load parameters which are specific to this filter.
449 if (!pMed)
451 return;
454 comphelper::SequenceAsHashMap aLoadMap(pMed->GetArgs());
455 auto it = aLoadMap.find("AllowedRTFOLEMimeTypes");
456 if (it == aLoadMap.end())
458 return;
461 uno::Sequence<OUString> aTypes;
462 it->second >>= aTypes;
463 m_aAllowedRTFOLEMimeTypes = comphelper::sequenceToContainer<std::set<OUString>>(aTypes);
466 SwHTMLParser::~SwHTMLParser()
468 #ifdef DBG_UTIL
469 OSL_ENSURE( !m_nContinue, "DTOR in continue!" );
470 #endif
472 OSL_ENSURE(m_aContexts.empty(), "There are still contexts on the stack");
473 OSL_ENSURE(!m_nContextStMin, "There are protected contexts");
474 m_nContextStMin = 0;
475 while (!m_aContexts.empty())
477 std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
478 ClearContext(xCntxt.get());
481 bool bAsync = m_xDoc->IsInLoadAsynchron();
482 m_xDoc->SetInLoadAsynchron( false );
483 m_xDoc->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE, m_bOldIsHTMLMode);
485 if( m_xDoc->GetDocShell() && m_nEventId )
486 Application::RemoveUserEvent( m_nEventId );
488 // the DocumentDetected maybe can delete the DocShells, therefore fetch again
489 if( m_xDoc->GetDocShell() )
491 // update linked sections
492 sal_uInt16 nLinkMode = m_xDoc->getIDocumentSettingAccess().getLinkUpdateMode( true );
493 if( nLinkMode != NEVER && bAsync &&
494 SfxObjectCreateMode::INTERNAL!=m_xDoc->GetDocShell()->GetCreateMode() )
495 m_xDoc->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode == MANUAL, false, nullptr );
497 if ( m_xDoc->GetDocShell()->IsLoading() )
499 // #i59688#
500 m_xDoc->GetDocShell()->LoadingFinished();
504 delete m_pSttNdIdx;
506 if( !m_aSetAttrTab.empty() )
508 OSL_ENSURE( m_aSetAttrTab.empty(),"There are still attributes on the stack" );
509 for ( const auto& rpAttr : m_aSetAttrTab )
510 delete rpAttr;
511 m_aSetAttrTab.clear();
514 m_pCSS1Parser.reset();
515 m_pNumRuleInfo.reset();
516 DeleteFormImpl();
517 m_pFootEndNoteImpl.reset();
519 OSL_ENSURE(!m_xTable, "It exists still an open table");
520 m_pImageMaps.reset();
522 OSL_ENSURE( m_vPendingStack.empty(),
523 "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
524 m_vPendingStack.clear();
526 m_xDoc.clear();
528 if ( m_pTempViewFrame )
530 m_pTempViewFrame->DoClose();
532 // the temporary view frame is hidden, so the hidden flag might need to be removed
533 if ( m_bRemoveHidden && m_xDoc.is() && m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->GetMedium() )
534 m_xDoc->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN );
538 IMPL_LINK_NOARG( SwHTMLParser, AsyncCallback, void*, void )
540 m_nEventId=nullptr;
542 // #i47907# - If the document has already been destructed,
543 // the parser should be aware of this:
544 if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
545 || 1 == m_xDoc->getReferenceCount() )
547 // was the import aborted by SFX?
548 eState = SvParserState::Error;
551 GetAsynchCallLink().Call(nullptr);
554 SvParserState SwHTMLParser::CallParser()
556 // create temporary index on position 0, so it won't be moved!
557 m_pSttNdIdx = new SwNodeIndex( m_xDoc->GetNodes() );
558 if( !IsNewDoc() ) // insert into existing document ?
560 const SwPosition* pPos = m_pPam->GetPoint();
562 m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
564 *m_pSttNdIdx = pPos->GetNodeIndex()-1;
565 m_xDoc->getIDocumentContentOperations().SplitNode( *pPos, false );
567 SwPaM aInsertionRangePam( *pPos );
569 m_pPam->Move( fnMoveBackward );
571 // split any redline over the insertion point
572 aInsertionRangePam.SetMark();
573 *aInsertionRangePam.GetPoint() = *m_pPam->GetPoint();
574 aInsertionRangePam.Move( fnMoveBackward );
575 m_xDoc->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam );
577 m_xDoc->SetTextFormatColl( *m_pPam,
578 m_pCSS1Parser->GetTextCollFromPool( RES_POOLCOLL_STANDARD ));
581 if( GetMedium() )
583 if( !m_bViewCreated )
585 m_nEventId = Application::PostUserEvent( LINK( this, SwHTMLParser, AsyncCallback ) );
587 else
589 m_bViewCreated = true;
590 m_nEventId = nullptr;
593 else // show progress bar
595 rInput.Seek(STREAM_SEEK_TO_END);
596 rInput.ResetError();
598 m_xProgress.reset(new ImportProgress(m_xDoc->GetDocShell(), 0, rInput.Tell()));
600 rInput.Seek(STREAM_SEEK_TO_BEGIN);
601 rInput.ResetError();
604 StartListening(m_xDoc->GetPageDesc( 0 ).GetNotifier());
606 SvParserState eRet = HTMLParser::CallParser();
607 return eRet;
610 bool SwHTMLParser::CanRemoveNode(SwNodeOffset nNodeIdx) const
612 const SwNode *pPrev = m_xDoc->GetNodes()[nNodeIdx - 1];
613 return pPrev->IsContentNode() || (pPrev->IsEndNode() && pPrev->StartOfSectionNode()->IsSectionNode());
616 void SwHTMLParser::Continue( HtmlTokenId nToken )
618 #ifdef DBG_UTIL
619 OSL_ENSURE(!m_nContinue, "Continue in Continue - not supposed to happen");
620 m_nContinue++;
621 #endif
623 // When the import (of SFX) is aborted, an error will be set but
624 // we still continue, so that we clean up properly.
625 OSL_ENSURE( SvParserState::Error!=eState,
626 "SwHTMLParser::Continue: already set an error" );
627 if( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
628 eState = SvParserState::Error;
630 // Fetch SwViewShell from document, save it and set as current.
631 SwViewShell *pInitVSh = CallStartAction();
633 if( SvParserState::Error != eState && GetMedium() && !m_bViewCreated )
635 // At first call first return, show document and wait for callback
636 // time.
637 // At this point in CallParser only one digit was read and
638 // a SaveState(0) was called.
639 eState = SvParserState::Pending;
640 m_bViewCreated = true;
641 m_xDoc->SetInLoadAsynchron( true );
643 #ifdef DBG_UTIL
644 m_nContinue--;
645 #endif
647 return;
650 m_bSetModEnabled = false;
651 if( m_xDoc->GetDocShell() )
653 m_bSetModEnabled = m_xDoc->GetDocShell()->IsEnableSetModified();
654 if( m_bSetModEnabled )
656 m_xDoc->GetDocShell()->EnableSetModified( false );
660 // during import don't call OLE-Modified
661 Link<bool,void> aOLELink( m_xDoc->GetOle2Link() );
662 m_xDoc->SetOle2Link( Link<bool,void>() );
664 bool bModified = m_xDoc->getIDocumentState().IsModified();
665 bool const bWasUndo = m_xDoc->GetIDocumentUndoRedo().DoesUndo();
666 m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
668 // When the import will be aborted, don't call Continue anymore.
669 // If a Pending-Stack exists make sure the stack is ended with a call
670 // of NextToken.
671 if( SvParserState::Error == eState )
673 OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
674 "SwHTMLParser::Continue: Pending-Stack without Token" );
675 if( !m_vPendingStack.empty() && m_vPendingStack.back().nToken != HtmlTokenId::NONE )
676 NextToken( m_vPendingStack.back().nToken );
677 OSL_ENSURE( m_vPendingStack.empty(),
678 "SwHTMLParser::Continue: There is again a Pending-Stack" );
680 else
682 HTMLParser::Continue( !m_vPendingStack.empty() ? m_vPendingStack.back().nToken : nToken );
685 // disable progress bar again
686 m_xProgress.reset();
688 bool bLFStripped = false;
689 if( SvParserState::Pending != GetStatus() )
691 // set the last attributes yet
693 if( !m_aScriptSource.isEmpty() )
695 SwScriptFieldType *pType =
696 static_cast<SwScriptFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script ));
698 SwScriptField aField( pType, m_aScriptType, m_aScriptSource,
699 false );
700 InsertAttr( SwFormatField( aField ), false );
703 if( m_pAppletImpl )
705 if( m_pAppletImpl->GetApplet().is() )
706 EndApplet();
707 else
708 EndObject();
711 // maybe remove an existing LF after the last paragraph
712 if( IsNewDoc() )
713 bLFStripped = StripTrailingLF() > 0;
715 // close still open numbering
716 while( GetNumInfo().GetNumRule() )
717 EndNumberBulletList();
719 OSL_ENSURE( !m_nContextStMin, "There are protected contexts" );
720 // try this twice, first normally to let m_nContextStMin decrease
721 // naturally and get contexts popped in desired order, and if that
722 // fails force it
723 for (int i = 0; i < 2; ++i)
725 while (m_aContexts.size() > m_nContextStMin)
727 std::unique_ptr<HTMLAttrContext> xCntxt(PopContext());
728 if (xCntxt)
729 EndContext(xCntxt.get());
731 if (!m_nContextStMin)
732 break;
733 OSL_ENSURE(!m_nContextStMin, "There are still protected contexts");
734 m_nContextStMin = 0;
737 m_aParaAttrs.clear();
739 SetAttr( false );
741 // set the first delayed styles
742 m_pCSS1Parser->SetDelayedStyles();
745 // again correct the start
746 if( !IsNewDoc() && m_pSttNdIdx->GetIndex() )
748 SwTextNode* pTextNode = m_pSttNdIdx->GetNode().GetTextNode();
749 SwNodeIndex aNxtIdx( *m_pSttNdIdx );
750 if( pTextNode && pTextNode->CanJoinNext( &aNxtIdx ))
752 const sal_Int32 nStt = pTextNode->GetText().getLength();
753 // when the cursor is still in the node, then set him at the end
754 if( m_pPam->GetPoint()->GetNode() == aNxtIdx.GetNode() )
756 m_pPam->GetPoint()->Assign( *pTextNode, nStt );
759 #if OSL_DEBUG_LEVEL > 0
760 // !!! shouldn't be possible, or ??
761 OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound().GetNodeIndex(),
762 "Pam.Bound1 is still in the node" );
763 OSL_ENSURE( m_pSttNdIdx->GetIndex()+1 != m_pPam->GetBound( false ).GetNodeIndex(),
764 "Pam.Bound2 is still in the node" );
766 if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound().GetNodeIndex() )
768 const sal_Int32 nCntPos = m_pPam->GetBound().GetContentIndex();
769 m_pPam->GetBound().SetContent(
770 pTextNode->GetText().getLength() + nCntPos );
772 if( m_pSttNdIdx->GetIndex()+1 == m_pPam->GetBound( false ).GetNodeIndex() )
774 const sal_Int32 nCntPos = m_pPam->GetBound( false ).GetContentIndex();
775 m_pPam->GetBound( false ).SetContent(
776 pTextNode->GetText().getLength() + nCntPos );
778 #endif
779 // Keep character attribute!
780 SwTextNode* pDelNd = aNxtIdx.GetNode().GetTextNode();
781 if (pTextNode->GetText().getLength())
782 pDelNd->FormatToTextAttr( pTextNode );
783 else
784 pTextNode->ChgFormatColl( pDelNd->GetTextColl() );
785 pTextNode->JoinNext();
790 if( SvParserState::Accepted == eState )
792 if( m_nMissingImgMaps )
794 // Some Image-Map relations are still missing.
795 // Maybe now the Image-Maps are there?
796 ConnectImageMaps();
799 // now remove the last useless paragraph
800 SwPosition* pPos = m_pPam->GetPoint();
801 if( !pPos->GetContentIndex() && !bLFStripped )
803 SwTextNode* pCurrentNd;
804 SwNodeOffset nNodeIdx = pPos->GetNodeIndex();
806 bool bHasFlysOrMarks =
807 HasCurrentParaFlys() || HasCurrentParaBookmarks( true );
809 if( IsNewDoc() )
811 if (!m_pPam->GetPoint()->GetContentIndex() && CanRemoveNode(nNodeIdx))
813 SwContentNode* pCNd = m_pPam->GetPointContentNode();
814 if( pCNd && pCNd->StartOfSectionIndex()+2 <
815 pCNd->EndOfSectionIndex() && !bHasFlysOrMarks )
817 SwViewShell *pVSh = CheckActionViewShell();
818 SwCursorShell *pCursorSh = dynamic_cast<SwCursorShell *>( pVSh );
819 if( pCursorSh &&
820 pCursorSh->GetCursor()->GetPoint()
821 ->GetNodeIndex() == nNodeIdx )
823 pCursorSh->MovePara(GoPrevPara, fnParaEnd );
824 pCursorSh->SetMark();
825 pCursorSh->ClearMark();
827 SwNode& rDelNode = m_pPam->GetPoint()->GetNode();
828 // move so we don't have a dangling SwContentIndex to the deleted node
829 m_pPam->GetPoint()->Adjust(SwNodeOffset(1));
830 if (m_pPam->HasMark())
831 m_pPam->GetMark()->Adjust(SwNodeOffset(1));
832 m_xDoc->GetNodes().Delete( rDelNode );
836 else if( nullptr != ( pCurrentNd = m_xDoc->GetNodes()[ nNodeIdx ]->GetTextNode()) && !bHasFlysOrMarks )
838 if( pCurrentNd->CanJoinNext( pPos ))
840 SwTextNode* pNextNd = pPos->GetNode().GetTextNode();
841 m_pPam->SetMark(); m_pPam->DeleteMark();
842 pNextNd->JoinPrev();
844 else if (pCurrentNd->GetText().isEmpty())
846 m_pPam->SetMark(); m_pPam->DeleteMark();
847 SwNode& rDelNode = pPos->GetNode();
848 // move so we don't have a dangling SwContentIndex to the deleted node
849 m_pPam->GetPoint()->Adjust(SwNodeOffset(+1));
850 m_xDoc->GetNodes().Delete( rDelNode );
851 m_pPam->Move( fnMoveBackward );
856 // annul the SplitNode from the beginning
857 else if( !IsNewDoc() )
859 if( pPos->GetContentIndex() ) // then there was no <p> at the end
860 m_pPam->Move( fnMoveForward, GoInNode ); // therefore to the next
861 SwTextNode* pTextNode = pPos->GetNode().GetTextNode();
862 SwNodeIndex aPrvIdx( pPos->GetNode() );
863 if( pTextNode && pTextNode->CanJoinPrev( &aPrvIdx ) &&
864 *m_pSttNdIdx <= aPrvIdx )
866 // Normally here should take place a JoinNext, but all cursors and
867 // so are registered in pTextNode, so that it MUST remain.
869 // Convert paragraph to character attribute, from Prev adopt
870 // the paragraph attribute and the template!
871 SwTextNode* pPrev = aPrvIdx.GetNode().GetTextNode();
872 pTextNode->ChgFormatColl( pPrev->GetTextColl() );
873 pTextNode->FormatToTextAttr( pPrev );
874 pTextNode->ResetAllAttr();
876 if( pPrev->HasSwAttrSet() )
877 pTextNode->SetAttr( *pPrev->GetpSwAttrSet() );
879 if( &m_pPam->GetBound().GetNode() == pPrev )
880 m_pPam->GetBound().nContent.Assign( pTextNode, 0 );
881 if( &m_pPam->GetBound(false).GetNode() == pPrev )
882 m_pPam->GetBound(false).nContent.Assign( pTextNode, 0 );
884 pTextNode->JoinPrev();
888 // adjust AutoLoad in DocumentProperties
889 if (!bFuzzing && IsNewDoc())
891 SwDocShell *pDocShell(m_xDoc->GetDocShell());
892 OSL_ENSURE(pDocShell, "no SwDocShell");
893 if (pDocShell) {
894 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
895 pDocShell->GetModel(), uno::UNO_QUERY_THROW);
896 uno::Reference<document::XDocumentProperties> xDocProps(
897 xDPS->getDocumentProperties());
898 OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
899 if ( xDocProps.is() && (xDocProps->getAutoloadSecs() > 0) &&
900 (xDocProps->getAutoloadURL().isEmpty()) )
902 xDocProps->setAutoloadURL(m_aPathToFile);
907 if( m_bUpdateDocStat )
909 m_xDoc->getIDocumentStatistics().UpdateDocStat( false, true );
913 if( SvParserState::Pending != GetStatus() )
915 delete m_pSttNdIdx;
916 m_pSttNdIdx = nullptr;
919 // should the parser be the last one who hold the document, then nothing
920 // has to be done anymore, document will be destroyed shortly!
921 if( 1 < m_xDoc->getReferenceCount() )
923 if( bWasUndo )
925 m_xDoc->GetIDocumentUndoRedo().DelAllUndoObj();
926 m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
928 else if( !pInitVSh )
930 // When at the beginning of Continue no Shell was available,
931 // it's possible in the meantime one was created.
932 // In that case the bWasUndo flag is wrong and we must
933 // enable Undo.
934 SwViewShell *pTmpVSh = CheckActionViewShell();
935 if( pTmpVSh )
937 m_xDoc->GetIDocumentUndoRedo().DoUndo(true);
941 m_xDoc->SetOle2Link( aOLELink );
942 if( !bModified )
943 m_xDoc->getIDocumentState().ResetModified();
944 if( m_bSetModEnabled && m_xDoc->GetDocShell() )
946 m_xDoc->GetDocShell()->EnableSetModified();
947 m_bSetModEnabled = false; // this is unnecessary here
951 // When the Document-SwVievShell still exists and an Action is open
952 // (doesn't have to be by abort), end the Action, disconnect from Shell
953 // and finally reconstruct the old Shell.
954 CallEndAction( true );
956 #ifdef DBG_UTIL
957 m_nContinue--;
958 #endif
961 void SwHTMLParser::Notify(const SfxHint& rHint)
963 if(rHint.GetId() == SfxHintId::Dying)
965 EndListeningAll();
966 ReleaseRef();
970 void SwHTMLParser::DocumentDetected()
972 OSL_ENSURE( !m_bDocInitialized, "DocumentDetected called multiple times" );
973 m_bDocInitialized = true;
974 if( IsNewDoc() )
976 if( IsInHeader() )
977 FinishHeader();
979 CallEndAction( true );
981 m_xDoc->GetIDocumentUndoRedo().DoUndo(false);
982 // For DocumentDetected in general a SwViewShell is created.
983 // But it also can be created later, in case the UI is captured.
984 CallStartAction();
988 // is called for every token that is recognised in CallParser
//
// Overview of the order in which a token is handled below:
//  1. If the doc shell is aborting the import, or the document's reference
//     count is 1, the parser state is set to SvParserState::Error.
//  2. While inside <TITLE> (and nothing is pending) tokens are only collected
//     in m_sTitle; TITLE_OFF hands the collected title to the document.
//  3. DocumentDetected() is called if the document is not initialized yet.
//  4. With an empty pending stack, the "inside a special element" states
//     (floating frame, NOEMBED, applet/object, TEXTAREA, SELECT, MARQUEE,
//     field, foot-/endnote anchor, pending unknown token) get the first look
//     at the token and in most cases return right away.
//  5. Everything else is dispatched through the big switch on nToken.
//  6. Finally the ID option, the "unknown tag" comment and the temporary
//     paragraph attributes are taken care of.
989 void SwHTMLParser::NextToken( HtmlTokenId nToken )
991 if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
992 || 1 == m_xDoc->getReferenceCount() )
994 // Was the import cancelled by SFX? If a pending stack
995 // exists, clean it.
996 eState = SvParserState::Error;
997 OSL_ENSURE( m_vPendingStack.empty() || m_vPendingStack.back().nToken != HtmlTokenId::NONE,
998 "SwHTMLParser::NextToken: Pending-Stack without token" );
999 if( 1 == m_xDoc->getReferenceCount() || m_vPendingStack.empty() )
1000 return ;
// Debug builds only: a non-empty pending stack is expected solely for the
// tokens listed here; any other token triggers the assertion in 'default'.
1003 #if OSL_DEBUG_LEVEL > 0
1004 if( !m_vPendingStack.empty() )
1006 switch( nToken )
1008 // tables are read by recursive method calls
1009 case HtmlTokenId::TABLE_ON:
1010 // For CSS declarations we might have to wait
1011 // for a file download to finish
1012 case HtmlTokenId::LINK:
1013 // For controls we might have to set the size.
1014 case HtmlTokenId::INPUT:
1015 case HtmlTokenId::TEXTAREA_ON:
1016 case HtmlTokenId::SELECT_ON:
1017 case HtmlTokenId::SELECT_OFF:
1018 break;
1019 default:
1020 OSL_ENSURE( m_vPendingStack.empty(), "Unknown token for Pending-Stack" );
1021 break;
1024 #endif
1026 // The following special cases have to be treated before the
1027 // filter detection, because Netscape doesn't reference the content
1028 // of the title for filter detection either.
1029 if( m_vPendingStack.empty() )
1031 if( m_bInTitle )
1033 switch( nToken )
1035 case HtmlTokenId::TITLE_OFF:
// take over the collected title text and empty the buffer
1037 OUString sTitle = m_sTitle.makeStringAndClear();
1038 if( IsNewDoc() && !sTitle.isEmpty() )
1040 if( m_xDoc->GetDocShell() ) {
1041 uno::Reference<document::XDocumentPropertiesSupplier>
1042 xDPS(m_xDoc->GetDocShell()->GetModel(),
1043 uno::UNO_QUERY_THROW);
1044 uno::Reference<document::XDocumentProperties> xDocProps(
1045 xDPS->getDocumentProperties());
1046 OSL_ENSURE(xDocProps.is(), "no DocumentProperties");
1047 if (xDocProps.is()) {
1048 xDocProps->setTitle(sTitle);
1051 m_xDoc->GetDocShell()->SetTitle(sTitle);
1054 m_bInTitle = false;
1055 break;
1058 case HtmlTokenId::NONBREAKSPACE:
1059 m_sTitle.append(" ");
1060 break;
1062 case HtmlTokenId::SOFTHYPH:
1063 m_sTitle.append("-");
1064 break;
1066 case HtmlTokenId::TEXTTOKEN:
1067 m_sTitle.append(aToken);
1068 break;
// Any other tag inside the title is appended to the title as literal
// text of the form "<[/]name[ options]>".
1070 default:
1071 m_sTitle.append("<");
1072 if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
1073 m_sTitle.append("/");
1074 m_sTitle.append(sSaveToken);
1075 if( !aToken.isEmpty() )
1077 m_sTitle.append(" ");
1078 m_sTitle.append(aToken);
1080 m_sTitle.append(">");
1081 break;
1084 return;
1088 // Find out what type of document it is if we don't know already.
1089 // For Controls this has to be finished before the control is inserted
1090 // because for inserting a View is needed.
1091 if( !m_bDocInitialized )
1092 DocumentDetected();
// bGetIDOption / bInsertUnknown are set by the main switch and evaluated
// after it. m_bUpperSpace is cleared for every token; the saved value is
// written back below only when a text token is empty after its leading
// blank has been removed.
1094 bool bGetIDOption = false, bInsertUnknown = false;
1095 bool bUpperSpaceSave = m_bUpperSpace;
1096 m_bUpperSpace = false;
1098 // The following special cases may or have to be treated after the
1099 // filter detection
1100 if( m_vPendingStack.empty() )
1102 if( m_bInFloatingFrame )
1104 // <SCRIPT> is ignored here (from us), because it is ignored in
1105 // Applets as well
1106 if( HtmlTokenId::IFRAME_OFF == nToken )
1108 m_bCallNextToken = false;
1109 m_bInFloatingFrame = false;
1112 return;
1114 else if( m_bInNoEmbed )
1116 switch( nToken )
1118 case HtmlTokenId::NOEMBED_OFF:
1119 m_aContents = convertLineEnd(m_aContents, GetSystemLineEnd());
1120 InsertComment( m_aContents, OOO_STRING_SVTOOLS_HTML_noembed );
1121 m_aContents.clear();
1122 m_bCallNextToken = false;
1123 m_bInNoEmbed = false;
1124 break;
1126 case HtmlTokenId::RAWDATA:
1127 InsertCommentText( OOO_STRING_SVTOOLS_HTML_noembed );
1128 break;
1130 default:
1131 OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1132 break;
1135 return;
1137 else if( m_pAppletImpl )
1139 // in an applet only <PARAM> tags and the </APPLET> tag
1140 // are of interest for us (for the moment)
1141 // <SCRIPT> is ignored here (from Netscape)!
1143 switch( nToken )
1145 case HtmlTokenId::APPLET_OFF:
1146 m_bCallNextToken = false;
1147 EndApplet();
1148 break;
1149 case HtmlTokenId::OBJECT_OFF:
1150 m_bCallNextToken = false;
1151 EndObject();
1152 break;
1153 case HtmlTokenId::PARAM:
1154 InsertParam();
1155 break;
1156 default: break;
1159 return;
1161 else if( m_bTextArea )
1163 // in a TextArea everything up to </TEXTAREA> is inserted as text.
1164 // <SCRIPT> is ignored here (from Netscape)!
1166 switch( nToken )
1168 case HtmlTokenId::TEXTAREA_OFF:
1169 m_bCallNextToken = false;
1170 EndTextArea();
1171 break;
1173 default:
1174 InsertTextAreaText( nToken );
1175 break;
1178 return;
1180 else if( m_bSelect )
1182 // HAS to be treated after bNoScript!
1183 switch( nToken )
1185 case HtmlTokenId::SELECT_OFF:
1186 m_bCallNextToken = false;
1187 EndSelect();
1188 return;
1190 case HtmlTokenId::OPTION:
1191 InsertSelectOption();
1192 return;
1194 case HtmlTokenId::TEXTTOKEN:
1195 InsertSelectText();
1196 return;
1198 case HtmlTokenId::INPUT:
1199 case HtmlTokenId::SCRIPT_ON:
1200 case HtmlTokenId::SCRIPT_OFF:
1201 case HtmlTokenId::NOSCRIPT_ON:
1202 case HtmlTokenId::NOSCRIPT_OFF:
1203 case HtmlTokenId::RAWDATA:
1204 // treat in normal switch
1205 break;
1207 default:
1208 // ignore
1209 return;
1212 else if( m_pMarquee )
1214 // in a Marquee only the text up to </MARQUEE> is inserted.
1215 // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1216 // script.
1217 switch( nToken )
1219 case HtmlTokenId::MARQUEE_OFF:
1220 m_bCallNextToken = false;
1221 EndMarquee();
1222 break;
1224 case HtmlTokenId::TEXTTOKEN:
1225 InsertMarqueeText();
1226 break;
1227 default: break;
1230 return;
1232 else if( m_bInField )
1234 switch( nToken )
1236 case HtmlTokenId::SDFIELD_OFF:
1237 m_bCallNextToken = false;
1238 EndField();
1239 break;
1241 case HtmlTokenId::TEXTTOKEN:
1242 InsertFieldText();
1243 break;
1244 default: break;
1247 return;
1249 else if( m_bInFootEndNoteAnchor || m_bInFootEndNoteSymbol )
1251 switch( nToken )
1253 case HtmlTokenId::ANCHOR_OFF:
1254 EndAnchor();
1255 m_bCallNextToken = false;
1256 break;
1258 case HtmlTokenId::TEXTTOKEN:
1259 InsertFootEndNoteText();
1260 break;
1261 default: break;
1263 return;
1265 else if( !m_aUnknownToken.isEmpty() )
1267 // Paste content of unknown tags.
1268 // (but surely if we are not in the header section) fdo#36080 fdo#34666
1269 if (!aToken.isEmpty() && !IsInHeader() )
1271 if( !m_bDocInitialized )
1272 DocumentDetected();
1273 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken.toString());
1275 // if there are temporary paragraph attributes and the
1276 // paragraph isn't empty then the paragraph attributes
1277 // are final.
1278 m_aParaAttrs.clear();
1280 SetAttr();
1283 // Unknown token in the header are only closed by a matching
1284 // end-token, </HEAD> or <BODY>. Text inside is ignored.
1285 switch( nToken )
1287 case HtmlTokenId::UNKNOWNCONTROL_OFF:
// an end tag with a different name does not close the unknown token
1288 if( m_aUnknownToken != sSaveToken )
1289 return;
1290 [[fallthrough]];
1291 case HtmlTokenId::FRAMESET_ON:
1292 case HtmlTokenId::HEAD_OFF:
1293 case HtmlTokenId::BODY_ON:
1294 case HtmlTokenId::IMAGE: // Don't know why Netscape acts this way.
1295 m_aUnknownToken.clear();
1296 break;
1297 case HtmlTokenId::TEXTTOKEN:
1298 return;
1299 default:
1300 m_aUnknownToken.clear();
1301 break;
// Main dispatch on the token type.
1306 switch( nToken )
1308 case HtmlTokenId::BODY_ON:
// only the first <BODY> is evaluated
1309 if (!m_bBodySeen)
1311 m_bBodySeen = true;
1312 if( !m_aStyleSource.isEmpty() )
1314 m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1315 m_aStyleSource.clear();
1317 if( IsNewDoc() )
1319 InsertBodyOptions();
1320 // If there is a template for the first or the right page,
1321 // it is set here.
1322 const SwPageDesc *pPageDesc = nullptr;
1323 if( m_pCSS1Parser->IsSetFirstPageDesc() )
1324 pPageDesc = m_pCSS1Parser->GetFirstPageDesc();
1325 else if( m_pCSS1Parser->IsSetRightPageDesc() )
1326 pPageDesc = m_pCSS1Parser->GetRightPageDesc();
1328 if( pPageDesc )
1330 m_xDoc->getIDocumentContentOperations().InsertPoolItem( *m_pPam, SwFormatPageDesc( pPageDesc ) );
1334 break;
1336 case HtmlTokenId::LINK:
1337 InsertLink();
1338 break;
1340 case HtmlTokenId::BASE:
1342 const HTMLOptions& rHTMLOptions = GetOptions();
// the options are visited from the last one to the first one
1343 for (size_t i = rHTMLOptions.size(); i; )
1345 const HTMLOption& rOption = rHTMLOptions[--i];
1346 switch( rOption.GetToken() )
1348 case HtmlOptionId::HREF:
1349 m_sBaseURL = rOption.GetString();
1350 break;
1351 case HtmlOptionId::TARGET:
1352 if( IsNewDoc() )
1354 SwDocShell *pDocShell(m_xDoc->GetDocShell());
1355 OSL_ENSURE(pDocShell, "no SwDocShell");
1356 if (pDocShell) {
1357 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1358 pDocShell->GetModel(), uno::UNO_QUERY_THROW);
1359 uno::Reference<document::XDocumentProperties>
1360 xDocProps(xDPS->getDocumentProperties());
1361 OSL_ENSURE(xDocProps.is(),"no DocumentProperties");
1362 if (xDocProps.is()) {
1363 xDocProps->setDefaultTarget(
1364 rOption.GetString());
1368 break;
1369 default: break;
1373 break;
1375 case HtmlTokenId::META:
// The HTTP header attributes and the document properties are only
// fetched for a new document; otherwise ParseMetaOptions() receives
// a null header iterator and an empty properties reference.
1377 SvKeyValueIterator *pHTTPHeader = nullptr;
1378 if( IsNewDoc() )
1380 SwDocShell *pDocSh = m_xDoc->GetDocShell();
1381 if( pDocSh )
1382 pHTTPHeader = pDocSh->GetHeaderAttributes();
1384 SwDocShell *pDocShell(m_xDoc->GetDocShell());
1385 OSL_ENSURE(pDocShell, "no SwDocShell");
1386 if (pDocShell)
1388 uno::Reference<document::XDocumentProperties> xDocProps;
1389 if (IsNewDoc())
1391 const uno::Reference<document::XDocumentPropertiesSupplier>
1392 xDPS( pDocShell->GetModel(), uno::UNO_QUERY_THROW );
1393 xDocProps = xDPS->getDocumentProperties();
1394 OSL_ENSURE(xDocProps.is(), "DocumentProperties is null");
1396 ParseMetaOptions( xDocProps, pHTTPHeader );
1399 break;
1401 case HtmlTokenId::TITLE_ON:
1402 m_bInTitle = true;
1403 break;
1405 case HtmlTokenId::SCRIPT_ON:
1406 NewScript();
1407 break;
1409 case HtmlTokenId::SCRIPT_OFF:
1410 EndScript();
1411 break;
1413 case HtmlTokenId::NOSCRIPT_ON:
1414 case HtmlTokenId::NOSCRIPT_OFF:
1415 bInsertUnknown = true;
1416 break;
1418 case HtmlTokenId::STYLE_ON:
1419 NewStyle();
1420 break;
1422 case HtmlTokenId::STYLE_OFF:
1423 EndStyle();
1424 break;
1426 case HtmlTokenId::RAWDATA:
// raw data belongs either to a script or to a style sheet; style
// sheet chunks are joined with a newline in m_aStyleSource
1427 if( !m_bIgnoreRawData )
1429 if( IsReadScript() )
1431 AddScriptSource();
1433 else if( IsReadStyle() )
1435 if( !m_aStyleSource.isEmpty() )
1436 m_aStyleSource += "\n";
1437 m_aStyleSource += aToken;
1440 break;
1442 case HtmlTokenId::OBJECT_ON:
// In XHTML mode the object is first tried as an embedded object and
// inserted as an image if that fails.
1443 if (m_bXHTML)
1445 if (!InsertEmbed())
1446 InsertImage();
1447 break;
1449 #if HAVE_FEATURE_JAVA
1450 NewObject();
1451 m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1452 #endif
1453 break;
1455 case HtmlTokenId::OBJECT_OFF:
1456 if (!m_aEmbeds.empty())
1457 m_aEmbeds.pop();
1458 break;
1460 case HtmlTokenId::APPLET_ON:
1461 #if HAVE_FEATURE_JAVA
1462 InsertApplet();
1463 m_bCallNextToken = m_pAppletImpl!=nullptr && m_xTable;
1464 #endif
1465 break;
1467 case HtmlTokenId::IFRAME_ON:
1468 if (bFuzzing && m_nFloatingFrames > 64)
1469 SAL_WARN("sw.html", "Not importing any more FloatingFrames for fuzzing performance");
1470 else
1472 InsertFloatingFrame();
1473 m_bCallNextToken = m_bInFloatingFrame && m_xTable;
1475 break;
1477 case HtmlTokenId::LINEBREAK:
1478 if( !IsReadPRE() )
1480 InsertLineBreak();
1481 break;
1483 else
1484 bGetIDOption = true;
1485 // <BR>s in <PRE> resemble true LFs, hence no break
1486 [[fallthrough]];
1488 case HtmlTokenId::NEWPARA:
1489 // CR in PRE/LISTING/XMP
1491 if( HtmlTokenId::NEWPARA==nToken ||
1492 m_pPam->GetPoint()->GetContentIndex() )
1494 AppendTextNode(); // there is no LF at this place
1495 // therefore it will cause no problems
1496 SetTextCollAttrs();
1498 // progress bar
1499 if (m_xProgress)
1500 m_xProgress->Update(rInput.Tell());
1502 break;
1504 case HtmlTokenId::NONBREAKSPACE:
1505 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_HARDBLANK) );
1506 break;
1508 case HtmlTokenId::SOFTHYPH:
1509 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, OUString(CHAR_SOFTHYPHEN) );
1510 break;
1512 case HtmlTokenId::LINEFEEDCHAR:
// finish a non-empty paragraph, then set a page break before the
// current paragraph unless inside a table or a header/footer
1513 if( m_pPam->GetPoint()->GetContentIndex() )
1514 AppendTextNode();
1515 if (!m_xTable && !m_xDoc->IsInHeaderFooter(m_pPam->GetPoint()->GetNode()))
1517 NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, SvxFormatBreakItem(SvxBreak::PageBefore, RES_BREAK));
1518 EndAttr( m_xAttrTab->pBreak, false );
1520 break;
1522 case HtmlTokenId::TEXTTOKEN:
1523 case HtmlTokenId::CDATA:
1524 // insert string without spanning attributes at the end.
// Outside of PRE a leading blank of the token is dropped when the
// cursor is at content index 0 (or not in a text node), or when the
// character before the cursor already is a blank or a 0x0a.
1525 if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
1527 sal_Int32 nPos = m_pPam->GetPoint()->GetContentIndex();
1528 const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->GetNode().GetTextNode() : nullptr;
1529 if (pTextNode)
1531 const OUString& rText = pTextNode->GetText();
1532 sal_Unicode cLast = rText[--nPos];
1533 if( ' ' == cLast || '\x0a' == cLast)
1534 aToken.remove(0, 1);
1536 else
1537 aToken.remove(0, 1);
1539 if( aToken.isEmpty() )
1541 m_bUpperSpace = bUpperSpaceSave;
1542 break;
1546 if( !aToken.isEmpty() )
1548 if( !m_bDocInitialized )
1549 DocumentDetected();
1551 if (!m_aEmbeds.empty())
1553 // The text token is inside an OLE object, which means
1554 // alternate text.
1555 SwOLENode* pOLENode = m_aEmbeds.top();
1556 if (!pOLENode)
1558 // <object> is mapped to an image -> ignore.
1559 break;
1562 if (SwFlyFrameFormat* pFormat
1563 = dynamic_cast<SwFlyFrameFormat*>(pOLENode->GetFlyFormat()))
1565 if (SdrObject* pObject = SwXFrame::GetOrCreateSdrObject(*pFormat))
// the text is appended to the title of the object instead of
// being inserted into the document
1567 pObject->SetTitle(pObject->GetTitle() + aToken);
1568 break;
1573 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, aToken.toString());
1575 // if there are temporary paragraph attributes and the
1576 // paragraph isn't empty then the paragraph attributes
1577 // are final.
1578 m_aParaAttrs.clear();
1580 SetAttr();
1582 break;
1584 case HtmlTokenId::HORZRULE:
1585 InsertHorzRule();
1586 break;
1588 case HtmlTokenId::IMAGE:
1589 InsertImage();
1590 // if only the parser references the doc, we can break and set
1591 // an error code
1592 if( 1 == m_xDoc->getReferenceCount() )
1594 eState = SvParserState::Error;
1596 break;
1598 case HtmlTokenId::SPACER:
1599 InsertSpacer();
1600 break;
1602 case HtmlTokenId::EMBED:
1603 InsertEmbed();
1604 break;
1606 case HtmlTokenId::NOEMBED_ON:
1607 m_bInNoEmbed = true;
1608 m_bCallNextToken = bool(m_xTable);
1609 ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed );
1610 break;
1612 case HtmlTokenId::DEFLIST_ON:
1613 if( m_nOpenParaToken != HtmlTokenId::NONE )
1614 EndPara();
1615 NewDefList();
1616 break;
1617 case HtmlTokenId::DEFLIST_OFF:
1618 if( m_nOpenParaToken != HtmlTokenId::NONE )
1619 EndPara();
1620 EndDefListItem( HtmlTokenId::NONE );
1621 EndDefList();
1622 break;
1624 case HtmlTokenId::DD_ON:
1625 case HtmlTokenId::DT_ON:
1626 if( m_nOpenParaToken != HtmlTokenId::NONE )
1627 EndPara();
1628 EndDefListItem();// close <DD>/<DT> and set no template
1629 NewDefListItem( nToken );
1630 break;
1632 case HtmlTokenId::DD_OFF:
1633 case HtmlTokenId::DT_OFF:
1634 // c.f. HtmlTokenId::LI_OFF
1635 // Actually we should close a DD/DT now.
1636 // But neither Netscape nor Microsoft do this and so don't we.
1637 EndDefListItem( nToken );
1638 break;
1640 // divisions
1641 case HtmlTokenId::DIVISION_ON:
1642 case HtmlTokenId::CENTER_ON:
1643 if (!m_isInTableStructure)
1645 if (m_nOpenParaToken != HtmlTokenId::NONE)
1647 if (IsReadPRE())
1648 m_nOpenParaToken = HtmlTokenId::NONE;
1649 else
1650 EndPara();
1652 NewDivision( nToken );
1654 break;
1656 case HtmlTokenId::DIVISION_OFF:
1657 case HtmlTokenId::CENTER_OFF:
1658 if (!m_isInTableStructure)
1660 if (m_nOpenParaToken != HtmlTokenId::NONE)
1662 if (IsReadPRE())
1663 m_nOpenParaToken = HtmlTokenId::NONE;
1664 else
1665 EndPara();
1667 EndDivision();
1669 break;
1671 case HtmlTokenId::MULTICOL_ON:
1672 if( m_nOpenParaToken != HtmlTokenId::NONE )
1673 EndPara();
1674 NewMultiCol();
1675 break;
1677 case HtmlTokenId::MULTICOL_OFF:
1678 if( m_nOpenParaToken != HtmlTokenId::NONE )
1679 EndPara();
1680 EndTag( HtmlTokenId::MULTICOL_ON );
1681 break;
1683 case HtmlTokenId::MARQUEE_ON:
1684 NewMarquee();
1685 m_bCallNextToken = m_pMarquee!=nullptr && m_xTable;
1686 break;
1688 case HtmlTokenId::FORM_ON:
1689 NewForm();
1690 break;
1691 case HtmlTokenId::FORM_OFF:
1692 EndForm();
1693 break;
1695 // templates
1696 case HtmlTokenId::PARABREAK_ON:
1697 if( m_nOpenParaToken != HtmlTokenId::NONE )
1698 EndPara( true );
1699 NewPara();
1700 break;
1702 case HtmlTokenId::PARABREAK_OFF:
1703 EndPara( true );
1704 break;
1706 case HtmlTokenId::ADDRESS_ON:
1707 if( m_nOpenParaToken != HtmlTokenId::NONE )
1708 EndPara();
1709 NewTextFormatColl(HtmlTokenId::ADDRESS_ON, RES_POOLCOLL_SEND_ADDRESS);
1710 break;
1712 case HtmlTokenId::ADDRESS_OFF:
1713 if( m_nOpenParaToken != HtmlTokenId::NONE )
1714 EndPara();
1715 EndTextFormatColl( HtmlTokenId::ADDRESS_OFF );
1716 break;
1718 case HtmlTokenId::BLOCKQUOTE_ON:
1719 case HtmlTokenId::BLOCKQUOTE30_ON:
1720 if( m_nOpenParaToken != HtmlTokenId::NONE )
1721 EndPara();
1722 NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON, RES_POOLCOLL_HTML_BLOCKQUOTE );
1723 break;
1725 case HtmlTokenId::BLOCKQUOTE_OFF:
1726 case HtmlTokenId::BLOCKQUOTE30_OFF:
1727 if( m_nOpenParaToken != HtmlTokenId::NONE )
1728 EndPara();
1729 EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON );
1730 break;
1732 case HtmlTokenId::PREFORMTXT_ON:
1733 case HtmlTokenId::LISTING_ON:
1734 case HtmlTokenId::XMP_ON:
1735 if( m_nOpenParaToken != HtmlTokenId::NONE )
1736 EndPara();
1737 NewTextFormatColl( nToken, RES_POOLCOLL_HTML_PRE );
1738 break;
1740 case HtmlTokenId::PREFORMTXT_OFF:
1741 m_bNoParSpace = true; // the last PRE-paragraph gets a spacing
1742 EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF );
1743 break;
1745 case HtmlTokenId::LISTING_OFF:
1746 case HtmlTokenId::XMP_OFF:
1747 EndTextFormatColl( nToken );
1748 break;
1750 case HtmlTokenId::HEAD1_ON:
1751 case HtmlTokenId::HEAD2_ON:
1752 case HtmlTokenId::HEAD3_ON:
1753 case HtmlTokenId::HEAD4_ON:
1754 case HtmlTokenId::HEAD5_ON:
1755 case HtmlTokenId::HEAD6_ON:
1756 if( m_nOpenParaToken != HtmlTokenId::NONE )
1758 if( IsReadPRE() )
1759 m_nOpenParaToken = HtmlTokenId::NONE;
1760 else
1761 EndPara();
1763 NewHeading( nToken );
1764 break;
1766 case HtmlTokenId::HEAD1_OFF:
1767 case HtmlTokenId::HEAD2_OFF:
1768 case HtmlTokenId::HEAD3_OFF:
1769 case HtmlTokenId::HEAD4_OFF:
1770 case HtmlTokenId::HEAD5_OFF:
1771 case HtmlTokenId::HEAD6_OFF:
1772 EndHeading();
1773 break;
1775 case HtmlTokenId::TABLE_ON:
// a non-empty pending stack means a table that is already being read
// is continued
1776 if( !m_vPendingStack.empty() )
1777 BuildTable( SvxAdjust::End );
1778 else
1780 if( m_nOpenParaToken != HtmlTokenId::NONE )
1781 EndPara();
1782 OSL_ENSURE(!m_xTable, "table in table not allowed here");
1783 if( !m_xTable && (IsNewDoc() || !m_pPam->GetPointNode().FindTableNode()) &&
1784 (m_pPam->GetPoint()->GetNodeIndex() >
1785 m_xDoc->GetNodes().GetEndOfExtras().GetIndex() ||
1786 !m_pPam->GetPointNode().FindFootnoteStartNode() ) )
1788 if ( m_nParaCnt < 5 )
1789 Show(); // show what we have up to here
1791 SvxAdjust eAdjust = m_xAttrTab->pAdjust
1792 ? static_cast<const SvxAdjustItem&>(m_xAttrTab->pAdjust->GetItem()).
1793 GetAdjust()
1794 : SvxAdjust::End;
1795 BuildTable( eAdjust );
1797 else
1798 bInsertUnknown = m_bKeepUnknown;
1800 break;
1802 // lists
1803 case HtmlTokenId::DIRLIST_ON:
1804 case HtmlTokenId::MENULIST_ON:
1805 case HtmlTokenId::ORDERLIST_ON:
1806 case HtmlTokenId::UNORDERLIST_ON:
1807 if( m_nOpenParaToken != HtmlTokenId::NONE )
1808 EndPara();
1809 NewNumberBulletList( nToken );
1810 break;
1812 case HtmlTokenId::DIRLIST_OFF:
1813 case HtmlTokenId::MENULIST_OFF:
1814 case HtmlTokenId::ORDERLIST_OFF:
1815 case HtmlTokenId::UNORDERLIST_OFF:
1816 if( m_nOpenParaToken != HtmlTokenId::NONE )
1817 EndPara();
1818 EndNumberBulletListItem( HtmlTokenId::NONE, true );
1819 EndNumberBulletList( nToken );
1820 break;
1822 case HtmlTokenId::LI_ON:
1823 case HtmlTokenId::LISTHEADER_ON:
1824 if( m_nOpenParaToken != HtmlTokenId::NONE &&
1825 (m_pPam->GetPoint()->GetContentIndex()
1826 || HtmlTokenId::PARABREAK_ON==m_nOpenParaToken) )
1828 // only finish paragraph for <P><LI>, not for <DD><LI>
1829 EndPara();
1832 if (bFuzzing && m_nListItems > 1024)
1834 SAL_WARN("sw.html", "skipping remaining bullet import for performance during fuzzing");
1836 else
1838 EndNumberBulletListItem( HtmlTokenId::NONE, false );// close <LI>/<LH> and don't set a template
1839 NewNumberBulletListItem( nToken );
1842 ++m_nListItems;
1844 break;
1845 case HtmlTokenId::LI_OFF:
1846 case HtmlTokenId::LISTHEADER_OFF:
1847 EndNumberBulletListItem( nToken, false );
1848 break;
1850 // Attribute :
// italic and bold are started for the Latin, CJK and CTL variants of the
// item at once
1851 case HtmlTokenId::ITALIC_ON:
1853 SvxPostureItem aPosture( ITALIC_NORMAL, RES_CHRATR_POSTURE );
1854 SvxPostureItem aPostureCJK( ITALIC_NORMAL, RES_CHRATR_CJK_POSTURE );
1855 SvxPostureItem aPostureCTL( ITALIC_NORMAL, RES_CHRATR_CTL_POSTURE );
1856 NewStdAttr( HtmlTokenId::ITALIC_ON,
1857 &m_xAttrTab->pItalic, aPosture,
1858 &m_xAttrTab->pItalicCJK, &aPostureCJK,
1859 &m_xAttrTab->pItalicCTL, &aPostureCTL );
1861 break;
1863 case HtmlTokenId::BOLD_ON:
1865 SvxWeightItem aWeight( WEIGHT_BOLD, RES_CHRATR_WEIGHT );
1866 SvxWeightItem aWeightCJK( WEIGHT_BOLD, RES_CHRATR_CJK_WEIGHT );
1867 SvxWeightItem aWeightCTL( WEIGHT_BOLD, RES_CHRATR_CTL_WEIGHT );
1868 NewStdAttr( HtmlTokenId::BOLD_ON,
1869 &m_xAttrTab->pBold, aWeight,
1870 &m_xAttrTab->pBoldCJK, &aWeightCJK,
1871 &m_xAttrTab->pBoldCTL, &aWeightCTL );
1873 break;
1875 case HtmlTokenId::STRIKE_ON:
1876 case HtmlTokenId::STRIKETHROUGH_ON:
1878 NewStdAttr( HtmlTokenId::STRIKE_ON, &m_xAttrTab->pStrike,
1879 SvxCrossedOutItem(STRIKEOUT_SINGLE, RES_CHRATR_CROSSEDOUT) );
1881 break;
1883 case HtmlTokenId::UNDERLINE_ON:
1885 NewStdAttr( HtmlTokenId::UNDERLINE_ON, &m_xAttrTab->pUnderline,
1886 SvxUnderlineItem(LINESTYLE_SINGLE, RES_CHRATR_UNDERLINE) );
1888 break;
1890 case HtmlTokenId::SUPERSCRIPT_ON:
1892 NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON, &m_xAttrTab->pEscapement,
1893 SvxEscapementItem(HTML_ESC_SUPER,HTML_ESC_PROP, RES_CHRATR_ESCAPEMENT) );
1895 break;
1897 case HtmlTokenId::SUBSCRIPT_ON:
1899 NewStdAttr( HtmlTokenId::SUBSCRIPT_ON, &m_xAttrTab->pEscapement,
1900 SvxEscapementItem(HTML_ESC_SUB,HTML_ESC_PROP, RES_CHRATR_ESCAPEMENT) );
1902 break;
1904 case HtmlTokenId::BLINK_ON:
1906 NewStdAttr( HtmlTokenId::BLINK_ON, &m_xAttrTab->pBlink,
1907 SvxBlinkItem( true, RES_CHRATR_BLINK ) );
1909 break;
1911 case HtmlTokenId::SPAN_ON:
1912 NewStdAttr( HtmlTokenId::SPAN_ON );
1913 break;
1915 case HtmlTokenId::ITALIC_OFF:
1916 case HtmlTokenId::BOLD_OFF:
1917 case HtmlTokenId::STRIKE_OFF:
1918 case HtmlTokenId::UNDERLINE_OFF:
1919 case HtmlTokenId::SUPERSCRIPT_OFF:
1920 case HtmlTokenId::SUBSCRIPT_OFF:
1921 case HtmlTokenId::BLINK_OFF:
1922 case HtmlTokenId::SPAN_OFF:
1923 EndTag( nToken );
1924 break;
// STRIKETHROUGH_ON was started as STRIKE_ON above, so it is ended as
// STRIKE_OFF
1926 case HtmlTokenId::STRIKETHROUGH_OFF:
1927 EndTag( HtmlTokenId::STRIKE_OFF );
1928 break;
1930 case HtmlTokenId::BASEFONT_ON:
1931 NewBasefontAttr();
1932 break;
1933 case HtmlTokenId::BASEFONT_OFF:
1934 EndBasefontAttr();
1935 break;
1936 case HtmlTokenId::FONT_ON:
1937 case HtmlTokenId::BIGPRINT_ON:
1938 case HtmlTokenId::SMALLPRINT_ON:
1939 NewFontAttr( nToken );
1940 break;
1941 case HtmlTokenId::FONT_OFF:
1942 case HtmlTokenId::BIGPRINT_OFF:
1943 case HtmlTokenId::SMALLPRINT_OFF:
1944 EndFontAttr( nToken );
1945 break;
1947 case HtmlTokenId::EMPHASIS_ON:
1948 case HtmlTokenId::CITATION_ON:
1949 case HtmlTokenId::STRONG_ON:
1950 case HtmlTokenId::CODE_ON:
1951 case HtmlTokenId::SAMPLE_ON:
1952 case HtmlTokenId::KEYBOARD_ON:
1953 case HtmlTokenId::VARIABLE_ON:
1954 case HtmlTokenId::DEFINSTANCE_ON:
1955 case HtmlTokenId::SHORTQUOTE_ON:
1956 case HtmlTokenId::LANGUAGE_ON:
1957 case HtmlTokenId::AUTHOR_ON:
1958 case HtmlTokenId::PERSON_ON:
1959 case HtmlTokenId::ACRONYM_ON:
1960 case HtmlTokenId::ABBREVIATION_ON:
1961 case HtmlTokenId::INSERTEDTEXT_ON:
1962 case HtmlTokenId::DELETEDTEXT_ON:
1964 case HtmlTokenId::TELETYPE_ON:
1965 NewCharFormat( nToken );
1966 break;
1968 case HtmlTokenId::SDFIELD_ON:
1969 NewField();
1970 m_bCallNextToken = m_bInField && m_xTable;
1971 break;
1973 case HtmlTokenId::EMPHASIS_OFF:
1974 case HtmlTokenId::CITATION_OFF:
1975 case HtmlTokenId::STRONG_OFF:
1976 case HtmlTokenId::CODE_OFF:
1977 case HtmlTokenId::SAMPLE_OFF:
1978 case HtmlTokenId::KEYBOARD_OFF:
1979 case HtmlTokenId::VARIABLE_OFF:
1980 case HtmlTokenId::DEFINSTANCE_OFF:
1981 case HtmlTokenId::SHORTQUOTE_OFF:
1982 case HtmlTokenId::LANGUAGE_OFF:
1983 case HtmlTokenId::AUTHOR_OFF:
1984 case HtmlTokenId::PERSON_OFF:
1985 case HtmlTokenId::ACRONYM_OFF:
1986 case HtmlTokenId::ABBREVIATION_OFF:
1987 case HtmlTokenId::INSERTEDTEXT_OFF:
1988 case HtmlTokenId::DELETEDTEXT_OFF:
1990 case HtmlTokenId::TELETYPE_OFF:
1991 EndTag( nToken );
1992 break;
1994 case HtmlTokenId::HEAD_OFF:
// style sheet source collected so far is parsed at the end of the head
1995 if( !m_aStyleSource.isEmpty() )
1997 m_pCSS1Parser->ParseStyleSheet( m_aStyleSource );
1998 m_aStyleSource.clear();
2000 break;
2002 case HtmlTokenId::DOCTYPE:
2003 case HtmlTokenId::BODY_OFF:
2004 case HtmlTokenId::HTML_OFF:
2005 case HtmlTokenId::HEAD_ON:
2006 case HtmlTokenId::TITLE_OFF:
2007 break; // don't evaluate further???
2008 case HtmlTokenId::HTML_ON:
// only the DIR option of <HTML> is evaluated; the loop stops at the first
// one found while walking the options back to front
2010 const HTMLOptions& rHTMLOptions = GetOptions();
2011 for (size_t i = rHTMLOptions.size(); i; )
2013 const HTMLOption& rOption = rHTMLOptions[--i];
2014 if( HtmlOptionId::DIR == rOption.GetToken() )
2016 const OUString& rDir = rOption.GetString();
2017 SfxItemSet aItemSet( m_xDoc->GetAttrPool(),
2018 m_pCSS1Parser->GetWhichMap() );
2019 SvxCSS1PropertyInfo aPropInfo;
2020 OUString aDummy;
2021 ParseStyleOptions( aDummy, aDummy, aDummy, aItemSet,
2022 aPropInfo, nullptr, &rDir );
2024 m_pCSS1Parser->SetPageDescAttrs( nullptr, &aItemSet );
2025 break;
2029 break;
2031 case HtmlTokenId::INPUT:
2032 InsertInput();
2033 break;
2035 case HtmlTokenId::TEXTAREA_ON:
2036 NewTextArea();
2037 m_bCallNextToken = m_bTextArea && m_xTable;
2038 break;
2040 case HtmlTokenId::SELECT_ON:
2041 NewSelect();
2042 m_bCallNextToken = m_bSelect && m_xTable;
2043 break;
2045 case HtmlTokenId::ANCHOR_ON:
2046 NewAnchor();
2047 break;
2049 case HtmlTokenId::ANCHOR_OFF:
2050 EndAnchor();
2051 break;
2053 case HtmlTokenId::COMMENT:
2054 if( ( aToken.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments ) )
2056 // insert as Post-It
2057 // If there are no space characters right behind
2058 // the <!-- and on front of the -->, leave the comment untouched.
2059 if( ' ' == aToken[ 3 ] &&
2060 ' ' == aToken[ aToken.getLength()-3 ] )
2062 std::u16string_view aComment( aToken.subView( 3, aToken.getLength()-5 ) );
2063 InsertComment(OUString(comphelper::string::strip(aComment, ' ')));
2065 else
2067 OUString aComment = "<" + aToken + ">";
2068 InsertComment( aComment );
2071 break;
2073 case HtmlTokenId::MAP_ON:
2074 // Image Maps are read asynchronously: At first only an image map is created
2075 // Areas are processed later. Nevertheless the
2076 // ImageMap is inserted into the IMap-Array, because it might be used
2077 // already.
// On success the new map is handed over to m_pImageMaps (as unique_ptr)
// and m_pImageMap keeps pointing to it; otherwise it is deleted again.
2078 m_pImageMap = new ImageMap;
2079 if( ParseMapOptions( m_pImageMap) )
2081 if (!m_pImageMaps)
2082 m_pImageMaps.reset( new ImageMaps );
2083 m_pImageMaps->push_back(std::unique_ptr<ImageMap>(m_pImageMap));
2085 else
2087 delete m_pImageMap;
2088 m_pImageMap = nullptr;
2090 break;
2092 case HtmlTokenId::MAP_OFF:
2093 // there is no ImageMap anymore (don't delete IMap, because it's
2094 // already contained in the array!)
2095 m_pImageMap = nullptr;
2096 break;
2098 case HtmlTokenId::AREA:
2099 if( m_pImageMap )
2100 ParseAreaOptions( m_pImageMap, m_sBaseURL, SvMacroItemId::OnMouseOver,
2101 SvMacroItemId::OnMouseOut );
2102 break;
2104 case HtmlTokenId::FRAMESET_ON:
2105 bInsertUnknown = m_bKeepUnknown;
2106 break;
2108 case HtmlTokenId::NOFRAMES_ON:
2109 if( IsInHeader() )
2110 FinishHeader();
2111 bInsertUnknown = m_bKeepUnknown;
2112 break;
2114 case HtmlTokenId::UNKNOWNCONTROL_ON:
2115 // Ignore content of unknown token in the header, if the token
2116 // does not start with a '!'.
2117 // (but judging from the code, also if does not start with a '%')
2118 // (and also if we're not somewhere we consider PRE)
2119 if( IsInHeader() && !IsReadPRE() && m_aUnknownToken.isEmpty() &&
2120 !sSaveToken.isEmpty() && '!' != sSaveToken[0] &&
2121 '%' != sSaveToken[0] )
2122 m_aUnknownToken = sSaveToken;
2123 [[fallthrough]];
2125 default:
2126 bInsertUnknown = m_bKeepUnknown;
2127 break;
// Post-processing requested by the switch above.
2130 if( bGetIDOption )
2131 InsertIDOption();
// An unhandled tag is kept as a comment of the form
// "HTML: <[/]name[ options]>".
2133 if( bInsertUnknown )
2135 OUStringBuffer aComment("HTML: <");
2136 if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) )
2137 aComment.append("/");
2138 aComment.append(sSaveToken);
2139 if( !aToken.isEmpty() )
2141 UnescapeToken();
2142 aComment.append(" " + aToken);
2144 aComment.append(">");
2145 InsertComment( aComment.makeStringAndClear() );
2148 // if there are temporary paragraph attributes and the
2149 // paragraph isn't empty then the paragraph attributes are final.
2150 if( !m_aParaAttrs.empty() && m_pPam->GetPoint()->GetContentIndex() )
2151 m_aParaAttrs.clear();
2154 static void lcl_swhtml_getItemInfo( const HTMLAttr& rAttr,
2155 bool& rScriptDependent,
2156 sal_uInt16& rScriptType )
2158 switch( rAttr.GetItem().Which() )
2160 case RES_CHRATR_FONT:
2161 case RES_CHRATR_FONTSIZE:
2162 case RES_CHRATR_LANGUAGE:
2163 case RES_CHRATR_POSTURE:
2164 case RES_CHRATR_WEIGHT:
2165 rScriptType = i18n::ScriptType::LATIN;
2166 rScriptDependent = true;
2167 break;
2168 case RES_CHRATR_CJK_FONT:
2169 case RES_CHRATR_CJK_FONTSIZE:
2170 case RES_CHRATR_CJK_LANGUAGE:
2171 case RES_CHRATR_CJK_POSTURE:
2172 case RES_CHRATR_CJK_WEIGHT:
2173 rScriptType = i18n::ScriptType::ASIAN;
2174 rScriptDependent = true;
2175 break;
2176 case RES_CHRATR_CTL_FONT:
2177 case RES_CHRATR_CTL_FONTSIZE:
2178 case RES_CHRATR_CTL_LANGUAGE:
2179 case RES_CHRATR_CTL_POSTURE:
2180 case RES_CHRATR_CTL_WEIGHT:
2181 rScriptType = i18n::ScriptType::COMPLEX;
2182 rScriptDependent = true;
2183 break;
2184 default:
2185 rScriptDependent = false;
2186 break;
2190 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode, bool bUpdateNum )
2192 // A hard line break at the end always must be removed.
2193 // A second one we replace with paragraph spacing.
2194 sal_Int32 nLFStripped = StripTrailingLF();
2195 if( (AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode) && nLFStripped > 1 )
2196 eMode = AM_SPACE;
2198 // the hard attributes of this paragraph will never be invalid again
2199 m_aParaAttrs.clear();
2201 SwTextNode *pTextNode = (AM_SPACE==eMode || AM_NOSPACE==eMode) ?
2202 m_pPam->GetPoint()->GetNode().GetTextNode() : nullptr;
2204 if (pTextNode)
2206 const SvxULSpaceItem& rULSpace =
2207 pTextNode->SwContentNode::GetAttr( RES_UL_SPACE );
2209 bool bChange = AM_NOSPACE==eMode ? rULSpace.GetLower() > 0
2210 : rULSpace.GetLower() == 0;
2212 if( bChange )
2214 const SvxULSpaceItem& rCollULSpace =
2215 pTextNode->GetAnyFormatColl().GetULSpace();
2217 bool bMayReset = AM_NOSPACE==eMode ? rCollULSpace.GetLower() == 0
2218 : rCollULSpace.GetLower() > 0;
2220 if( bMayReset &&
2221 rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2223 pTextNode->ResetAttr( RES_UL_SPACE );
2225 else
2227 pTextNode->SetAttr(
2228 SvxULSpaceItem( rULSpace.GetUpper(),
2229 AM_NOSPACE==eMode ? 0 : HTML_PARSPACE, RES_UL_SPACE ) );
2233 m_bNoParSpace = AM_NOSPACE==eMode || AM_SOFTNOSPACE==eMode;
2235 SwPosition aOldPos( *m_pPam->GetPoint() );
2237 bool bRet = m_xDoc->getIDocumentContentOperations().AppendTextNode( *m_pPam->GetPoint() );
2239 // split character attributes and maybe set none,
2240 // which are set for the whole paragraph
2241 const sal_Int32 nEndCnt = aOldPos.GetContentIndex();
2242 const SwPosition& rPos = *m_pPam->GetPoint();
2244 HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
2245 for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes)
2247 HTMLAttr *pAttr = *pHTMLAttributes;
2248 if( pAttr && pAttr->GetItem().Which() < RES_PARATR_BEGIN )
2250 bool bWholePara = false;
2252 while( pAttr )
2254 HTMLAttr *pNext = pAttr->GetNext();
2255 if( pAttr->GetStartParagraphIdx() < aOldPos.GetNodeIndex() ||
2256 (!bWholePara &&
2257 pAttr->GetStartParagraph() == aOldPos.GetNode() &&
2258 pAttr->GetStartContent() != nEndCnt) )
2260 bWholePara =
2261 pAttr->GetStartParagraph() == aOldPos.GetNode() &&
2262 pAttr->GetStartContent() == 0;
2264 sal_Int32 nStt = pAttr->m_nStartContent;
2265 bool bScript = false;
2266 sal_uInt16 nScriptItem;
2267 bool bInsert = true;
2268 lcl_swhtml_getItemInfo( *pAttr, bScript,
2269 nScriptItem );
2270 // set previous part
2271 if( bScript )
2273 const SwTextNode *pTextNd =
2274 pAttr->GetStartParagraph().GetNode().GetTextNode();
2275 OSL_ENSURE( pTextNd, "No text node" );
2276 if( pTextNd )
2278 const OUString& rText = pTextNd->GetText();
2279 sal_uInt16 nScriptText =
2280 g_pBreakIt->GetBreakIter()->getScriptType(
2281 rText, pAttr->GetStartContent() );
2282 sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
2283 ->endOfScript( rText, nStt, nScriptText );
2284 while (nScriptEnd < nEndCnt && nScriptEnd != -1)
2286 if( nScriptItem == nScriptText )
2288 HTMLAttr *pSetAttr =
2289 pAttr->Clone( aOldPos.GetNode(), nScriptEnd );
2290 pSetAttr->m_nStartContent = nStt;
2291 pSetAttr->ClearPrev();
2292 if( !pNext || bWholePara )
2294 if (pSetAttr->m_bInsAtStart)
2295 m_aSetAttrTab.push_front( pSetAttr );
2296 else
2297 m_aSetAttrTab.push_back( pSetAttr );
2299 else
2300 pNext->InsertPrev( pSetAttr );
2302 nStt = nScriptEnd;
2303 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
2304 rText, nStt );
2305 nScriptEnd = g_pBreakIt->GetBreakIter()
2306 ->endOfScript( rText, nStt, nScriptText );
2308 bInsert = nScriptItem == nScriptText;
2311 if( bInsert )
2313 HTMLAttr *pSetAttr =
2314 pAttr->Clone( aOldPos.GetNode(), nEndCnt );
2315 pSetAttr->m_nStartContent = nStt;
2317 // When the attribute is for the whole paragraph, the outer
2318 // attributes aren't effective anymore. Hence it may not be inserted
2319 // in the Prev-List of an outer attribute, because that won't be
2320 // set. That leads to shifting when fields are used.
2321 if( !pNext || bWholePara )
2323 if (pSetAttr->m_bInsAtStart)
2324 m_aSetAttrTab.push_front( pSetAttr );
2325 else
2326 m_aSetAttrTab.push_back( pSetAttr );
2328 else
2329 pNext->InsertPrev( pSetAttr );
2331 else
2333 HTMLAttr *pPrev = pAttr->GetPrev();
2334 if( pPrev )
2336 // the previous attributes must be set anyway
2337 if( !pNext || bWholePara )
2339 if (pPrev->m_bInsAtStart)
2340 m_aSetAttrTab.push_front( pPrev );
2341 else
2342 m_aSetAttrTab.push_back( pPrev );
2344 else
2345 pNext->InsertPrev( pPrev );
2348 pAttr->ClearPrev();
2351 pAttr->SetStart( rPos );
2352 pAttr = pNext;
2357 if( bUpdateNum )
2359 if( GetNumInfo().GetDepth() )
2361 sal_uInt8 nLvl = GetNumInfo().GetLevel();
2362 SetNodeNum( nLvl );
2364 else
2365 m_pPam->GetPointNode().GetTextNode()->ResetAttr( RES_PARATR_NUMRULE );
2368 // We must set the attribute of the paragraph before now (because of JavaScript)
2369 SetAttr();
2371 // Now it is time to get rid of all script dependent hints that are
2372 // equal to the settings in the style
2373 SwTextNode *pTextNd = aOldPos.GetNode().GetTextNode();
2374 OSL_ENSURE( pTextNd, "There is the txt node" );
2375 size_t nCntAttr = (pTextNd && pTextNd->GetpSwpHints())
2376 ? pTextNd->GetSwpHints().Count() : 0;
2377 if( nCntAttr )
2379 // These are the end position of all script dependent hints.
2380 // If we find a hint that starts before the current end position,
2381 // we have to set it. If we find a hint that start behind or at
2382 // that position, we have to take the hint value into account.
2383 // If it is equal to the style, or in fact the paragraph value
2384 // for that hint, the hint is removed. Otherwise its end position
2385 // is remembered.
2386 sal_Int32 aEndPos[15] =
2387 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
2388 SwpHints& rHints = pTextNd->GetSwpHints();
2389 for( size_t i=0; i < nCntAttr; i++ )
2391 SwTextAttr *pHt = rHints.Get( i );
2392 sal_uInt16 nWhich = pHt->Which();
2393 sal_Int16 nIdx = 0;
2394 bool bFont = false;
2395 switch( nWhich )
2397 case RES_CHRATR_FONT:
2398 nIdx = 0;
2399 bFont = true;
2400 break;
2401 case RES_CHRATR_FONTSIZE:
2402 nIdx = 1;
2403 break;
2404 case RES_CHRATR_LANGUAGE:
2405 nIdx = 2;
2406 break;
2407 case RES_CHRATR_POSTURE:
2408 nIdx = 3;
2409 break;
2410 case RES_CHRATR_WEIGHT:
2411 nIdx = 4;
2412 break;
2413 case RES_CHRATR_CJK_FONT:
2414 nIdx = 5;
2415 bFont = true;
2416 break;
2417 case RES_CHRATR_CJK_FONTSIZE:
2418 nIdx = 6;
2419 break;
2420 case RES_CHRATR_CJK_LANGUAGE:
2421 nIdx = 7;
2422 break;
2423 case RES_CHRATR_CJK_POSTURE:
2424 nIdx = 8;
2425 break;
2426 case RES_CHRATR_CJK_WEIGHT:
2427 nIdx = 9;
2428 break;
2429 case RES_CHRATR_CTL_FONT:
2430 nIdx = 10;
2431 bFont = true;
2432 break;
2433 case RES_CHRATR_CTL_FONTSIZE:
2434 nIdx = 11;
2435 break;
2436 case RES_CHRATR_CTL_LANGUAGE:
2437 nIdx = 12;
2438 break;
2439 case RES_CHRATR_CTL_POSTURE:
2440 nIdx = 13;
2441 break;
2442 case RES_CHRATR_CTL_WEIGHT:
2443 nIdx = 14;
2444 break;
2445 default:
2446 // Skip to next attribute
2447 continue;
2449 const sal_Int32 nStt = pHt->GetStart();
2450 if( nStt >= aEndPos[nIdx] )
2452 const SfxPoolItem& rItem =
2453 static_cast<const SwContentNode *>(pTextNd)->GetAttr( nWhich );
2454 if( bFont ? swhtml_css1atr_equalFontItems(rItem,pHt->GetAttr())
2455 : rItem == pHt->GetAttr() )
2457 // The hint is the same as set in the paragraph and
2458 // therefore, it can be deleted
2459 // CAUTION!!! This WILL delete the hint and it MAY
2460 // also delete the SwpHints!!! To avoid any trouble
2461 // we leave the loop immediately if this is the last
2462 // hint.
2463 pTextNd->DeleteAttribute( pHt );
2464 if( 1 == nCntAttr )
2465 break;
2466 i--;
2467 nCntAttr--;
2469 else
2471 // The hint is different. Therefore all hints within that
2472 // hint have to be ignored.
2473 aEndPos[nIdx] = pHt->GetEnd() ? *pHt->GetEnd() : nStt;
2476 else
2478 // The hint starts before another one ends.
2479 // The hint in this case is not deleted
2480 OSL_ENSURE( pHt->GetEnd() && *pHt->GetEnd() <= aEndPos[nIdx],
2481 "hints aren't nested properly!" );
2486 if (!m_xTable && !--m_nParaCnt)
2487 Show();
2489 return bRet;
2492 void SwHTMLParser::AddParSpace()
2494 //If it already has ParSpace, return
2495 if( !m_bNoParSpace )
2496 return;
2498 m_bNoParSpace = false;
2500 SwNodeOffset nNdIdx = m_pPam->GetPoint()->GetNodeIndex() - 1;
2502 SwTextNode *pTextNode = m_xDoc->GetNodes()[nNdIdx]->GetTextNode();
2503 if( !pTextNode )
2504 return;
2506 SvxULSpaceItem rULSpace =
2507 pTextNode->SwContentNode::GetAttr( RES_UL_SPACE );
2508 if( rULSpace.GetLower() )
2509 return;
2511 const SvxULSpaceItem& rCollULSpace =
2512 pTextNode->GetAnyFormatColl().GetULSpace();
2513 if( rCollULSpace.GetLower() &&
2514 rCollULSpace.GetUpper() == rULSpace.GetUpper() )
2516 pTextNode->ResetAttr( RES_UL_SPACE );
2518 else
2520 //What I do here, is that I examine the attributes, and if
2521 //I find out, that it's CJK/CTL, then I set the paragraph space
2522 //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2524 bool bIsCJK = false;
2525 bool bIsCTL = false;
2527 const size_t nCntAttr = pTextNode->GetpSwpHints()
2528 ? pTextNode->GetSwpHints().Count() : 0;
2530 for(size_t i = 0; i < nCntAttr; ++i)
2532 SwTextAttr *const pHt = pTextNode->GetSwpHints().Get(i);
2533 sal_uInt16 const nWhich = pHt->Which();
2534 if (RES_CHRATR_CJK_FONT == nWhich ||
2535 RES_CHRATR_CJK_FONTSIZE == nWhich ||
2536 RES_CHRATR_CJK_LANGUAGE == nWhich ||
2537 RES_CHRATR_CJK_POSTURE == nWhich ||
2538 RES_CHRATR_CJK_WEIGHT == nWhich)
2540 bIsCJK = true;
2541 break;
2543 if (RES_CHRATR_CTL_FONT == nWhich ||
2544 RES_CHRATR_CTL_FONTSIZE == nWhich ||
2545 RES_CHRATR_CTL_LANGUAGE == nWhich ||
2546 RES_CHRATR_CTL_POSTURE == nWhich ||
2547 RES_CHRATR_CTL_WEIGHT == nWhich)
2549 bIsCTL = true;
2550 break;
2554 if( bIsCTL )
2556 pTextNode->SetAttr(
2557 SvxULSpaceItem( rULSpace.GetUpper(), HTML_CTL_PARSPACE, RES_UL_SPACE ) );
2559 else if( bIsCJK )
2561 pTextNode->SetAttr(
2562 SvxULSpaceItem( rULSpace.GetUpper(), HTML_CJK_PARSPACE, RES_UL_SPACE ) );
2563 } else {
2564 pTextNode->SetAttr(
2565 SvxULSpaceItem( rULSpace.GetUpper(), HTML_PARSPACE, RES_UL_SPACE ) );
2570 void SwHTMLParser::Show()
2572 // Here
2573 // - a EndAction is called, so the document is formatted
2574 // - a Reschedule is called,
2575 // - the own View-Shell is set again
2576 // - and a StartAction is called
2578 OSL_ENSURE( SvParserState::Working==eState, "Show not in working state - That can go wrong" );
2579 SwViewShell *pOldVSh = CallEndAction();
2581 Application::Reschedule();
2583 if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2584 || 1 == m_xDoc->getReferenceCount() )
2586 // was the import aborted by SFX?
2587 eState = SvParserState::Error;
2590 // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2591 SwViewShell *pVSh = CallStartAction( pOldVSh );
2593 // is the current node not visible anymore, then we use a bigger increment
2594 if( pVSh )
2596 m_nParaCnt = (m_pPam->GetPoint()->GetNode().IsInVisibleArea(pVSh))
2597 ? 5 : 50;
2601 void SwHTMLParser::ShowStatline()
2603 // Here
2604 // - a Reschedule is called, so it can be scrolled
2605 // - the own View-Shell is set again
2606 // - a StartAction/EndAction is called, when there was scrolling.
2608 OSL_ENSURE( SvParserState::Working==eState, "ShowStatLine not in working state - That can go wrong" );
2610 // scroll bar
2611 if (m_xProgress)
2613 m_xProgress->Update(rInput.Tell());
2614 CheckActionViewShell();
2616 else
2618 Application::Reschedule();
2620 if( ( m_xDoc->GetDocShell() && m_xDoc->GetDocShell()->IsAbortingImport() )
2621 || 1 == m_xDoc->getReferenceCount() )
2622 // was the import aborted by SFX?
2623 eState = SvParserState::Error;
2625 SwViewShell *pVSh = CheckActionViewShell();
2626 if( pVSh && pVSh->HasInvalidRect() )
2628 CallEndAction( false, false );
2629 CallStartAction( pVSh, false );
2634 SwViewShell *SwHTMLParser::CallStartAction( SwViewShell *pVSh, bool bChkPtr )
2636 OSL_ENSURE( !m_pActionViewShell, "CallStartAction: SwViewShell already set" );
2638 if( !pVSh || bChkPtr )
2640 #if OSL_DEBUG_LEVEL > 0
2641 SwViewShell *pOldVSh = pVSh;
2642 #endif
2643 pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2644 #if OSL_DEBUG_LEVEL > 0
2645 OSL_ENSURE( !pVSh || !pOldVSh || pOldVSh == pVSh, "CallStartAction: Who swapped the SwViewShell?" );
2646 if( pOldVSh && !pVSh )
2647 pVSh = nullptr;
2648 #endif
2650 m_pActionViewShell = pVSh;
2652 if( m_pActionViewShell )
2654 if( auto pEditShell = dynamic_cast< SwEditShell *>( m_pActionViewShell ) )
2655 pEditShell->StartAction();
2656 else
2657 m_pActionViewShell->StartAction();
2660 return m_pActionViewShell;
2663 SwViewShell *SwHTMLParser::CallEndAction( bool bChkAction, bool bChkPtr )
2665 if( bChkPtr )
2667 SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2668 OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2669 "CallEndAction: Who swapped the SwViewShell?" );
2670 #if OSL_DEBUG_LEVEL > 0
2671 if( m_pActionViewShell && !pVSh )
2672 pVSh = nullptr;
2673 #endif
2674 if( pVSh != m_pActionViewShell )
2675 m_pActionViewShell = nullptr;
2678 if( !m_pActionViewShell || (bChkAction && !m_pActionViewShell->ActionPend()) )
2679 return m_pActionViewShell;
2681 if (SwEditShell* pEditShell = dynamic_cast<SwEditShell*>(m_pActionViewShell))
2683 // Already scrolled?, then make sure that the view doesn't move!
2684 const bool bOldLock = m_pActionViewShell->IsViewLocked();
2685 m_pActionViewShell->LockView( true );
2686 pEditShell->EndAction();
2687 m_pActionViewShell->LockView( bOldLock );
2689 // bChkJumpMark is only set when the object was also found
2690 if( m_bChkJumpMark )
2692 const Point aVisSttPos( DOCUMENTBORDER, DOCUMENTBORDER );
2693 if( GetMedium() && aVisSttPos == m_pActionViewShell->VisArea().Pos() )
2694 ::JumpToSwMark( m_pActionViewShell,
2695 GetMedium()->GetURLObject().GetMark() );
2696 m_bChkJumpMark = false;
2699 else
2700 m_pActionViewShell->EndAction();
2702 // if the parser holds the last reference to the document, then we can
2703 // abort here and set an error.
2704 if( 1 == m_xDoc->getReferenceCount() )
2706 eState = SvParserState::Error;
2709 SwViewShell *pVSh = m_pActionViewShell;
2710 m_pActionViewShell = nullptr;
2712 return pVSh;
2715 SwViewShell *SwHTMLParser::CheckActionViewShell()
2717 SwViewShell *pVSh = m_xDoc->getIDocumentLayoutAccess().GetCurrentViewShell();
2718 OSL_ENSURE( !pVSh || m_pActionViewShell == pVSh,
2719 "CheckActionViewShell: Who has swapped SwViewShell?" );
2720 #if OSL_DEBUG_LEVEL > 0
2721 if( m_pActionViewShell && !pVSh )
2722 pVSh = nullptr;
2723 #endif
2724 if( pVSh != m_pActionViewShell )
2725 m_pActionViewShell = nullptr;
2727 return m_pActionViewShell;
2730 SwHTMLFrameFormatListener::SwHTMLFrameFormatListener(SwFrameFormat* pFrameFormat)
2731 : m_pFrameFormat(pFrameFormat)
2733 StartListening(m_pFrameFormat->GetNotifier());
2736 void SwHTMLFrameFormatListener::Notify(const SfxHint& rHint)
2738 if (rHint.GetId() == SfxHintId::Dying)
2739 m_pFrameFormat = nullptr;
// Applies the finished attributes queued in m_aSetAttrTab to the document,
// re-anchors the fly frames queued in m_aMoveFlyFrames to character
// positions, and finally inserts the field attributes collected on the way.
//
// bChkEnd       if true, only entries that satisfy the "ends before the
//               current PaM position" conditions below are processed; if
//               false, the decision is based on the nodes-array section
//               (see the comparison against GetEndOfExtras()).
// bBeforeTable  if true, attributes ending in the current node are ended in
//               the previous node instead (or discarded when they also
//               start in the current node), except for RES_BREAK,
//               RES_PAGEDESC and text attributes without end.
// pPostIts      if non-null, Postit and Script fields are not inserted but
//               pushed to the front of this deque, which takes ownership.
//
// Every HTMLAttr taken out of m_aSetAttrTab is either deleted here or handed
// over to a unique_ptr (aFields / *pPostIts).
2742 void SwHTMLParser::SetAttr_( bool bChkEnd, bool bBeforeTable,
2743 std::deque<std::unique_ptr<HTMLAttr>> *pPostIts )
2745 SwPaM aAttrPam( *m_pPam->GetPoint() );
2746 const SwPosition& rEndPos = *m_pPam->GetPoint();
2747 const sal_Int32 nEndCnt = m_pPam->GetPoint()->GetContentIndex();
2748 HTMLAttr* pAttr;
2749 SwContentNode* pCNd;
2751 std::vector<std::unique_ptr<HTMLAttr>> aFields;
// Walk the queue from back to front: n is decremented before use, so
// erasing entry n does not disturb the entries that are still to be visited.
2753 for( auto n = m_aSetAttrTab.size(); n; )
2755 pAttr = m_aSetAttrTab[ --n ];
2756 sal_uInt16 nWhich = pAttr->m_pItem->Which();
2758 SwNodeOffset nEndParaIdx = pAttr->GetEndParagraphIdx();
2759 bool bSetAttr;
2760 if( bChkEnd )
2762 // Set character attribute with end early on, so set them still in
2763 // the current paragraph (because of JavaScript and various "chats"(?)).
2764 // This shouldn't be done for attributes which are used for
2765 // the whole paragraph, because they could be from a paragraph style
2766 // which can't be set. Because the attributes are inserted with
2767 // SETATTR_DONTREPLACE, they should be able to be set later.
2768 bSetAttr = ( nEndParaIdx < rEndPos.GetNodeIndex() &&
2769 ((RES_MARGIN_FIRSTLINE != nWhich && RES_MARGIN_TEXTLEFT != nWhich) || !GetNumInfo().GetNumRule()) ) ||
2770 ( !pAttr->IsLikePara() &&
2771 nEndParaIdx == rEndPos.GetNodeIndex() &&
2772 pAttr->GetEndContent() < nEndCnt &&
2773 (isCHRATR(nWhich) || isTXTATR_WITHEND(nWhich)) ) ||
2774 ( bBeforeTable &&
2775 nEndParaIdx == rEndPos.GetNodeIndex() &&
2776 !pAttr->GetEndContent() );
2778 else
2780 // Attributes in body nodes array section shouldn't be set if we are in a
2781 // special nodes array section, but vice versa it's possible.
2782 SwNodeOffset nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
2783 bSetAttr = nEndParaIdx < rEndPos.GetNodeIndex() ||
2784 rEndPos.GetNodeIndex() > nEndOfIcons ||
2785 nEndParaIdx <= nEndOfIcons;
2788 if( bSetAttr )
2790 // The attribute shouldn't be in the list of temporary paragraph
2791 // attributes, because then it would be deleted.
2792 while( !m_aParaAttrs.empty() )
2794 OSL_ENSURE( pAttr != m_aParaAttrs.back(),
2795 "SetAttr: Attribute must not yet be set" );
2796 m_aParaAttrs.pop_back();
2799 // then set it
2800 m_aSetAttrTab.erase( m_aSetAttrTab.begin() + n );
// pAttr heads a chain linked through GetPrev(); each element of the chain
// is handled in turn. pPrev is fetched first because pAttr may be deleted.
2802 while( pAttr )
2804 HTMLAttr *pPrev = pAttr->GetPrev();
2805 if( !pAttr->m_bValid )
2807 // invalid attributes can be deleted
2808 delete pAttr;
2809 pAttr = pPrev;
2810 continue;
2813 pCNd = pAttr->m_nStartPara.GetNode().GetContentNode();
2814 if( !pCNd )
2816 // because of the awful deleting of nodes an index can also
2817 // point to an end node :-(
2818 if ( (pAttr->GetStartParagraph() == pAttr->GetEndParagraph()) &&
2819 !isTXTATR_NOEND(nWhich) )
2821 // when the end index also points to the node, we don't
2822 // need to set attributes anymore, except if it's a text attribute.
2823 delete pAttr;
2824 pAttr = pPrev;
2825 continue;
2827 pCNd = m_xDoc->GetNodes().GoNext( &(pAttr->m_nStartPara) );
2828 if( pCNd )
2829 pAttr->m_nStartContent = 0;
2830 else
2832 OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2833 delete pAttr;
2834 pAttr = pPrev;
2835 continue;
2839 // because of the deleting of BRs the start index can also
2840 // point behind the end the text
2841 if( pAttr->m_nStartContent > pCNd->Len() )
2842 pAttr->m_nStartContent = pCNd->Len();
2843 aAttrPam.GetPoint()->Assign( *pCNd, pAttr->m_nStartContent );
// The mark stays at the start position; the point is moved to the end below.
2845 aAttrPam.SetMark();
2846 if ( (pAttr->GetStartParagraph() != pAttr->GetEndParagraph()) &&
2847 !isTXTATR_NOEND(nWhich) )
2849 pCNd = pAttr->m_nEndPara.GetNode().GetContentNode();
2850 if( !pCNd )
2852 pCNd = SwNodes::GoPrevious( &(pAttr->m_nEndPara) );
2853 if( pCNd )
2854 pAttr->m_nEndContent = pCNd->Len();
2855 else
2857 OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2858 aAttrPam.DeleteMark();
2859 delete pAttr;
2860 pAttr = pPrev;
2861 continue;
2865 else if( pAttr->IsLikePara() )
2867 pAttr->m_nEndContent = pCNd->Len();
2870 // because of the deleting of BRs the start index can also
2871 // point behind the end the text
2872 if( pAttr->m_nEndContent > pCNd->Len() )
2873 pAttr->m_nEndContent = pCNd->Len();
2875 aAttrPam.GetPoint()->Assign( *pCNd, pAttr->m_nEndContent );
2876 if( bBeforeTable &&
2877 aAttrPam.GetPoint()->GetNodeIndex() ==
2878 rEndPos.GetNodeIndex() )
2880 // If we're before inserting a table and the attribute ends
2881 // in the current node, then we must end it in the previous
2882 // node or discard it, if it starts in that node.
2883 if( nWhich != RES_BREAK && nWhich != RES_PAGEDESC &&
2884 !isTXTATR_NOEND(nWhich) )
2886 if( aAttrPam.GetMark()->GetNodeIndex() !=
2887 rEndPos.GetNodeIndex() )
2889 OSL_ENSURE( !aAttrPam.GetPoint()->GetContentIndex(),
2890 "Content-Position before table not 0???" );
2891 aAttrPam.Move( fnMoveBackward );
2893 else
2895 aAttrPam.DeleteMark();
2896 delete pAttr;
2897 pAttr = pPrev;
2898 continue;
// Dispatch on the item id. Cases that leave the switch with "break" fall
// through to the common cleanup after it (DeleteMark, delete pAttr); the
// field cases hand pAttr over to a container and "continue" instead.
2903 switch( nWhich )
2905 case RES_FLTR_BOOKMARK: // insert bookmark
2907 const OUString sName( static_cast<SfxStringItem*>(pAttr->m_pItem.get())->GetValue() );
2908 IDocumentMarkAccess* const pMarkAccess = m_xDoc->getIDocumentMarkAccess();
2909 IDocumentMarkAccess::const_iterator_t ppBkmk = pMarkAccess->findMark( sName );
2910 if( ppBkmk != pMarkAccess->getAllMarksEnd() &&
2911 (*ppBkmk)->GetMarkStart() == *aAttrPam.GetPoint() )
2912 break; // do not generate duplicates on this position
2913 aAttrPam.DeleteMark();
2914 const ::sw::mark::IMark* const pNewMark = pMarkAccess->makeMark(
2915 aAttrPam,
2916 sName,
2917 IDocumentMarkAccess::MarkType::BOOKMARK,
2918 ::sw::mark::InsertMode::New);
2920 // jump to bookmark
2921 if( JumpToMarks::Mark == m_eJumpTo && pNewMark->GetName() == m_sJmpMark )
2923 m_bChkJumpMark = true;
2924 m_eJumpTo = JumpToMarks::NONE;
2927 break;
2928 case RES_TXTATR_FIELD:
2929 case RES_TXTATR_ANNOTATION:
2930 case RES_TXTATR_INPUTFIELD:
// The field type is only looked up when a post-it deque was passed;
// otherwise SwFieldIds::Database serves as a value that matches neither
// Postit nor Script.
2932 SwFieldIds nFieldWhich =
2933 pPostIts
2934 ? static_cast<const SwFormatField *>(pAttr->m_pItem.get())->GetField()->GetTyp()->Which()
2935 : SwFieldIds::Database;
2936 if( pPostIts && (SwFieldIds::Postit == nFieldWhich ||
2937 SwFieldIds::Script == nFieldWhich) )
// ownership of pAttr passes to the unique_ptr created in *pPostIts
2939 pPostIts->emplace_front( pAttr );
2941 else
// ownership of pAttr passes to aFields; the field itself is inserted in
// the last loop of this function
2943 aFields.emplace_back( pAttr);
2946 aAttrPam.DeleteMark();
2947 pAttr = pPrev;
2948 continue;
2950 // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2951 // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2952 // This is the right place in the future if the adapted fill attributes
2953 // may be handled more directly in HTML import to handle them.
2954 case RES_BACKGROUND:
2956 const SvxBrushItem& rBrush = static_cast< SvxBrushItem& >(*pAttr->m_pItem);
2957 SfxItemSetFixed<XATTR_FILL_FIRST, XATTR_FILL_LAST> aNewSet(m_xDoc->GetAttrPool());
2959 setSvxBrushItemAsFillAttributesToTargetSet(rBrush, aNewSet);
2960 m_xDoc->getIDocumentContentOperations().InsertItemSet(aAttrPam, aNewSet, SetAttrMode::DONTREPLACE);
2961 break;
2964 case RES_LR_SPACE:
2965 assert(false);
2966 break;
2968 case RES_MARGIN_FIRSTLINE:
2969 case RES_MARGIN_TEXTLEFT:
2970 case RES_MARGIN_RIGHT:
2971 if( aAttrPam.GetPoint()->GetNodeIndex() ==
2972 aAttrPam.GetMark()->GetNodeIndex())
2974 // because of numbering set this attribute directly at node
2975 pCNd->SetAttr( *pAttr->m_pItem );
2976 break;
2978 OSL_ENSURE( false,
2979 "LRSpace set over multiple paragraphs!" );
2980 [[fallthrough]]; // (shouldn't reach this point anyway)
2981 default:
2983 // maybe jump to a bookmark
2984 if( RES_TXTATR_INETFMT == nWhich &&
2985 JumpToMarks::Mark == m_eJumpTo &&
2986 m_sJmpMark == static_cast<SwFormatINetFormat*>(pAttr->m_pItem.get())->GetName() )
2988 m_bChkJumpMark = true;
2989 m_eJumpTo = JumpToMarks::NONE;
2992 m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *pAttr->m_pItem, SetAttrMode::DONTREPLACE );
// common cleanup for all cases that left the switch via "break"
2994 aAttrPam.DeleteMark();
2996 delete pAttr;
2997 pAttr = pPrev;
// Second pass: fly frames queued in m_aMoveFlyFrames are switched to
// FLY_AT_CHAR anchors at the content index stored in the parallel
// container m_aMoveFlyCnts. Again iterated back to front because entries
// are erased from both containers.
3002 for( auto n = m_aMoveFlyFrames.size(); n; )
3004 SwFrameFormat *pFrameFormat = m_aMoveFlyFrames[--n]->GetFrameFormat();
3005 if (!pFrameFormat)
3007 SAL_WARN("sw.html", "SwFrameFormat deleted during import");
3008 m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
3009 m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
3010 continue;
3013 const SwFormatAnchor& rAnchor = pFrameFormat->GetAnchor();
3014 OSL_ENSURE( RndStdIds::FLY_AT_PARA == rAnchor.GetAnchorId(),
3015 "Only At-Para flys need special handling" );
3016 SwNodeOffset nFlyParaIdx = rAnchor.GetAnchorNode()->GetIndex();
3017 bool bMoveFly;
3018 if( bChkEnd )
3020 bMoveFly = nFlyParaIdx < rEndPos.GetNodeIndex() ||
3021 ( nFlyParaIdx == rEndPos.GetNodeIndex() &&
3022 m_aMoveFlyCnts[n] < nEndCnt );
3024 else
3026 SwNodeOffset nEndOfIcons = m_xDoc->GetNodes().GetEndOfExtras().GetIndex();
3027 bMoveFly = nFlyParaIdx < rEndPos.GetNodeIndex() ||
3028 rEndPos.GetNodeIndex() > nEndOfIcons ||
3029 nFlyParaIdx <= nEndOfIcons;
3031 if( bMoveFly )
// The frames are deleted before the anchor is changed and recreated by
// MakeFrames() afterwards.
3033 pFrameFormat->DelFrames();
3034 *aAttrPam.GetPoint() = *rAnchor.GetContentAnchor();
3035 aAttrPam.GetPoint()->SetContent( m_aMoveFlyCnts[n] );
3036 SwFormatAnchor aAnchor( rAnchor );
3037 aAnchor.SetType( RndStdIds::FLY_AT_CHAR );
3038 aAnchor.SetAnchor( aAttrPam.GetPoint() );
3039 pFrameFormat->SetFormatAttr( aAnchor );
3041 const SwFormatHoriOrient& rHoriOri = pFrameFormat->GetHoriOrient();
3042 if( text::HoriOrientation::LEFT == rHoriOri.GetHoriOrient() )
3044 SwFormatHoriOrient aHoriOri( rHoriOri );
3045 aHoriOri.SetRelationOrient( text::RelOrientation::CHAR );
3046 pFrameFormat->SetFormatAttr( aHoriOri );
3048 const SwFormatVertOrient& rVertOri = pFrameFormat->GetVertOrient();
3049 if( text::VertOrientation::TOP == rVertOri.GetVertOrient() )
3051 SwFormatVertOrient aVertOri( rVertOri );
3052 aVertOri.SetRelationOrient( text::RelOrientation::CHAR );
3053 pFrameFormat->SetFormatAttr( aVertOri );
3056 pFrameFormat->MakeFrames();
3057 m_aMoveFlyFrames.erase( m_aMoveFlyFrames.begin() + n );
3058 m_aMoveFlyCnts.erase( m_aMoveFlyCnts.begin() + n );
// Third pass: insert the fields collected above at their start positions.
// NOTE(review): pCNd is dereferenced without a null check here - confirm
// that a collected field's start paragraph is always a content node.
3061 for (auto & field : aFields)
3063 pCNd = field->m_nStartPara.GetNode().GetContentNode();
3064 aAttrPam.GetPoint()->Assign( *pCNd, field->m_nStartContent );
3066 if( bBeforeTable &&
3067 aAttrPam.GetPoint()->GetNodeIndex() == rEndPos.GetNodeIndex() )
3069 OSL_ENSURE( !bBeforeTable, "Aha, the case does occur" );
3070 OSL_ENSURE( !aAttrPam.GetPoint()->GetContentIndex(),
3071 "Content-Position before table not 0???" );
3072 // !!!
3073 aAttrPam.Move( fnMoveBackward );
3076 m_xDoc->getIDocumentContentOperations().InsertPoolItem( aAttrPam, *field->m_pItem );
// releases the HTMLAttr owned by this vector slot
3078 field.reset();
3080 aFields.clear();
3083 void SwHTMLParser::NewAttr(const std::shared_ptr<HTMLAttrTable>& rAttrTable, HTMLAttr **ppAttr, const SfxPoolItem& rItem )
3085 // Font height and font colour as well as escape attributes may not be
3086 // combined. Therefore they're saved in a list and in it the last opened
3087 // attribute is at the beginning and count is always one. For all other
3088 // attributes count is just incremented.
3089 if( *ppAttr )
3091 HTMLAttr *pAttr = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
3092 pAttr->InsertNext( *ppAttr );
3093 (*ppAttr) = pAttr;
3095 else
3096 (*ppAttr) = new HTMLAttr(*m_pPam->GetPoint(), rItem, ppAttr, rAttrTable);
// Ends the open attribute pAttr at the current PaM position and removes it
// from the list of open attributes it belongs to (pAttr->m_ppHead).
//
// pAttr      the open attribute to end. After the call it is either queued
//            in m_aSetAttrTab, appended to the Prev-List of the next open
//            attribute of the same kind, or deleted - the caller must not
//            use it any more.
// bChkEmpty  if true, an attribute with an empty range is deleted instead
//            of being set, unless one of the exceptions in the condition
//            below applies (paragraph attribute after moving back,
//            RES_PAGEDESC, RES_BREAK).
//
// Returns false if the attribute was deleted as empty, true otherwise.
3099 bool SwHTMLParser::EndAttr( HTMLAttr* pAttr, bool bChkEmpty )
3101 bool bRet = true;
3103 // The list header is saved in the attribute.
3104 HTMLAttr **ppHead = pAttr->m_ppHead;
3106 OSL_ENSURE( ppHead, "No list header attribute found!" );
3108 // save the current position as end position
3109 const SwPosition* pEndPos = m_pPam->GetPoint();
3110 sal_Int32 nEndCnt = m_pPam->GetPoint()->GetContentIndex();
3112 // Is the last started or an earlier started attribute being ended?
3113 HTMLAttr *pLast = nullptr;
3114 if( ppHead && pAttr != *ppHead )
3116 // The last started attribute isn't being ended
3118 // Then we look for attribute which was started immediately afterwards,
3119 // which has also not yet been ended (otherwise it would no longer be
3120 // in the list).
3121 pLast = *ppHead;
3122 while( pLast && pLast->GetNext() != pAttr )
3123 pLast = pLast->GetNext();
3125 OSL_ENSURE( pLast, "Attribute not found in own list!" );
3128 bool bMoveBack = false;
3129 sal_uInt16 nWhich = pAttr->m_pItem->Which();
// A non-character attribute (id >= RES_PARATR_BEGIN) that ends at content
// index 0 of a node other than its start node is ended one position earlier.
// The PaM is moved back temporarily and moved forward again at the end of
// this function.
3130 if( !nEndCnt && RES_PARATR_BEGIN <= nWhich &&
3131 pEndPos->GetNodeIndex() != pAttr->GetStartParagraph().GetIndex() )
3133 // Then move back one position in the content!
3134 bMoveBack = m_pPam->Move( fnMoveBackward );
3135 nEndCnt = m_pPam->GetPoint()->GetContentIndex();
3138 // now end the attribute
3139 HTMLAttr *pNext = pAttr->GetNext();
3141 bool bInsert;
3142 sal_uInt16 nScriptItem = 0;
3143 bool bScript = false;
3144 // does it have a non-empty range?
3145 if( !bChkEmpty || (RES_PARATR_BEGIN <= nWhich && bMoveBack) ||
3146 RES_PAGEDESC == nWhich || RES_BREAK == nWhich ||
3147 pEndPos->GetNodeIndex() != pAttr->GetStartParagraph().GetIndex() ||
3148 nEndCnt != pAttr->GetStartContent() )
3150 bInsert = true;
3151 // We do some optimization for script dependent attributes here.
3152 if( pEndPos->GetNodeIndex() == pAttr->GetStartParagraph().GetIndex() )
3154 lcl_swhtml_getItemInfo( *pAttr, bScript, nScriptItem );
3157 else
3159 bInsert = false;
3162 const SwTextNode *pTextNd = (bInsert && bScript) ?
3163 pAttr->GetStartParagraph().GetNode().GetTextNode() :
3164 nullptr;
3166 if (pTextNd)
// Split the range at script boundaries: for every script run that ends
// before nEndCnt and whose script type equals nScriptItem, a clone covering
// just that run is queued; pAttr's start is advanced to the end of each run.
// After the loop, bInsert tells whether the last run also matches.
3168 const OUString& rText = pTextNd->GetText();
3169 sal_uInt16 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3170 rText, pAttr->GetStartContent() );
3171 sal_Int32 nScriptEnd = g_pBreakIt->GetBreakIter()
3172 ->endOfScript( rText, pAttr->GetStartContent(), nScriptText );
3173 while (nScriptEnd < nEndCnt && nScriptEnd != -1)
3175 if( nScriptItem == nScriptText )
3177 HTMLAttr *pSetAttr = pAttr->Clone( pEndPos->GetNode(), nScriptEnd );
3178 pSetAttr->ClearPrev();
3179 if( pNext )
3180 pNext->InsertPrev( pSetAttr );
3181 else
3183 if (pSetAttr->m_bInsAtStart)
3184 m_aSetAttrTab.push_front( pSetAttr );
3185 else
3186 m_aSetAttrTab.push_back( pSetAttr );
3189 pAttr->m_nStartContent = nScriptEnd;
3190 nScriptText = g_pBreakIt->GetBreakIter()->getScriptType(
3191 rText, nScriptEnd );
3192 nScriptEnd = g_pBreakIt->GetBreakIter()
3193 ->endOfScript( rText, nScriptEnd, nScriptText );
3195 bInsert = nScriptItem == nScriptText;
3197 if( bInsert )
3199 pAttr->m_nEndPara = pEndPos->GetNode();
3200 pAttr->m_nEndContent = nEndCnt;
// Everything except RES_TXTATR_INETFMT and RES_TXTATR_CHARFMT is queued at
// the front of m_aSetAttrTab.
3201 pAttr->m_bInsAtStart = RES_TXTATR_INETFMT != nWhich &&
3202 RES_TXTATR_CHARFMT != nWhich;
3204 if( !pNext )
3206 // No open attributes of that type exists any longer, so all
3207 // can be set. Except they depend on another attribute, then
3208 // they're appended there.
3209 if (pAttr->m_bInsAtStart)
3210 m_aSetAttrTab.push_front( pAttr );
3211 else
3212 m_aSetAttrTab.push_back( pAttr );
3214 else
3216 // There are other open attributes of that type,
3217 // therefore the setting must be postponed.
3218 // Hence the current attribute is added at the end
3219 // of the Prev-List of the successor.
3220 pNext->InsertPrev( pAttr );
3223 else
3225 // Then don't insert, but delete. Because of the "faking" of styles
3226 // by hard attributing there can be also other empty attributes in the
3227 // Prev-List, which must be set anyway.
// pPrev is read before pAttr is deleted.
3228 HTMLAttr *pPrev = pAttr->GetPrev();
3229 bRet = false;
3230 delete pAttr;
3232 if( pPrev )
3234 // The previous attributes must be set anyway.
3235 if( pNext )
3236 pNext->InsertPrev( pPrev );
3237 else
3239 if (pPrev->m_bInsAtStart)
3240 m_aSetAttrTab.push_front( pPrev );
3241 else
3242 m_aSetAttrTab.push_back( pPrev );
3248 // If the first attribute of the list was set, then the list header
3249 // must be corrected as well.
3250 if( pLast )
3251 pLast->m_pNext = pNext;
3252 else if( ppHead )
3253 *ppHead = pNext;
// undo the temporary backward move from above
3255 if( bMoveBack )
3256 m_pPam->Move( fnMoveForward );
3258 return bRet;
3261 void SwHTMLParser::DeleteAttr( HTMLAttr* pAttr )
3263 // preliminary paragraph attributes are not allowed here, they could
3264 // be set here and then the pointers become invalid!
3265 OSL_ENSURE(m_aParaAttrs.empty(),
3266 "Danger: there are non-final paragraph attributes");
3267 m_aParaAttrs.clear();
3269 // The list header is saved in the attribute
3270 HTMLAttr **ppHead = pAttr->m_ppHead;
3272 OSL_ENSURE( ppHead, "no list header attribute found!" );
3274 // Is the last started or an earlier started attribute being removed?
3275 HTMLAttr *pLast = nullptr;
3276 if( ppHead && pAttr != *ppHead )
3278 // The last started attribute isn't being ended
3280 // Then we look for attribute which was started immediately afterwards,
3281 // which has also not yet been ended (otherwise it would no longer be
3282 // in the list).
3283 pLast = *ppHead;
3284 while( pLast && pLast->GetNext() != pAttr )
3285 pLast = pLast->GetNext();
3287 OSL_ENSURE( pLast, "Attribute not found in own list!" );
3290 // now delete the attribute
3291 HTMLAttr *pNext = pAttr->GetNext();
3292 HTMLAttr *pPrev = pAttr->GetPrev();
3293 //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3294 std::shared_ptr<HTMLAttrTable> xKeepAlive(pAttr->m_xAttrTab);
3295 delete pAttr;
3297 if( pPrev )
3299 // The previous attributes must be set anyway.
3300 if( pNext )
3301 pNext->InsertPrev( pPrev );
3302 else
3304 if (pPrev->m_bInsAtStart)
3305 m_aSetAttrTab.push_front( pPrev );
3306 else
3307 m_aSetAttrTab.push_back( pPrev );
3311 // If the first attribute of the list was deleted, then the list header
3312 // must be corrected as well.
3313 if( pLast )
3314 pLast->m_pNext = pNext;
3315 else if( ppHead )
3316 *ppHead = pNext;
3319 void SwHTMLParser::SaveAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3321 // preliminary paragraph attributes are not allowed here, they could
3322 // be set here and then the pointers become invalid!
3323 OSL_ENSURE(m_aParaAttrs.empty(),
3324 "Danger: there are non-final paragraph attributes");
3325 m_aParaAttrs.clear();
3327 HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3328 HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3330 for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3332 *pSaveAttributes = *pHTMLAttributes;
3334 HTMLAttr *pAttr = *pSaveAttributes;
3335 while (pAttr)
3337 pAttr->SetHead(pSaveAttributes, rNewAttrTab);
3338 pAttr = pAttr->GetNext();
3341 *pHTMLAttributes = nullptr;
// Closes all attributes that are still open in m_xAttrTab and re-opens them
// in rNewAttrTab, starting at the current PaM position.
//
// For every open attribute that started before the end position, a clone
// ending at that position is queued for setting (in m_aSetAttrTab or in the
// Prev-List of the next open attribute). The original attribute is reset to
// start at the current PaM position and appended to the matching list in
// rNewAttrTab. Afterwards every slot of m_xAttrTab is nullptr.
//
// rNewAttrTab   table that receives the re-opened attributes
// bMoveEndBack  if true, the end position of the clones is the end of the
//               previous content node instead of the current PaM position
3345 void SwHTMLParser::SplitAttrTab( std::shared_ptr<HTMLAttrTable> const & rNewAttrTab,
3346 bool bMoveEndBack )
3348 // preliminary paragraph attributes are not allowed here, they could
3349 // be set here and then the pointers become invalid!
3350 OSL_ENSURE(m_aParaAttrs.empty(),
3351 "Danger: there are non-final paragraph attributes");
3352 m_aParaAttrs.clear();
3354 SwNodeIndex nEndIdx( m_pPam->GetPoint()->GetNode() );
3356 // close all still open attributes and re-open them after the table
3357 HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3358 HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3359 bool bSetAttr = true;
3360 const sal_Int32 nSttCnt = m_pPam->GetPoint()->GetContentIndex();
3361 sal_Int32 nEndCnt = nSttCnt;
3363 if( bMoveEndBack )
3365 SwNodeOffset nOldEnd = nEndIdx.GetIndex();
3366 SwNodeOffset nTmpIdx;
// nTmpIdx becomes the index the moved end position must stay behind:
// GetEndOfInserts() if the old end lies at or before GetEndOfExtras() or
// GetEndOfAutotext(), otherwise the GetEndOfAutotext() index assigned in
// the condition itself.
3367 if( ( nTmpIdx = m_xDoc->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd ||
3368 ( nTmpIdx = m_xDoc->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd )
3370 nTmpIdx = m_xDoc->GetNodes().GetEndOfInserts().GetIndex();
3372 SwContentNode* pCNd = SwNodes::GoPrevious(&nEndIdx);
3374 // Don't set attributes, when the PaM was moved outside of the content area.
3375 bSetAttr = pCNd && nTmpIdx < nEndIdx.GetIndex();
// pCNd is only dereferenced when bSetAttr is true, which implies pCNd != nullptr
3377 nEndCnt = (bSetAttr ? pCNd->Len() : 0);
// Both tables are walked slot by slot as arrays of HTMLAttr* list heads.
3379 for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; (++pHTMLAttributes, ++pSaveAttributes))
3381 HTMLAttr *pAttr = *pHTMLAttributes;
3382 *pSaveAttributes = nullptr;
3383 while( pAttr )
// pNext and pPrev are read up front; pAttr->Reset() is called further down
3385 HTMLAttr *pNext = pAttr->GetNext();
3386 HTMLAttr *pPrev = pAttr->GetPrev();
3388 if( bSetAttr &&
3389 ( pAttr->GetStartParagraphIdx() < nEndIdx.GetIndex() ||
3390 (pAttr->GetStartParagraph() == nEndIdx &&
3391 pAttr->GetStartContent() != nEndCnt) ) )
3393 // The attribute must be set before the list. We need the
3394 // original and therefore we clone it, because pointer to the
3395 // attribute exist in the other contexts. The Next-List is lost
3396 // in doing so, but the Previous-List is preserved.
3397 HTMLAttr *pSetAttr = pAttr->Clone( nEndIdx.GetNode(), nEndCnt );
3399 if( pNext )
3400 pNext->InsertPrev( pSetAttr );
3401 else
3403 if (pSetAttr->m_bInsAtStart)
3404 m_aSetAttrTab.push_front( pSetAttr );
3405 else
3406 m_aSetAttrTab.push_back( pSetAttr );
3409 else if( pPrev )
3411 // If the attribute doesn't need to be set before the table, then
3412 // the previous attributes must still be set.
3413 if( pNext )
3414 pNext->InsertPrev( pPrev );
3415 else
3417 if (pPrev->m_bInsAtStart)
3418 m_aSetAttrTab.push_front( pPrev );
3419 else
3420 m_aSetAttrTab.push_back( pPrev );
3424 // set the start of the attribute anew and break link
3425 pAttr->Reset(m_pPam->GetPoint()->GetNode(), nSttCnt, pSaveAttributes, rNewAttrTab);
// Append pAttr to the end of the list in the new table, which keeps the
// order the attributes had in the old list.
3427 if (*pSaveAttributes)
3429 HTMLAttr *pSAttr = *pSaveAttributes;
3430 while( pSAttr->GetNext() )
3431 pSAttr = pSAttr->GetNext();
3432 pSAttr->InsertNext( pAttr );
3434 else
3435 *pSaveAttributes = pAttr;
3437 pAttr = pNext;
3440 *pHTMLAttributes = nullptr;
3444 void SwHTMLParser::RestoreAttrTab(std::shared_ptr<HTMLAttrTable> const & rNewAttrTab)
3446 // preliminary paragraph attributes are not allowed here, they could
3447 // be set here and then the pointers become invalid!
3448 OSL_ENSURE(m_aParaAttrs.empty(),
3449 "Danger: there are non-final paragraph attributes");
3450 m_aParaAttrs.clear();
3452 HTMLAttr** pHTMLAttributes = reinterpret_cast<HTMLAttr**>(m_xAttrTab.get());
3453 HTMLAttr** pSaveAttributes = reinterpret_cast<HTMLAttr**>(rNewAttrTab.get());
3455 for (auto nCnt = sizeof(HTMLAttrTable) / sizeof(HTMLAttr*); nCnt--; ++pHTMLAttributes, ++pSaveAttributes)
3457 OSL_ENSURE(!*pHTMLAttributes, "The attribute table is not empty!");
3459 *pHTMLAttributes = *pSaveAttributes;
3461 HTMLAttr *pAttr = *pHTMLAttributes;
3462 while (pAttr)
3464 OSL_ENSURE( !pAttr->GetPrev() || !pAttr->GetPrev()->m_ppHead,
3465 "Previous attribute has still a header" );
3466 pAttr->SetHead(pHTMLAttributes, m_xAttrTab);
3467 pAttr = pAttr->GetNext();
3470 *pSaveAttributes = nullptr;
3474 void SwHTMLParser::InsertAttr( const SfxPoolItem& rItem, bool bInsAtStart )
3476 HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), rItem, nullptr, std::shared_ptr<HTMLAttrTable>());
3477 if (bInsAtStart)
3478 m_aSetAttrTab.push_front( pTmp );
3479 else
3480 m_aSetAttrTab.push_back( pTmp );
3483 void SwHTMLParser::InsertAttrs( std::deque<std::unique_ptr<HTMLAttr>> rAttrs )
3485 while( !rAttrs.empty() )
3487 std::unique_ptr<HTMLAttr> pAttr = std::move(rAttrs.front());
3488 InsertAttr( pAttr->GetItem(), false );
3489 rAttrs.pop_front();
3493 void SwHTMLParser::NewStdAttr( HtmlTokenId nToken )
3495 OUString aId, aStyle, aLang, aDir;
3496 OUString aClass;
3498 const HTMLOptions& rHTMLOptions = GetOptions();
3499 for (size_t i = rHTMLOptions.size(); i; )
3501 const HTMLOption& rOption = rHTMLOptions[--i];
3502 switch( rOption.GetToken() )
3504 case HtmlOptionId::ID:
3505 aId = rOption.GetString();
3506 break;
3507 case HtmlOptionId::STYLE:
3508 aStyle = rOption.GetString();
3509 break;
3510 case HtmlOptionId::CLASS:
3511 aClass = rOption.GetString();
3512 break;
3513 case HtmlOptionId::LANG:
3514 aLang = rOption.GetString();
3515 break;
3516 case HtmlOptionId::DIR:
3517 aDir = rOption.GetString();
3518 break;
3519 default: break;
3523 // create a new context
3524 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3526 // parse styles
3527 if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3529 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3530 SvxCSS1PropertyInfo aPropInfo;
3532 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3534 if( HtmlTokenId::SPAN_ON != nToken || aClass.isEmpty() ||
3535 !CreateContainer( aClass, aItemSet, aPropInfo, xCntxt.get() ) )
3536 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3537 InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3541 // save the context
3542 PushContext(xCntxt);
3545 void SwHTMLParser::NewStdAttr( HtmlTokenId nToken,
3546 HTMLAttr **ppAttr, const SfxPoolItem & rItem,
3547 HTMLAttr **ppAttr2, const SfxPoolItem *pItem2,
3548 HTMLAttr **ppAttr3, const SfxPoolItem *pItem3 )
3550 OUString aId, aStyle, aClass, aLang, aDir;
3552 const HTMLOptions& rHTMLOptions = GetOptions();
3553 for (size_t i = rHTMLOptions.size(); i; )
3555 const HTMLOption& rOption = rHTMLOptions[--i];
3556 switch( rOption.GetToken() )
3558 case HtmlOptionId::ID:
3559 aId = rOption.GetString();
3560 break;
3561 case HtmlOptionId::STYLE:
3562 aStyle = rOption.GetString();
3563 break;
3564 case HtmlOptionId::CLASS:
3565 aClass = rOption.GetString();
3566 break;
3567 case HtmlOptionId::LANG:
3568 aLang = rOption.GetString();
3569 break;
3570 case HtmlOptionId::DIR:
3571 aDir = rOption.GetString();
3572 break;
3573 default: break;
3577 // create a new context
3578 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3580 // parse styles
3581 if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3583 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3584 SvxCSS1PropertyInfo aPropInfo;
3586 aItemSet.Put( rItem );
3587 if( pItem2 )
3588 aItemSet.Put( *pItem2 );
3589 if( pItem3 )
3590 aItemSet.Put( *pItem3 );
3592 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3593 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3595 InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3597 else
3599 InsertAttr( ppAttr ,rItem, xCntxt.get() );
3600 if( pItem2 )
3602 OSL_ENSURE( ppAttr2, "missing table entry for item2" );
3603 InsertAttr( ppAttr2, *pItem2, xCntxt.get() );
3605 if( pItem3 )
3607 OSL_ENSURE( ppAttr3, "missing table entry for item3" );
3608 InsertAttr( ppAttr3, *pItem3, xCntxt.get() );
3612 // save the context
3613 PushContext(xCntxt);
3616 void SwHTMLParser::EndTag( HtmlTokenId nToken )
3618 // fetch context
3619 std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
3620 if (xCntxt)
3622 // and maybe end the attributes
3623 EndContext(xCntxt.get());
3627 void SwHTMLParser::NewBasefontAttr()
3629 OUString aId, aStyle, aClass, aLang, aDir;
3630 sal_uInt16 nSize = 3;
3632 const HTMLOptions& rHTMLOptions = GetOptions();
3633 for (size_t i = rHTMLOptions.size(); i; )
3635 const HTMLOption& rOption = rHTMLOptions[--i];
3636 switch( rOption.GetToken() )
3638 case HtmlOptionId::SIZE:
3639 nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
3640 break;
3641 case HtmlOptionId::ID:
3642 aId = rOption.GetString();
3643 break;
3644 case HtmlOptionId::STYLE:
3645 aStyle = rOption.GetString();
3646 break;
3647 case HtmlOptionId::CLASS:
3648 aClass = rOption.GetString();
3649 break;
3650 case HtmlOptionId::LANG:
3651 aLang = rOption.GetString();
3652 break;
3653 case HtmlOptionId::DIR:
3654 aDir = rOption.GetString();
3655 break;
3656 default: break;
3660 if( nSize < 1 )
3661 nSize = 1;
3663 if( nSize > 7 )
3664 nSize = 7;
3666 // create a new context
3667 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON));
3669 // parse styles
3670 if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3672 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3673 SvxCSS1PropertyInfo aPropInfo;
3675 //CJK has different defaults
3676 SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3677 aItemSet.Put( aFontHeight );
3678 SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3679 aItemSet.Put( aFontHeightCJK );
3680 //Complex type can contain so many types of letters,
3681 //that it's not really worthy to bother, IMO.
3682 //Still, I have set a default.
3683 SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3684 aItemSet.Put( aFontHeightCTL );
3686 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3687 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3689 InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3691 else
3693 SvxFontHeightItem aFontHeight( m_aFontHeights[nSize-1], 100, RES_CHRATR_FONTSIZE );
3694 InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3695 SvxFontHeightItem aFontHeightCJK( m_aFontHeights[nSize-1], 100, RES_CHRATR_CJK_FONTSIZE );
3696 InsertAttr( &m_xAttrTab->pFontHeightCJK, aFontHeightCJK, xCntxt.get() );
3697 SvxFontHeightItem aFontHeightCTL( m_aFontHeights[nSize-1], 100, RES_CHRATR_CTL_FONTSIZE );
3698 InsertAttr( &m_xAttrTab->pFontHeightCTL, aFontHeightCTL, xCntxt.get() );
3701 // save the context
3702 PushContext(xCntxt);
3704 // save the font size
3705 m_aBaseFontStack.push_back( nSize );
3708 void SwHTMLParser::EndBasefontAttr()
3710 EndTag( HtmlTokenId::BASEFONT_ON );
3712 // avoid stack underflow in tables
3713 if( m_aBaseFontStack.size() > m_nBaseFontStMin )
3714 m_aBaseFontStack.erase( m_aBaseFontStack.begin() + m_aBaseFontStack.size() - 1 );
3717 void SwHTMLParser::NewFontAttr( HtmlTokenId nToken )
3719 sal_uInt16 nBaseSize =
3720 ( m_aBaseFontStack.size() > m_nBaseFontStMin
3721 ? (m_aBaseFontStack[m_aBaseFontStack.size()-1] & FONTSIZE_MASK)
3722 : 3 );
3723 sal_uInt16 nFontSize =
3724 ( m_aFontStack.size() > m_nFontStMin
3725 ? (m_aFontStack[m_aFontStack.size()-1] & FONTSIZE_MASK)
3726 : nBaseSize );
3728 OUString aFace, aId, aStyle, aClass, aLang, aDir;
3729 Color aColor;
3730 sal_uLong nFontHeight = 0; // actual font height to set
3731 sal_uInt16 nSize = 0; // font height in Netscape notation (1-7)
3732 bool bColor = false;
3734 const HTMLOptions& rHTMLOptions = GetOptions();
3735 for (size_t i = rHTMLOptions.size(); i; )
3737 const HTMLOption& rOption = rHTMLOptions[--i];
3738 switch( rOption.GetToken() )
3740 case HtmlOptionId::SIZE:
3741 if( HtmlTokenId::FONT_ON==nToken && !rOption.GetString().isEmpty() )
3743 sal_Int32 nSSize;
3744 if( '+' == rOption.GetString()[0] ||
3745 '-' == rOption.GetString()[0] )
3746 nSSize = o3tl::saturating_add<sal_Int32>(nBaseSize, rOption.GetSNumber());
3747 else
3748 nSSize = static_cast<sal_Int32>(rOption.GetNumber());
3750 if( nSSize < 1 )
3751 nSSize = 1;
3752 else if( nSSize > 7 )
3753 nSSize = 7;
3755 nSize = o3tl::narrowing<sal_uInt16>(nSSize);
3756 nFontHeight = m_aFontHeights[nSize-1];
3758 break;
3759 case HtmlOptionId::COLOR:
3760 if( HtmlTokenId::FONT_ON==nToken )
3762 rOption.GetColor( aColor );
3763 bColor = true;
3765 break;
3766 case HtmlOptionId::FACE:
3767 if( HtmlTokenId::FONT_ON==nToken )
3768 aFace = rOption.GetString();
3769 break;
3770 case HtmlOptionId::ID:
3771 aId = rOption.GetString();
3772 break;
3773 case HtmlOptionId::STYLE:
3774 aStyle = rOption.GetString();
3775 break;
3776 case HtmlOptionId::CLASS:
3777 aClass = rOption.GetString();
3778 break;
3779 case HtmlOptionId::LANG:
3780 aLang = rOption.GetString();
3781 break;
3782 case HtmlOptionId::DIR:
3783 aDir = rOption.GetString();
3784 break;
3785 default: break;
3789 if( HtmlTokenId::FONT_ON != nToken )
3791 // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3793 // In headings the current heading sets the font height
3794 // and not BASEFONT.
3795 const SwFormatColl *pColl = GetCurrFormatColl();
3796 sal_uInt16 nPoolId = pColl ? pColl->GetPoolFormatId() : 0;
3797 if( nPoolId>=RES_POOLCOLL_HEADLINE1 &&
3798 nPoolId<=RES_POOLCOLL_HEADLINE6 )
3800 // If the font height in the heading wasn't changed yet,
3801 // then take the one from the style.
3802 if( m_nFontStHeadStart==m_aFontStack.size() )
3803 nFontSize = static_cast< sal_uInt16 >(6 - (nPoolId - RES_POOLCOLL_HEADLINE1));
3805 else
3806 nPoolId = 0;
3808 if( HtmlTokenId::BIGPRINT_ON == nToken )
3809 nSize = ( nFontSize<7 ? nFontSize+1 : 7 );
3810 else
3811 nSize = ( nFontSize>1 ? nFontSize-1 : 1 );
3813 // If possible in headlines we fetch the new font height
3814 // from the style.
3815 if( nPoolId && nSize>=1 && nSize <=6 )
3816 nFontHeight =
3817 m_pCSS1Parser->GetTextCollFromPool(
3818 RES_POOLCOLL_HEADLINE1+6-nSize )->GetSize().GetHeight();
3819 else
3820 nFontHeight = m_aFontHeights[nSize-1];
3823 OSL_ENSURE( !nSize == !nFontHeight, "HTML-Font-Size != Font-Height" );
3825 OUString aFontName;
3826 const OUString aStyleName;
3827 FontFamily eFamily = FAMILY_DONTKNOW; // family and pitch,
3828 FontPitch ePitch = PITCH_DONTKNOW; // if not found
3829 rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
3831 if( !aFace.isEmpty() && !m_pCSS1Parser->IsIgnoreFontFamily() )
3833 const FontList *pFList = nullptr;
3834 SwDocShell *pDocSh = m_xDoc->GetDocShell();
3835 if( pDocSh )
3837 const SvxFontListItem *pFListItem =
3838 static_cast<const SvxFontListItem *>(pDocSh->GetItem(SID_ATTR_CHAR_FONTLIST));
3839 if( pFListItem )
3840 pFList = pFListItem->GetFontList();
3843 bool bFound = false;
3844 sal_Int32 nStrPos = 0;
3845 while( nStrPos!= -1 )
3847 OUString aFName = aFace.getToken( 0, ',', nStrPos );
3848 aFName = comphelper::string::strip(aFName, ' ');
3849 if( !aFName.isEmpty() )
3851 if( !bFound && pFList )
3853 sal_Handle hFont = pFList->GetFirstFontMetric( aFName );
3854 if( nullptr != hFont )
3856 const FontMetric& rFMetric = FontList::GetFontMetric( hFont );
3857 if( RTL_TEXTENCODING_DONTKNOW != rFMetric.GetCharSet() )
3859 bFound = true;
3860 if( RTL_TEXTENCODING_SYMBOL == rFMetric.GetCharSet() )
3861 eEnc = RTL_TEXTENCODING_SYMBOL;
3865 if( !aFontName.isEmpty() )
3866 aFontName += ";";
3867 aFontName += aFName;
3872 // create a new context
3873 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
3875 // parse styles
3876 if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
3878 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3879 SvxCSS1PropertyInfo aPropInfo;
3881 if( nFontHeight )
3883 SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3884 aItemSet.Put( aFontHeight );
3885 SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3886 aItemSet.Put( aFontHeightCJK );
3887 SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3888 aItemSet.Put( aFontHeightCTL );
3890 if( bColor )
3891 aItemSet.Put( SvxColorItem(aColor, RES_CHRATR_COLOR) );
3892 if( !aFontName.isEmpty() )
3894 SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3895 aItemSet.Put( aFont );
3896 SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3897 aItemSet.Put( aFontCJK );
3898 SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3899 aItemSet.Put( aFontCTL );
3902 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
3903 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
3905 InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
3907 else
3909 if( nFontHeight )
3911 SvxFontHeightItem aFontHeight( nFontHeight, 100, RES_CHRATR_FONTSIZE );
3912 InsertAttr( &m_xAttrTab->pFontHeight, aFontHeight, xCntxt.get() );
3913 SvxFontHeightItem aFontHeightCJK( nFontHeight, 100, RES_CHRATR_CJK_FONTSIZE );
3914 InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCJK, xCntxt.get() );
3915 SvxFontHeightItem aFontHeightCTL( nFontHeight, 100, RES_CHRATR_CTL_FONTSIZE );
3916 InsertAttr( &m_xAttrTab->pFontHeight, aFontHeightCTL, xCntxt.get() );
3918 if( bColor )
3919 InsertAttr( &m_xAttrTab->pFontColor, SvxColorItem(aColor, RES_CHRATR_COLOR), xCntxt.get() );
3920 if( !aFontName.isEmpty() )
3922 SvxFontItem aFont( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_FONT );
3923 InsertAttr( &m_xAttrTab->pFont, aFont, xCntxt.get() );
3924 SvxFontItem aFontCJK( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CJK_FONT );
3925 InsertAttr( &m_xAttrTab->pFont, aFontCJK, xCntxt.get() );
3926 SvxFontItem aFontCTL( eFamily, aFontName, aStyleName, ePitch, eEnc, RES_CHRATR_CTL_FONT );
3927 InsertAttr( &m_xAttrTab->pFont, aFontCTL, xCntxt.get() );
3931 // save the context
3932 PushContext(xCntxt);
3934 m_aFontStack.push_back( nSize );
3937 void SwHTMLParser::EndFontAttr( HtmlTokenId nToken )
3939 EndTag( nToken );
3941 // avoid stack underflow in tables
3942 if( m_aFontStack.size() > m_nFontStMin )
3943 m_aFontStack.erase( m_aFontStack.begin() + m_aFontStack.size() - 1 );
3946 void SwHTMLParser::NewPara()
3948 if( m_pPam->GetPoint()->GetContentIndex() )
3949 AppendTextNode( AM_SPACE );
3950 else
3951 AddParSpace();
3953 m_eParaAdjust = SvxAdjust::End;
3954 OUString aId, aStyle, aClass, aLang, aDir;
3956 const HTMLOptions& rHTMLOptions = GetOptions();
3957 for (size_t i = rHTMLOptions.size(); i; )
3959 const HTMLOption& rOption = rHTMLOptions[--i];
3960 switch( rOption.GetToken() )
3962 case HtmlOptionId::ID:
3963 aId = rOption.GetString();
3964 break;
3965 case HtmlOptionId::ALIGN:
3966 m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
3967 break;
3968 case HtmlOptionId::STYLE:
3969 aStyle = rOption.GetString();
3970 break;
3971 case HtmlOptionId::CLASS:
3972 aClass = rOption.GetString();
3973 break;
3974 case HtmlOptionId::LANG:
3975 aLang = rOption.GetString();
3976 break;
3977 case HtmlOptionId::DIR:
3978 aDir = rOption.GetString();
3979 break;
3980 default: break;
3984 // create a new context
3985 std::unique_ptr<HTMLAttrContext> xCntxt(
3986 !aClass.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON,
3987 RES_POOLCOLL_TEXT, aClass )
3988 : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON ));
3990 // parse styles (Don't consider class. This is only possible as long as none of
3991 // the CSS1 properties of the class must be formatted hard!!!)
3992 if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
3994 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
3995 SvxCSS1PropertyInfo aPropInfo;
3997 if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
3999 OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4000 "Class is not considered" );
4001 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4002 InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4006 if( SvxAdjust::End != m_eParaAdjust )
4007 InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4009 // and push on stack
4010 PushContext( xCntxt );
4012 // set the current style or its attributes
4013 SetTextCollAttrs( !aClass.isEmpty() ? m_aContexts.back().get() : nullptr );
4015 // progress bar
4016 ShowStatline();
4018 OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE, "Now an open paragraph element will be lost." );
4019 m_nOpenParaToken = HtmlTokenId::PARABREAK_ON;
4022 void SwHTMLParser::EndPara( bool bReal )
4024 if (HtmlTokenId::LI_ON==m_nOpenParaToken && m_xTable)
4026 #if OSL_DEBUG_LEVEL > 0
4027 const SwNumRule *pNumRule = m_pPam->GetPointNode().GetTextNode()->GetNumRule();
4028 OSL_ENSURE( pNumRule, "Where is the NumRule" );
4029 #endif
4032 // Netscape skips empty paragraphs, we do the same; unless in XHTML mode, which prefers mapping
4033 // the source document to the doc model 1:1 if possible.
4034 if( bReal )
4036 if (m_pPam->GetPoint()->GetContentIndex() || m_bXHTML)
4037 AppendTextNode( AM_SPACE );
4038 else
4039 AddParSpace();
4042 // If a DD or DT was open, it's an implied definition list,
4043 // which must be closed now.
4044 if( (m_nOpenParaToken == HtmlTokenId::DT_ON || m_nOpenParaToken == HtmlTokenId::DD_ON) &&
4045 m_nDefListDeep)
4047 m_nDefListDeep--;
4050 // Pop the context of the stack. It can also be from an
4051 // implied opened definition list.
4052 std::unique_ptr<HTMLAttrContext> xCntxt(
4053 PopContext( m_nOpenParaToken != HtmlTokenId::NONE ? getOnToken(m_nOpenParaToken) : HtmlTokenId::PARABREAK_ON ));
4055 // close attribute
4056 if (xCntxt)
4058 EndContext(xCntxt.get());
4059 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4060 xCntxt.reset();
4063 // reset the existing style
4064 if( bReal )
4065 SetTextCollAttrs();
4067 m_nOpenParaToken = HtmlTokenId::NONE;
4070 void SwHTMLParser::NewHeading( HtmlTokenId nToken )
4072 m_eParaAdjust = SvxAdjust::End;
4074 OUString aId, aStyle, aClass, aLang, aDir;
4076 const HTMLOptions& rHTMLOptions = GetOptions();
4077 for (size_t i = rHTMLOptions.size(); i; )
4079 const HTMLOption& rOption = rHTMLOptions[--i];
4080 switch( rOption.GetToken() )
4082 case HtmlOptionId::ID:
4083 aId = rOption.GetString();
4084 break;
4085 case HtmlOptionId::ALIGN:
4086 m_eParaAdjust = rOption.GetEnum( aHTMLPAlignTable, m_eParaAdjust );
4087 break;
4088 case HtmlOptionId::STYLE:
4089 aStyle = rOption.GetString();
4090 break;
4091 case HtmlOptionId::CLASS:
4092 aClass = rOption.GetString();
4093 break;
4094 case HtmlOptionId::LANG:
4095 aLang = rOption.GetString();
4096 break;
4097 case HtmlOptionId::DIR:
4098 aDir = rOption.GetString();
4099 break;
4100 default: break;
4104 // open a new paragraph
4105 if( m_pPam->GetPoint()->GetContentIndex() )
4106 AppendTextNode( AM_SPACE );
4107 else
4108 AddParSpace();
4110 // search for the matching style
4111 sal_uInt16 nTextColl;
4112 switch( nToken )
4114 case HtmlTokenId::HEAD1_ON: nTextColl = RES_POOLCOLL_HEADLINE1; break;
4115 case HtmlTokenId::HEAD2_ON: nTextColl = RES_POOLCOLL_HEADLINE2; break;
4116 case HtmlTokenId::HEAD3_ON: nTextColl = RES_POOLCOLL_HEADLINE3; break;
4117 case HtmlTokenId::HEAD4_ON: nTextColl = RES_POOLCOLL_HEADLINE4; break;
4118 case HtmlTokenId::HEAD5_ON: nTextColl = RES_POOLCOLL_HEADLINE5; break;
4119 case HtmlTokenId::HEAD6_ON: nTextColl = RES_POOLCOLL_HEADLINE6; break;
4120 default: nTextColl = RES_POOLCOLL_STANDARD; break;
4123 // create the context
4124 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nTextColl, aClass));
4126 // parse styles (regarding class see also NewPara)
4127 if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4129 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4130 SvxCSS1PropertyInfo aPropInfo;
4132 if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4134 OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4135 "Class is not considered" );
4136 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4137 InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4141 if( SvxAdjust::End != m_eParaAdjust )
4142 InsertAttr( &m_xAttrTab->pAdjust, SvxAdjustItem(m_eParaAdjust, RES_PARATR_ADJUST), xCntxt.get() );
4144 // and push on stack
4145 PushContext(xCntxt);
4147 // set the current style or its attributes
4148 SetTextCollAttrs(m_aContexts.back().get());
4150 m_nFontStHeadStart = m_aFontStack.size();
4152 // progress bar
4153 ShowStatline();
4156 void SwHTMLParser::EndHeading()
4158 // open a new paragraph
4159 if( m_pPam->GetPoint()->GetContentIndex() )
4160 AppendTextNode( AM_SPACE );
4161 else
4162 AddParSpace();
4164 // search context matching the token and fetch it from stack
4165 std::unique_ptr<HTMLAttrContext> xCntxt;
4166 auto nPos = m_aContexts.size();
4167 while( !xCntxt && nPos>m_nContextStMin )
4169 switch( m_aContexts[--nPos]->GetToken() )
4171 case HtmlTokenId::HEAD1_ON:
4172 case HtmlTokenId::HEAD2_ON:
4173 case HtmlTokenId::HEAD3_ON:
4174 case HtmlTokenId::HEAD4_ON:
4175 case HtmlTokenId::HEAD5_ON:
4176 case HtmlTokenId::HEAD6_ON:
4177 xCntxt = std::move(m_aContexts[nPos]);
4178 m_aContexts.erase( m_aContexts.begin() + nPos );
4179 break;
4180 default: break;
4184 // and now end attributes
4185 if (xCntxt)
4187 EndContext(xCntxt.get());
4188 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4189 xCntxt.reset();
4192 // reset existing style
4193 SetTextCollAttrs();
4195 m_nFontStHeadStart = m_nFontStMin;
4198 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken, sal_uInt16 nColl )
4200 OUString aId, aStyle, aClass, aLang, aDir;
4202 const HTMLOptions& rHTMLOptions = GetOptions();
4203 for (size_t i = rHTMLOptions.size(); i; )
4205 const HTMLOption& rOption = rHTMLOptions[--i];
4206 switch( rOption.GetToken() )
4208 case HtmlOptionId::ID:
4209 aId = rOption.GetString();
4210 break;
4211 case HtmlOptionId::STYLE:
4212 aStyle = rOption.GetString();
4213 break;
4214 case HtmlOptionId::CLASS:
4215 aClass = rOption.GetString();
4216 break;
4217 case HtmlOptionId::LANG:
4218 aLang = rOption.GetString();
4219 break;
4220 case HtmlOptionId::DIR:
4221 aDir = rOption.GetString();
4222 break;
4223 default: break;
4227 // open a new paragraph
4228 SwHTMLAppendMode eMode = AM_NORMAL;
4229 switch( nToken )
4231 case HtmlTokenId::LISTING_ON:
4232 case HtmlTokenId::XMP_ON:
4233 // These both tags will be mapped to the PRE style. For the case that a
4234 // a CLASS exists we will delete it so that we don't get the CLASS of
4235 // the PRE style.
4236 aClass.clear();
4237 [[fallthrough]];
4238 case HtmlTokenId::BLOCKQUOTE_ON:
4239 case HtmlTokenId::BLOCKQUOTE30_ON:
4240 case HtmlTokenId::PREFORMTXT_ON:
4241 eMode = AM_SPACE;
4242 break;
4243 case HtmlTokenId::ADDRESS_ON:
4244 eMode = AM_NOSPACE; // ADDRESS can follow on a <P> without </P>
4245 break;
4246 case HtmlTokenId::DT_ON:
4247 case HtmlTokenId::DD_ON:
4248 eMode = AM_SOFTNOSPACE;
4249 break;
4250 default:
4251 OSL_ENSURE( false, "unknown style" );
4252 break;
4254 if( m_pPam->GetPoint()->GetContentIndex() )
4255 AppendTextNode( eMode );
4256 else if( AM_SPACE==eMode )
4257 AddParSpace();
4259 // ... and save in a context
4260 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken, nColl, aClass));
4262 // parse styles (regarding class see also NewPara)
4263 if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4265 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4266 SvxCSS1PropertyInfo aPropInfo;
4268 if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4270 OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4271 "Class is not considered" );
4272 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4273 InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4277 PushContext(xCntxt);
4279 // set the new style
4280 SetTextCollAttrs(m_aContexts.back().get());
4282 // update progress bar
4283 ShowStatline();
4286 void SwHTMLParser::EndTextFormatColl( HtmlTokenId nToken )
4288 SwHTMLAppendMode eMode = AM_NORMAL;
4289 switch( getOnToken(nToken) )
4291 case HtmlTokenId::BLOCKQUOTE_ON:
4292 case HtmlTokenId::BLOCKQUOTE30_ON:
4293 case HtmlTokenId::PREFORMTXT_ON:
4294 case HtmlTokenId::LISTING_ON:
4295 case HtmlTokenId::XMP_ON:
4296 eMode = AM_SPACE;
4297 break;
4298 case HtmlTokenId::ADDRESS_ON:
4299 case HtmlTokenId::DT_ON:
4300 case HtmlTokenId::DD_ON:
4301 eMode = AM_SOFTNOSPACE;
4302 break;
4303 default:
4304 OSL_ENSURE( false, "unknown style" );
4305 break;
4307 if( m_pPam->GetPoint()->GetContentIndex() )
4308 AppendTextNode( eMode );
4309 else if( AM_SPACE==eMode )
4310 AddParSpace();
4312 // pop current context of stack
4313 std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(getOnToken(nToken)));
4315 // and now end attributes
4316 if (xCntxt)
4318 EndContext(xCntxt.get());
4319 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4320 xCntxt.reset();
4323 // reset existing style
4324 SetTextCollAttrs();
4327 void SwHTMLParser::NewDefList()
4329 OUString aId, aStyle, aClass, aLang, aDir;
4331 const HTMLOptions& rHTMLOptions = GetOptions();
4332 for (size_t i = rHTMLOptions.size(); i; )
4334 const HTMLOption& rOption = rHTMLOptions[--i];
4335 switch( rOption.GetToken() )
4337 case HtmlOptionId::ID:
4338 aId = rOption.GetString();
4339 break;
4340 case HtmlOptionId::STYLE:
4341 aStyle = rOption.GetString();
4342 break;
4343 case HtmlOptionId::CLASS:
4344 aClass = rOption.GetString();
4345 break;
4346 case HtmlOptionId::LANG:
4347 aLang = rOption.GetString();
4348 break;
4349 case HtmlOptionId::DIR:
4350 aDir = rOption.GetString();
4351 break;
4352 default: break;
4356 // open a new paragraph
4357 bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 0;
4358 if( m_pPam->GetPoint()->GetContentIndex() )
4359 AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4360 else if( bSpace )
4361 AddParSpace();
4363 // one level more
4364 m_nDefListDeep++;
4366 bool bInDD = false, bNotInDD = false;
4367 auto nPos = m_aContexts.size();
4368 while( !bInDD && !bNotInDD && nPos>m_nContextStMin )
4370 HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4371 switch( nCntxtToken )
4373 case HtmlTokenId::DEFLIST_ON:
4374 case HtmlTokenId::DIRLIST_ON:
4375 case HtmlTokenId::MENULIST_ON:
4376 case HtmlTokenId::ORDERLIST_ON:
4377 case HtmlTokenId::UNORDERLIST_ON:
4378 bNotInDD = true;
4379 break;
4380 case HtmlTokenId::DD_ON:
4381 bInDD = true;
4382 break;
4383 default: break;
4387 // ... and save in a context
4388 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON));
4390 // in it save also the margins
4391 sal_uInt16 nLeft=0, nRight=0;
4392 short nIndent=0;
4393 GetMarginsFromContext( nLeft, nRight, nIndent );
4395 // The indentation, which already results from a DL, correlates with a DT
4396 // on the current level and this correlates to a DD from the previous level.
4397 // For a level >=2 we must add DD distance.
4398 if( !bInDD && m_nDefListDeep > 1 )
4401 // and the one of the DT-style of the current level
4402 SvxTextLeftMarginItem const& rTextLeftMargin =
4403 m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DD, OUString())
4404 ->GetTextLeftMargin();
4405 nLeft = nLeft + static_cast<sal_uInt16>(rTextLeftMargin.GetTextLeft());
4408 xCntxt->SetMargins( nLeft, nRight, nIndent );
4410 // parse styles
4411 if( HasStyleOptions( aStyle, aId, aClass, &aLang, &aDir ) )
4413 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4414 SvxCSS1PropertyInfo aPropInfo;
4416 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo, &aLang, &aDir ) )
4418 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4419 InsertAttrs( aItemSet, aPropInfo, xCntxt.get() );
4423 PushContext(xCntxt);
4425 // set the attributes of the new style
4426 if( m_nDefListDeep > 1 )
4427 SetTextCollAttrs(m_aContexts.back().get());
4430 void SwHTMLParser::EndDefList()
4432 bool bSpace = (GetNumInfo().GetDepth() + m_nDefListDeep) == 1;
4433 if( m_pPam->GetPoint()->GetContentIndex() )
4434 AppendTextNode( bSpace ? AM_SPACE : AM_SOFTNOSPACE );
4435 else if( bSpace )
4436 AddParSpace();
4438 // one level less
4439 if( m_nDefListDeep > 0 )
4440 m_nDefListDeep--;
4442 // pop current context of stack
4443 std::unique_ptr<HTMLAttrContext> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON));
4445 // and now end attributes
4446 if (xCntxt)
4448 EndContext(xCntxt.get());
4449 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4450 xCntxt.reset();
4453 // and set style
4454 SetTextCollAttrs();
4457 void SwHTMLParser::NewDefListItem( HtmlTokenId nToken )
4459 // determine if the DD/DT exist in a DL
4460 bool bInDefList = false, bNotInDefList = false;
4461 auto nPos = m_aContexts.size();
4462 while( !bInDefList && !bNotInDefList && nPos>m_nContextStMin )
4464 HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4465 switch( nCntxtToken )
4467 case HtmlTokenId::DEFLIST_ON:
4468 bInDefList = true;
4469 break;
4470 case HtmlTokenId::DIRLIST_ON:
4471 case HtmlTokenId::MENULIST_ON:
4472 case HtmlTokenId::ORDERLIST_ON:
4473 case HtmlTokenId::UNORDERLIST_ON:
4474 bNotInDefList = true;
4475 break;
4476 default: break;
4480 // if not, then implicitly open a new DL
4481 if( !bInDefList )
4483 m_nDefListDeep++;
4484 OSL_ENSURE( m_nOpenParaToken == HtmlTokenId::NONE,
4485 "Now an open paragraph element will be lost." );
4486 m_nOpenParaToken = nToken;
4489 NewTextFormatColl( nToken, static_cast< sal_uInt16 >(nToken==HtmlTokenId::DD_ON ? RES_POOLCOLL_HTML_DD
4490 : RES_POOLCOLL_HTML_DT) );
4493 void SwHTMLParser::EndDefListItem( HtmlTokenId nToken )
4495 // open a new paragraph
4496 if( nToken == HtmlTokenId::NONE && m_pPam->GetPoint()->GetContentIndex() )
4497 AppendTextNode( AM_SOFTNOSPACE );
4499 // search context matching the token and fetch it from stack
4500 nToken = getOnToken(nToken);
4501 std::unique_ptr<HTMLAttrContext> xCntxt;
4502 auto nPos = m_aContexts.size();
4503 while( !xCntxt && nPos>m_nContextStMin )
4505 HtmlTokenId nCntxtToken = m_aContexts[--nPos]->GetToken();
4506 switch( nCntxtToken )
4508 case HtmlTokenId::DD_ON:
4509 case HtmlTokenId::DT_ON:
4510 if( nToken == HtmlTokenId::NONE || nToken == nCntxtToken )
4512 xCntxt = std::move(m_aContexts[nPos]);
4513 m_aContexts.erase( m_aContexts.begin() + nPos );
4515 break;
4516 case HtmlTokenId::DEFLIST_ON:
4517 // don't look at DD/DT outside the current DefList
4518 case HtmlTokenId::DIRLIST_ON:
4519 case HtmlTokenId::MENULIST_ON:
4520 case HtmlTokenId::ORDERLIST_ON:
4521 case HtmlTokenId::UNORDERLIST_ON:
4522 // and also not outside another list
4523 nPos = m_nContextStMin;
4524 break;
4525 default: break;
4529 // and now end attributes
4530 if (xCntxt)
4532 EndContext(xCntxt.get());
4533 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4539 * @param bNoSurroundOnly The paragraph contains at least one frame
4540 * without wrapping.
4541 * @param bSurroundOnly The paragraph contains at least one frame
4542 * with wrapping, but none without wrapping.
4544 * Otherwise the paragraph contains any frame.
4546 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly,
4547 bool bSurroundOnly ) const
4549 SwNode& rNode = m_pPam->GetPoint()->GetNode();
4552 bool bFound = false;
4553 for(sw::SpzFrameFormat* pFormat: *m_xDoc->GetSpzFrameFormats())
4555 SwFormatAnchor const*const pAnchor = &pFormat->GetAnchor();
4556 // A frame was found, when
4557 // - it is paragraph-bound, and
4558 // - is anchored in current paragraph, and
4559 // - every paragraph-bound frame counts, or
4560 // - (only frames without wrapping count and) the frame doesn't have
4561 // a wrapping
4562 SwNode const*const pAnchorNode = pAnchor->GetAnchorNode();
4563 if (pAnchorNode &&
4564 ((RndStdIds::FLY_AT_PARA == pAnchor->GetAnchorId()) ||
4565 (RndStdIds::FLY_AT_CHAR == pAnchor->GetAnchorId())) &&
4566 *pAnchorNode == rNode )
4568 if( !(bNoSurroundOnly || bSurroundOnly) )
4570 bFound = true;
4571 break;
4573 else
4575 // When looking for frames with wrapping, also disregard
4576 // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4577 // and you don't want to evade those when positioning.
4578 css::text::WrapTextMode eSurround = pFormat->GetSurround().GetSurround();
4579 if( bNoSurroundOnly )
4581 if( css::text::WrapTextMode_NONE==eSurround )
4583 bFound = true;
4584 break;
4587 if( bSurroundOnly )
4589 if( css::text::WrapTextMode_NONE==eSurround )
4591 bFound = false;
4592 break;
4594 else if( css::text::WrapTextMode_THROUGH!=eSurround )
4596 bFound = true;
4597 // Continue searching: It's possible that some without
4598 // wrapping will follow...
4605 return bFound;
4608 // the special methods for inserting of objects
4610 const SwFormatColl *SwHTMLParser::GetCurrFormatColl() const
4612 const SwContentNode* pCNd = m_pPam->GetPointContentNode();
4613 return pCNd ? &pCNd->GetAnyFormatColl() : nullptr;
4616 void SwHTMLParser::SetTextCollAttrs( HTMLAttrContext *pContext )
4618 SwTextFormatColl *pCollToSet = nullptr; // the style to set
4619 SfxItemSet *pItemSet = nullptr; // set of hard attributes
4620 sal_uInt16 nTopColl = pContext ? pContext->GetTextFormatColl() : 0;
4621 const OUString rTopClass = pContext ? pContext->GetClass() : OUString();
4622 sal_uInt16 nDfltColl = RES_POOLCOLL_TEXT;
4624 bool bInPRE=false; // some context info
4626 sal_uInt16 nLeftMargin = 0, nRightMargin = 0; // the margins and
4627 short nFirstLineIndent = 0; // indentations
4629 auto nDepth = m_aContexts.size();
4630 if (bFuzzing && nDepth > 128)
4632 SAL_WARN("sw.html", "Not applying any more text collection attributes to a deeply nested node for fuzzing performance");
4633 nDepth = 0;
4636 for (auto i = m_nContextStAttrMin; i < nDepth; ++i)
4638 const HTMLAttrContext *pCntxt = m_aContexts[i].get();
4640 sal_uInt16 nColl = pCntxt->GetTextFormatColl();
4641 if( nColl )
4643 // There is a style to set. Then at first we must decide,
4644 // if the style can be set.
4645 bool bSetThis = true;
4646 switch( nColl )
4648 case RES_POOLCOLL_HTML_PRE:
4649 bInPRE = true;
4650 break;
4651 case RES_POOLCOLL_TEXT:
4652 // <TD><P CLASS=xxx> must become TD.xxx
4653 if( nDfltColl==RES_POOLCOLL_TABLE ||
4654 nDfltColl==RES_POOLCOLL_TABLE_HDLN )
4655 nColl = nDfltColl;
4656 break;
4657 case RES_POOLCOLL_HTML_HR:
4658 // also <HR> in <PRE> set as style, otherwise it can't
4659 // be exported anymore
4660 break;
4661 default:
4662 if( bInPRE )
4663 bSetThis = false;
4664 break;
4667 SwTextFormatColl *pNewColl =
4668 m_pCSS1Parser->GetTextFormatColl( nColl, pCntxt->GetClass() );
4670 if( bSetThis )
4672 // If now a different style should be set as previously, the
4673 // previous style must be replaced by hard attribution.
4675 if( pCollToSet )
4677 // insert the attributes hard, which previous style sets
4678 if( !pItemSet )
4679 pItemSet = new SfxItemSet( pCollToSet->GetAttrSet() );
4680 else
4682 const SfxItemSet& rCollSet = pCollToSet->GetAttrSet();
4683 SfxItemSet aItemSet( *rCollSet.GetPool(),
4684 rCollSet.GetRanges() );
4685 aItemSet.Set( rCollSet );
4686 pItemSet->Put( aItemSet );
4688 // but remove the attributes, which the current style sets,
4689 // because otherwise they will be overwritten later
4690 pItemSet->Differentiate( pNewColl->GetAttrSet() );
4693 pCollToSet = pNewColl;
4695 else
4697 // hard attribution
4698 if( !pItemSet )
4699 pItemSet = new SfxItemSet( pNewColl->GetAttrSet() );
4700 else
4702 const SfxItemSet& rCollSet = pNewColl->GetAttrSet();
4703 SfxItemSet aItemSet( *rCollSet.GetPool(),
4704 rCollSet.GetRanges() );
4705 aItemSet.Set( rCollSet );
4706 pItemSet->Put( aItemSet );
4710 else
4712 // Maybe a default style exists?
4713 nColl = pCntxt->GetDefaultTextFormatColl();
4714 if( nColl )
4715 nDfltColl = nColl;
4718 // if applicable fetch new paragraph indents
4719 if( pCntxt->IsLRSpaceChanged() )
4721 sal_uInt16 nLeft=0, nRight=0;
4723 pCntxt->GetMargins( nLeft, nRight, nFirstLineIndent );
4724 nLeftMargin = nLeft;
4725 nRightMargin = nRight;
4729 // If in current context a new style should be set,
4730 // its paragraph margins must be inserted in the context.
4731 if( pContext && nTopColl )
4733 // <TD><P CLASS=xxx> must become TD.xxx
4734 if( nTopColl==RES_POOLCOLL_TEXT &&
4735 (nDfltColl==RES_POOLCOLL_TABLE ||
4736 nDfltColl==RES_POOLCOLL_TABLE_HDLN) )
4737 nTopColl = nDfltColl;
4739 const SwTextFormatColl *pTopColl =
4740 m_pCSS1Parser->GetTextFormatColl( nTopColl, rTopClass );
4741 const SfxItemSet& rItemSet = pTopColl->GetAttrSet();
4742 if (rItemSet.GetItemIfSet(RES_MARGIN_FIRSTLINE)
4743 || rItemSet.GetItemIfSet(RES_MARGIN_TEXTLEFT)
4744 || rItemSet.GetItemIfSet(RES_MARGIN_RIGHT))
4746 sal_Int32 nLeft = rItemSet.Get(RES_MARGIN_TEXTLEFT).GetTextLeft();
4747 sal_Int32 nRight = rItemSet.Get(RES_MARGIN_RIGHT).GetRight();
4748 nFirstLineIndent = rItemSet.Get(RES_MARGIN_FIRSTLINE).GetTextFirstLineOffset();
4750 // In Definition lists the margins also contain the margins from the previous levels
4751 if( RES_POOLCOLL_HTML_DD == nTopColl )
4753 auto const*const pColl(m_pCSS1Parser->GetTextFormatColl(RES_POOLCOLL_HTML_DT, OUString()));
4754 nLeft -= pColl->GetTextLeftMargin().GetTextLeft();
4755 nRight -= pColl->GetRightMargin().GetRight();
4757 else if( RES_POOLCOLL_HTML_DT == nTopColl )
4759 nLeft = 0;
4760 nRight = 0;
4763 // the paragraph margins add up
4764 nLeftMargin = nLeftMargin + static_cast< sal_uInt16 >(nLeft);
4765 nRightMargin = nRightMargin + static_cast< sal_uInt16 >(nRight);
4767 pContext->SetMargins( nLeftMargin, nRightMargin,
4768 nFirstLineIndent );
4770 if( const SvxULSpaceItem* pULItem = rItemSet.GetItemIfSet(RES_UL_SPACE) )
4772 pContext->SetULSpace( pULItem->GetUpper(), pULItem->GetLower() );
4776 // If no style is set in the context use the text body.
4777 if( !pCollToSet )
4779 pCollToSet = m_pCSS1Parser->GetTextCollFromPool( nDfltColl );
4780 if( !nLeftMargin )
4782 nLeftMargin = static_cast<sal_uInt16>(pCollToSet->GetTextLeftMargin().GetTextLeft());
4784 if( !nRightMargin )
4786 nRightMargin = static_cast<sal_uInt16>(pCollToSet->GetRightMargin().GetRight());
4788 if( !nFirstLineIndent )
4790 nFirstLineIndent = pCollToSet->GetFirstLineIndent().GetTextFirstLineOffset();
4794 // remove previous hard attribution of paragraph
4795 for( auto pParaAttr : m_aParaAttrs )
4796 pParaAttr->Invalidate();
4797 m_aParaAttrs.clear();
4799 // set the style
4800 m_xDoc->SetTextFormatColl( *m_pPam, pCollToSet );
4802 // if applicable correct the paragraph indent
4803 const SvxFirstLineIndentItem & rFirstLine = pCollToSet->GetFirstLineIndent();
4804 const SvxTextLeftMarginItem & rTextLeftMargin = pCollToSet->GetTextLeftMargin();
4805 const SvxRightMarginItem & rRightMargin = pCollToSet->GetRightMargin();
4806 bool bSetLRSpace = nLeftMargin != rTextLeftMargin.GetTextLeft() ||
4807 nFirstLineIndent != rFirstLine.GetTextFirstLineOffset() ||
4808 nRightMargin != rRightMargin.GetRight();
4810 if( bSetLRSpace )
4812 SvxFirstLineIndentItem firstLine(rFirstLine);
4813 SvxTextLeftMarginItem leftMargin(rTextLeftMargin);
4814 SvxRightMarginItem rightMargin(rRightMargin);
4815 firstLine.SetTextFirstLineOffset(nFirstLineIndent);
4816 leftMargin.SetTextLeft(nLeftMargin);
4817 rightMargin.SetRight(nRightMargin);
4818 if( pItemSet )
4820 pItemSet->Put(firstLine);
4821 pItemSet->Put(leftMargin);
4822 pItemSet->Put(rightMargin);
4824 else
4826 NewAttr(m_xAttrTab, &m_xAttrTab->pFirstLineIndent, firstLine);
4827 m_xAttrTab->pFirstLineIndent->SetLikePara();
4828 m_aParaAttrs.push_back(m_xAttrTab->pFirstLineIndent);
4829 EndAttr(m_xAttrTab->pFirstLineIndent, false);
4830 NewAttr(m_xAttrTab, &m_xAttrTab->pTextLeftMargin, leftMargin);
4831 m_xAttrTab->pTextLeftMargin->SetLikePara();
4832 m_aParaAttrs.push_back(m_xAttrTab->pTextLeftMargin);
4833 EndAttr(m_xAttrTab->pTextLeftMargin, false);
4834 NewAttr(m_xAttrTab, &m_xAttrTab->pRightMargin, rightMargin);
4835 m_xAttrTab->pRightMargin->SetLikePara();
4836 m_aParaAttrs.push_back(m_xAttrTab->pRightMargin);
4837 EndAttr(m_xAttrTab->pRightMargin, false);
4841 // and now set the attributes
4842 if( pItemSet )
4844 InsertParaAttrs( *pItemSet );
4845 delete pItemSet;
4849 void SwHTMLParser::NewCharFormat( HtmlTokenId nToken )
4851 OUString aId, aStyle, aLang, aDir;
4852 OUString aClass;
4854 const HTMLOptions& rHTMLOptions = GetOptions();
4855 for (size_t i = rHTMLOptions.size(); i; )
4857 const HTMLOption& rOption = rHTMLOptions[--i];
4858 switch( rOption.GetToken() )
4860 case HtmlOptionId::ID:
4861 aId = rOption.GetString();
4862 break;
4863 case HtmlOptionId::STYLE:
4864 aStyle = rOption.GetString();
4865 break;
4866 case HtmlOptionId::CLASS:
4867 aClass = rOption.GetString();
4868 break;
4869 case HtmlOptionId::LANG:
4870 aLang = rOption.GetString();
4871 break;
4872 case HtmlOptionId::DIR:
4873 aDir = rOption.GetString();
4874 break;
4875 default: break;
4879 // create a new context
4880 std::unique_ptr<HTMLAttrContext> xCntxt(new HTMLAttrContext(nToken));
4882 // set the style and save it in the context
4883 SwCharFormat* pCFormat = m_pCSS1Parser->GetChrFormat( nToken, aClass );
4884 OSL_ENSURE( pCFormat, "No character format found for token" );
4886 // parse styles (regarding class see also NewPara)
4887 if (HasStyleOptions(aStyle, aId, {}, &aLang, &aDir))
4889 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
4890 SvxCSS1PropertyInfo aPropInfo;
4892 if (ParseStyleOptions(aStyle, aId, OUString(), aItemSet, aPropInfo, &aLang, &aDir))
4894 OSL_ENSURE( aClass.isEmpty() || !m_pCSS1Parser->GetClass( aClass ),
4895 "Class is not considered" );
4896 DoPositioning( aItemSet, aPropInfo, xCntxt.get() );
4897 InsertAttrs( aItemSet, aPropInfo, xCntxt.get(), true );
4901 // Character formats are stored in their own stack and can never be inserted
4902 // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4903 if( pCFormat )
4904 InsertAttr( &m_xAttrTab->pCharFormats, SwFormatCharFormat( pCFormat ), xCntxt.get() );
4906 // save the context
4907 PushContext(xCntxt);
4910 void SwHTMLParser::InsertSpacer()
4912 // and if applicable change it via the options
4913 sal_Int16 eVertOri = text::VertOrientation::TOP;
4914 sal_Int16 eHoriOri = text::HoriOrientation::NONE;
4915 Size aSize( 0, 0);
4916 tools::Long nSize = 0;
4917 bool bPercentWidth = false;
4918 bool bPercentHeight = false;
4919 sal_uInt16 nType = HTML_SPTYPE_HORI;
4921 const HTMLOptions& rHTMLOptions = GetOptions();
4922 for (size_t i = rHTMLOptions.size(); i; )
4924 const HTMLOption& rOption = rHTMLOptions[--i];
4925 switch( rOption.GetToken() )
4927 case HtmlOptionId::TYPE:
4928 rOption.GetEnum( nType, aHTMLSpacerTypeTable );
4929 break;
4930 case HtmlOptionId::ALIGN:
4931 eVertOri =
4932 rOption.GetEnum( aHTMLImgVAlignTable,
4933 eVertOri );
4934 eHoriOri =
4935 rOption.GetEnum( aHTMLImgHAlignTable,
4936 eHoriOri );
4937 break;
4938 case HtmlOptionId::WIDTH:
4939 // First only save as pixel value!
4940 bPercentWidth = (rOption.GetString().indexOf('%') != -1);
4941 aSize.setWidth( static_cast<tools::Long>(rOption.GetNumber()) );
4942 break;
4943 case HtmlOptionId::HEIGHT:
4944 // First only save as pixel value!
4945 bPercentHeight = (rOption.GetString().indexOf('%') != -1);
4946 aSize.setHeight( static_cast<tools::Long>(rOption.GetNumber()) );
4947 break;
4948 case HtmlOptionId::SIZE:
4949 // First only save as pixel value!
4950 nSize = rOption.GetNumber();
4951 break;
4952 default: break;
4956 switch( nType )
4958 case HTML_SPTYPE_BLOCK:
4960 // create an empty text frame
4962 // fetch the ItemSet
4963 SfxItemSetFixed<RES_FRMATR_BEGIN, RES_FRMATR_END-1> aFrameSet( m_xDoc->GetAttrPool() );
4964 if( !IsNewDoc() )
4965 Reader::ResetFrameFormatAttrs( aFrameSet );
4967 // set the anchor and the adjustment
4968 SetAnchorAndAdjustment( eVertOri, eHoriOri, aFrameSet );
4970 // and the size of the frame
4971 Size aDfltSz( MINFLY, MINFLY );
4972 Size aSpace( 0, 0 );
4973 SfxItemSet aDummyItemSet( m_xDoc->GetAttrPool(),
4974 m_pCSS1Parser->GetWhichMap() );
4975 SvxCSS1PropertyInfo aDummyPropInfo;
4977 SetFixSize( aSize, aDfltSz, bPercentWidth, bPercentHeight,
4978 aDummyPropInfo, aFrameSet );
4979 SetSpace( aSpace, aDummyItemSet, aDummyPropInfo, aFrameSet );
4981 // protect the content
4982 SvxProtectItem aProtectItem( RES_PROTECT) ;
4983 aProtectItem.SetContentProtect( true );
4984 aFrameSet.Put( aProtectItem );
4986 // create the frame
4987 RndStdIds eAnchorId =
4988 aFrameSet.Get(RES_ANCHOR).GetAnchorId();
4989 SwFrameFormat *pFlyFormat = m_xDoc->MakeFlySection( eAnchorId,
4990 m_pPam->GetPoint(), &aFrameSet );
4991 // Possibly create frames and register auto-bound frames.
4992 RegisterFlyFrame( pFlyFormat );
4994 break;
4995 case HTML_SPTYPE_VERT:
4996 if( nSize > 0 )
4998 nSize = o3tl::convert(nSize, o3tl::Length::px, o3tl::Length::twip);
5000 // set a paragraph margin
5001 SwTextNode *pTextNode = nullptr;
5002 if( !m_pPam->GetPoint()->GetContentIndex() )
5004 // if possible change the bottom paragraph margin
5005 // of previous node
5007 SetAttr(); // set still open paragraph attributes
5009 pTextNode = m_xDoc->GetNodes()[m_pPam->GetPoint()->GetNodeIndex()-1]
5010 ->GetTextNode();
5012 // If the previous paragraph isn't a text node, then now an
5013 // empty paragraph is created, which already generates a single
5014 // line of spacing.
5015 if( !pTextNode )
5016 nSize = nSize>HTML_PARSPACE ? nSize-HTML_PARSPACE : 0;
5019 if( pTextNode )
5021 SvxULSpaceItem aULSpace( pTextNode->SwContentNode::GetAttr( RES_UL_SPACE ) );
5022 aULSpace.SetLower( aULSpace.GetLower() + o3tl::narrowing<sal_uInt16>(nSize) );
5023 pTextNode->SetAttr( aULSpace );
5025 else
5027 NewAttr(m_xAttrTab, &m_xAttrTab->pULSpace, SvxULSpaceItem(0, o3tl::narrowing<sal_uInt16>(nSize), RES_UL_SPACE));
5028 EndAttr( m_xAttrTab->pULSpace, false );
5030 AppendTextNode(); // Don't change spacing!
5033 break;
5034 case HTML_SPTYPE_HORI:
5035 if( nSize > 0 )
5037 // If the paragraph is still empty, set first line
5038 // indentation, otherwise apply letter spacing over a space.
5040 nSize = o3tl::convert(nSize, o3tl::Length::px, o3tl::Length::twip);
5042 if( !m_pPam->GetPoint()->GetContentIndex() )
5044 sal_uInt16 nLeft=0, nRight=0;
5045 short nIndent = 0;
5047 GetMarginsFromContextWithNumberBullet( nLeft, nRight, nIndent );
5048 nIndent = nIndent + static_cast<short>(nSize);
5050 SvxFirstLineIndentItem const firstLine(nIndent, RES_MARGIN_FIRSTLINE);
5051 SvxTextLeftMarginItem const leftMargin(nLeft, RES_MARGIN_TEXTLEFT);
5052 SvxRightMarginItem const rightMargin(nRight, RES_MARGIN_RIGHT);
5054 NewAttr(m_xAttrTab, &m_xAttrTab->pFirstLineIndent, firstLine);
5055 EndAttr(m_xAttrTab->pFirstLineIndent, false);
5056 NewAttr(m_xAttrTab, &m_xAttrTab->pTextLeftMargin, leftMargin);
5057 EndAttr(m_xAttrTab->pTextLeftMargin, false);
5058 NewAttr(m_xAttrTab, &m_xAttrTab->pRightMargin, rightMargin);
5059 EndAttr(m_xAttrTab->pRightMargin, false);
5061 else
5063 NewAttr(m_xAttrTab, &m_xAttrTab->pKerning, SvxKerningItem( static_cast<short>(nSize), RES_CHRATR_KERNING ));
5064 m_xDoc->getIDocumentContentOperations().InsertString( *m_pPam, " " );
5065 EndAttr( m_xAttrTab->pKerning );
5071 sal_uInt16 SwHTMLParser::ToTwips( sal_uInt16 nPixel )
5073 return std::min(o3tl::convert(nPixel, o3tl::Length::px, o3tl::Length::twip),
5074 sal_Int64(SAL_MAX_UINT16));
5077 SwTwips SwHTMLParser::GetCurrentBrowseWidth()
5079 const SwTwips nWidth = SwHTMLTableLayout::GetBrowseWidth( *m_xDoc );
5080 if( nWidth )
5081 return nWidth;
5083 if( !m_aHTMLPageSize.Width() )
5085 const SwFrameFormat& rPgFormat = m_pCSS1Parser->GetMasterPageDesc()->GetMaster();
5087 const SwFormatFrameSize& rSz = rPgFormat.GetFrameSize();
5088 const SvxLRSpaceItem& rLR = rPgFormat.GetLRSpace();
5089 const SvxULSpaceItem& rUL = rPgFormat.GetULSpace();
5090 const SwFormatCol& rCol = rPgFormat.GetCol();
5092 m_aHTMLPageSize.setWidth( rSz.GetWidth() - rLR.GetLeft() - rLR.GetRight() );
5093 m_aHTMLPageSize.setHeight( rSz.GetHeight() - rUL.GetUpper() - rUL.GetLower() );
5095 if( 1 < rCol.GetNumCols() )
5096 m_aHTMLPageSize.setWidth( m_aHTMLPageSize.Width() / ( rCol.GetNumCols()) );
5099 return m_aHTMLPageSize.Width();
5102 void SwHTMLParser::InsertIDOption()
5104 OUString aId;
5105 const HTMLOptions& rHTMLOptions = GetOptions();
5106 for (size_t i = rHTMLOptions.size(); i; )
5108 const HTMLOption& rOption = rHTMLOptions[--i];
5109 if( HtmlOptionId::ID==rOption.GetToken() )
5111 aId = rOption.GetString();
5112 break;
5116 if( !aId.isEmpty() )
5117 InsertBookmark( aId );
5120 void SwHTMLParser::InsertLineBreak()
5122 OUString aId, aStyle, aClass; // the id of bookmark
5123 SwLineBreakClear eClear = SwLineBreakClear::NONE;
5125 // then we fetch the options
5126 const HTMLOptions& rHTMLOptions = GetOptions();
5127 for (size_t i = rHTMLOptions.size(); i; )
5129 const HTMLOption& rOption = rHTMLOptions[--i];
5130 switch( rOption.GetToken() )
5132 case HtmlOptionId::CLEAR:
5134 const OUString &rClear = rOption.GetString();
5135 if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all ) )
5137 eClear = SwLineBreakClear::ALL;
5139 else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
5141 eClear = SwLineBreakClear::LEFT;
5143 else if( rClear.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
5145 eClear = SwLineBreakClear::LEFT;
5148 break;
5149 case HtmlOptionId::ID:
5150 aId = rOption.GetString();
5151 break;
5152 case HtmlOptionId::STYLE:
5153 aStyle = rOption.GetString();
5154 break;
5155 case HtmlOptionId::CLASS:
5156 aClass = rOption.GetString();
5157 break;
5158 default: break;
5162 // parse styles
5163 std::shared_ptr<SvxFormatBreakItem> aBreakItem(std::make_shared<SvxFormatBreakItem>(SvxBreak::NONE, RES_BREAK));
5164 bool bBreakItem = false;
5165 if( HasStyleOptions( aStyle, aId, aClass ) )
5167 SfxItemSet aItemSet( m_xDoc->GetAttrPool(), m_pCSS1Parser->GetWhichMap() );
5168 SvxCSS1PropertyInfo aPropInfo;
5170 if( ParseStyleOptions( aStyle, aId, aClass, aItemSet, aPropInfo ) )
5172 if( m_pCSS1Parser->SetFormatBreak( aItemSet, aPropInfo ) )
5174 aBreakItem.reset(aItemSet.Get(RES_BREAK).Clone());
5175 bBreakItem = true;
5177 if( !aPropInfo.m_aId.isEmpty() )
5178 InsertBookmark( aPropInfo.m_aId );
5182 if( bBreakItem && SvxBreak::PageAfter == aBreakItem->GetBreak() )
5184 NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5185 EndAttr( m_xAttrTab->pBreak, false );
5188 if (!bBreakItem)
5190 if (eClear == SwLineBreakClear::NONE)
5192 // If no CLEAR could or should be executed, a line break will be inserted
5193 m_xDoc->getIDocumentContentOperations().InsertString(*m_pPam, "\x0A");
5195 else
5197 // <BR CLEAR=xxx> is mapped an SwFormatLineBreak.
5198 SwTextNode* pTextNode = m_pPam->GetPointNode().GetTextNode();
5199 if (pTextNode)
5201 SwFormatLineBreak aLineBreak(eClear);
5202 sal_Int32 nPos = m_pPam->GetPoint()->GetContentIndex();
5203 pTextNode->InsertItem(aLineBreak, nPos, nPos);
5207 else if( m_pPam->GetPoint()->GetContentIndex() )
5209 // If a CLEAR is executed in a non-empty paragraph, then after it
5210 // a new paragraph has to be opened.
5211 // MIB 21.02.97: Here actually we should change the bottom paragraph
5212 // margin to zero. This will fail for something like this <BR ..><P>
5213 // (>Netscape). That's why we don't do it.
5214 AppendTextNode( AM_NOSPACE );
5216 if( bBreakItem && SvxBreak::PageBefore == aBreakItem->GetBreak() )
5218 NewAttr(m_xAttrTab, &m_xAttrTab->pBreak, *aBreakItem);
5219 EndAttr( m_xAttrTab->pBreak, false );
5223 void SwHTMLParser::InsertHorzRule()
5225 sal_uInt16 nSize = 0;
5226 sal_uInt16 nWidth = 0;
5228 SvxAdjust eAdjust = SvxAdjust::End;
5230 bool bPercentWidth = false;
5231 bool bNoShade = false;
5232 bool bColor = false;
5234 Color aColor;
5235 OUString aId;
5237 // let's fetch the options
5238 const HTMLOptions& rHTMLOptions = GetOptions();
5239 for (size_t i = rHTMLOptions.size(); i; )
5241 const HTMLOption& rOption = rHTMLOptions[--i];
5242 switch( rOption.GetToken() )
5244 case HtmlOptionId::ID:
5245 aId = rOption.GetString();
5246 break;
5247 case HtmlOptionId::SIZE:
5248 nSize = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
5249 break;
5250 case HtmlOptionId::WIDTH:
5251 bPercentWidth = (rOption.GetString().indexOf('%') != -1);
5252 nWidth = o3tl::narrowing<sal_uInt16>(rOption.GetNumber());
5253 if( bPercentWidth && nWidth>=100 )
5255 // the default case are 100% lines (no attributes necessary)
5256 nWidth = 0;
5257 bPercentWidth = false;
5259 break;
5260 case HtmlOptionId::ALIGN:
5261 eAdjust = rOption.GetEnum( aHTMLPAlignTable, eAdjust );
5262 break;
5263 case HtmlOptionId::NOSHADE:
5264 bNoShade = true;
5265 break;
5266 case HtmlOptionId::COLOR:
5267 rOption.GetColor( aColor );
5268 bColor = true;
5269 break;
5270 default: break;
5274 if( m_pPam->GetPoint()->GetContentIndex() )
5275 AppendTextNode( AM_NOSPACE );
5276 if( m_nOpenParaToken != HtmlTokenId::NONE )
5277 EndPara();
5278 AppendTextNode();
5279 m_pPam->Move( fnMoveBackward );
5281 // ...and save in a context
5282 std::unique_ptr<HTMLAttrContext> xCntxt(
5283 new HTMLAttrContext(HtmlTokenId::HORZRULE, RES_POOLCOLL_HTML_HR, OUString()));
5285 PushContext(xCntxt);
5287 // set the new style
5288 SetTextCollAttrs(m_aContexts.back().get());
5290 // the hard attributes of the current paragraph will never become invalid
5291 m_aParaAttrs.clear();
5293 if( nSize>0 || bColor || bNoShade )
5295 // set line colour and/or width
5296 if( !bColor )
5297 aColor = COL_GRAY;
5299 SvxBorderLine aBorderLine( &aColor );
5300 if( nSize )
5302 tools::Long nPWidth = 0;
5303 tools::Long nPHeight = static_cast<tools::Long>(nSize);
5304 SvxCSS1Parser::PixelToTwip( nPWidth, nPHeight );
5305 if ( !bNoShade )
5307 aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5309 aBorderLine.SetWidth( nPHeight );
5311 else if( bNoShade )
5313 aBorderLine.SetWidth( SvxBorderLineWidth::Medium );
5315 else
5317 aBorderLine.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE);
5318 aBorderLine.SetWidth(SvxBorderLineWidth::Hairline);
5321 SvxBoxItem aBoxItem(RES_BOX);
5322 aBoxItem.SetLine( &aBorderLine, SvxBoxItemLine::BOTTOM );
5323 HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), aBoxItem, nullptr, std::shared_ptr<HTMLAttrTable>());
5324 m_aSetAttrTab.push_back( pTmp );
5326 if( nWidth )
5328 // If we aren't in a table, then the width value will be "faked" with
5329 // paragraph indents. That makes little sense in a table. In order to
5330 // avoid that the line is considered during the width calculation, it
5331 // still gets an appropriate LRSpace-Item.
5332 if (!m_xTable)
5334 // fake length and alignment of line above paragraph indents
5335 tools::Long nBrowseWidth = GetCurrentBrowseWidth();
5336 nWidth = bPercentWidth ? o3tl::narrowing<sal_uInt16>((nWidth*nBrowseWidth) / 100)
5337 : ToTwips( o3tl::narrowing<sal_uInt16>(nBrowseWidth) );
5338 if( nWidth < MINLAY )
5339 nWidth = MINLAY;
5341 const SwFormatColl *pColl = (static_cast<tools::Long>(nWidth) < nBrowseWidth) ? GetCurrFormatColl() : nullptr;
5342 if (pColl)
5344 tools::Long nDist = nBrowseWidth - nWidth;
5345 ::std::optional<SvxTextLeftMarginItem> oLeft;
5346 ::std::optional<SvxRightMarginItem> oRight;
5348 switch( eAdjust )
5350 case SvxAdjust::Right:
5351 oLeft.emplace(o3tl::narrowing<sal_uInt16>(nDist), RES_MARGIN_TEXTLEFT);
5352 break;
5353 case SvxAdjust::Left:
5354 oRight.emplace(o3tl::narrowing<sal_uInt16>(nDist), RES_MARGIN_RIGHT);
5355 break;
5356 case SvxAdjust::Center:
5357 default:
5358 nDist /= 2;
5359 oLeft.emplace(o3tl::narrowing<sal_uInt16>(nDist), RES_MARGIN_TEXTLEFT);
5360 oRight.emplace(o3tl::narrowing<sal_uInt16>(nDist), RES_MARGIN_RIGHT);
5361 break;
5364 if (oLeft)
5366 HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), *oLeft, nullptr, std::shared_ptr<HTMLAttrTable>());
5367 m_aSetAttrTab.push_back( pTmp );
5369 if (oRight)
5371 HTMLAttr* pTmp = new HTMLAttr(*m_pPam->GetPoint(), *oRight, nullptr, std::shared_ptr<HTMLAttrTable>());
5372 m_aSetAttrTab.push_back( pTmp );
5378 // it's not possible to insert bookmarks in links
5379 if( !aId.isEmpty() )
5380 InsertBookmark( aId );
5382 // pop current context of stack
5383 std::unique_ptr<HTMLAttrContext> xPoppedContext(PopContext(HtmlTokenId::HORZRULE));
5384 xPoppedContext.reset();
5386 m_pPam->Move( fnMoveForward );
5388 // and set the current style in the next paragraph
5389 SetTextCollAttrs();
5392 void SwHTMLParser::ParseMoreMetaOptions()
5394 OUString aName, aContent;
5395 bool bHTTPEquiv = false;
5397 const HTMLOptions& rHTMLOptions = GetOptions();
5398 for (size_t i = rHTMLOptions.size(); i; )
5400 const HTMLOption& rOption = rHTMLOptions[--i];
5401 switch( rOption.GetToken() )
5403 case HtmlOptionId::NAME:
5404 aName = rOption.GetString();
5405 bHTTPEquiv = false;
5406 break;
5407 case HtmlOptionId::HTTPEQUIV:
5408 aName = rOption.GetString();
5409 bHTTPEquiv = true;
5410 break;
5411 case HtmlOptionId::CONTENT:
5412 aContent = rOption.GetString();
5413 break;
5414 default: break;
5418 // Here things get a little tricky: We know for sure, that the Doc-Info
5419 // wasn't changed. Therefore it's enough to query for Generator and Refresh
5420 // to find a not processed Token. These are the only ones which won't change
5421 // the Doc-Info.
5422 if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_generator ) ||
5423 aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_refresh ) ||
5424 aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_type ) ||
5425 aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_script_type ) )
5426 return;
5428 aContent = aContent.replaceAll("\r", "").replaceAll("\n", "");
5430 if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdendnote ) )
5432 FillEndNoteInfo( aContent );
5433 return;
5436 if( aName.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdfootnote ) )
5438 FillFootNoteInfo( aContent );
5439 return;
5442 OUStringBuffer sText(
5443 "HTML: <"
5444 OOO_STRING_SVTOOLS_HTML_meta
5445 " ");
5446 if( bHTTPEquiv )
5447 sText.append(OOO_STRING_SVTOOLS_HTML_O_httpequiv);
5448 else
5449 sText.append(OOO_STRING_SVTOOLS_HTML_O_name);
5450 sText.append(
5451 "=\"" + aName
5452 + "\" "
5453 OOO_STRING_SVTOOLS_HTML_O_content
5454 "=\""
5455 + aContent
5456 + "\">");
5458 SwPostItField aPostItField(
5459 static_cast<SwPostItFieldType*>(m_xDoc->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Postit )),
5460 OUString(), sText.makeStringAndClear(), OUString(), OUString(), DateTime(DateTime::SYSTEM));
5461 SwFormatField aFormatField( aPostItField );
5462 InsertAttr( aFormatField, false );
5465 HTMLAttr::HTMLAttr( const SwPosition& rPos, const SfxPoolItem& rItem,
5466 HTMLAttr **ppHd, std::shared_ptr<HTMLAttrTable> xAttrTab ) :
5467 m_nStartPara( rPos.GetNode() ),
5468 m_nEndPara( rPos.GetNode() ),
5469 m_nStartContent( rPos.GetContentIndex() ),
5470 m_nEndContent(rPos.GetContentIndex() ),
5471 m_bInsAtStart( true ),
5472 m_bLikePara( false ),
5473 m_bValid( true ),
5474 m_pItem( rItem.Clone() ),
5475 m_xAttrTab(std::move( xAttrTab )),
5476 m_pNext( nullptr ),
5477 m_pPrev( nullptr ),
5478 m_ppHead( ppHd )
5482 HTMLAttr::HTMLAttr( const HTMLAttr &rAttr, const SwNode &rEndPara,
5483 sal_Int32 nEndCnt, HTMLAttr **ppHd, std::shared_ptr<HTMLAttrTable> xAttrTab ) :
5484 m_nStartPara( rAttr.m_nStartPara ),
5485 m_nEndPara( rEndPara ),
5486 m_nStartContent( rAttr.m_nStartContent ),
5487 m_nEndContent( nEndCnt ),
5488 m_bInsAtStart( rAttr.m_bInsAtStart ),
5489 m_bLikePara( rAttr.m_bLikePara ),
5490 m_bValid( rAttr.m_bValid ),
5491 m_pItem( rAttr.m_pItem->Clone() ),
5492 m_xAttrTab(std::move( xAttrTab )),
5493 m_pNext( nullptr ),
5494 m_pPrev( nullptr ),
5495 m_ppHead( ppHd )
5499 HTMLAttr::~HTMLAttr()
5503 HTMLAttr *HTMLAttr::Clone(const SwNode& rEndPara, sal_Int32 nEndCnt) const
5505 // create the attribute anew with old start position
5506 HTMLAttr *pNew = new HTMLAttr( *this, rEndPara, nEndCnt, m_ppHead, m_xAttrTab );
5508 // The Previous-List must be taken over, the Next-List not!
5509 pNew->m_pPrev = m_pPrev;
5511 return pNew;
5514 void HTMLAttr::Reset(const SwNode& rSttPara, sal_Int32 nSttCnt,
5515 HTMLAttr **ppHd, const std::shared_ptr<HTMLAttrTable>& rAttrTab)
5517 // reset the start (and the end)
5518 m_nStartPara = rSttPara;
5519 m_nStartContent = nSttCnt;
5520 m_nEndPara = rSttPara;
5521 m_nEndContent = nSttCnt;
5523 // correct the head and nullify link
5524 m_pNext = nullptr;
5525 m_pPrev = nullptr;
5526 m_ppHead = ppHd;
5527 m_xAttrTab = rAttrTab;
5530 void HTMLAttr::InsertPrev( HTMLAttr *pPrv )
5532 OSL_ENSURE( !pPrv->m_pNext || pPrv->m_pNext == this,
5533 "HTMLAttr::InsertPrev: pNext wrong" );
5534 pPrv->m_pNext = nullptr;
5536 OSL_ENSURE( nullptr == pPrv->m_ppHead || m_ppHead == pPrv->m_ppHead,
5537 "HTMLAttr::InsertPrev: ppHead wrong" );
5538 pPrv->m_ppHead = nullptr;
5540 HTMLAttr *pAttr = this;
5541 while( pAttr->GetPrev() )
5542 pAttr = pAttr->GetPrev();
5544 pAttr->m_pPrev = pPrv;
5547 bool SwHTMLParser::ParseMetaOptions(
5548 const uno::Reference<document::XDocumentProperties> & i_xDocProps,
5549 SvKeyValueIterator *i_pHeader )
5551 // always call base ParseMetaOptions, it sets the encoding (#i96700#)
5552 bool ret( HTMLParser::ParseMetaOptions(i_xDocProps, i_pHeader) );
5553 if (!ret && IsNewDoc())
5555 ParseMoreMetaOptions();
5557 return ret;
5560 // override so we can parse DOCINFO field subtypes INFO[1-4]
5561 void SwHTMLParser::AddMetaUserDefined( OUString const & i_rMetaName )
5563 // unless we already have 4 names, append the argument to m_InfoNames
5564 OUString* pName // the first empty string in m_InfoNames
5565 (m_InfoNames[0].isEmpty() ? &m_InfoNames[0] :
5566 (m_InfoNames[1].isEmpty() ? &m_InfoNames[1] :
5567 (m_InfoNames[2].isEmpty() ? &m_InfoNames[2] :
5568 (m_InfoNames[3].isEmpty() ? &m_InfoNames[3] : nullptr ))));
5569 if (pName)
5571 (*pName) = i_rMetaName;
5575 void HTMLReader::SetupFilterOptions()
5577 // Reset state from previous Read() invocation.
5578 m_aNamespace.clear();
5580 if (!m_pMedium)
5581 return;
5583 const SfxItemSet* pItemSet = m_pMedium->GetItemSet();
5584 if (!pItemSet)
5585 return;
5587 auto pItem = pItemSet->GetItem<SfxStringItem>(SID_FILE_FILTEROPTIONS);
5588 if (!pItem)
5589 return;
5591 OUString aFilterOptions = pItem->GetValue();
5592 static const OUStringLiteral aXhtmlNsKey(u"xhtmlns=");
5593 if (aFilterOptions.startsWith(aXhtmlNsKey))
5595 OUString aNamespace = aFilterOptions.copy(aXhtmlNsKey.getLength());
5596 m_aNamespace = aNamespace;
5600 namespace
5602 class FontCacheGuard
5604 public:
5605 ~FontCacheGuard()
5607 FlushFontCache();
5612 bool TestImportHTML(SvStream &rStream)
5614 FontCacheGuard aFontCacheGuard;
5615 HTMLReader aReader;
5616 aReader.m_pStream = &rStream;
5618 SwGlobals::ensure();
5620 SfxObjectShellLock xDocSh(new SwDocShell(SfxObjectCreateMode::INTERNAL));
5621 xDocSh->DoInitNew();
5622 SwDoc *pD = static_cast<SwDocShell*>((&xDocSh))->GetDoc();
5624 SwPaM aPaM(pD->GetNodes().GetEndOfContent(), SwNodeOffset(-1));
5625 pD->SetInReading(true);
5626 bool bRet = false;
5629 bRet = aReader.Read(*pD, OUString(), aPaM, OUString()) == ERRCODE_NONE;
5631 catch (const std::runtime_error&)
5634 catch (const std::out_of_range&)
5637 pD->SetInReading(false);
5639 return bRet;
5642 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */