1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
24 #include <config_java.h>
26 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
27 #include <com/sun/star/document/XDocumentProperties.hpp>
28 #include <com/sun/star/i18n/ScriptType.hpp>
29 #include <com/sun/star/i18n/XBreakIterator.hpp>
30 #include <comphelper/string.hxx>
31 #include <o3tl/safeint.hxx>
32 #include <rtl/ustrbuf.hxx>
33 #include <svx/svxids.hrc>
34 #if OSL_DEBUG_LEVEL > 0
37 #include <hintids.hxx>
40 #include <vcl/errinf.hxx>
41 #include <svl/stritem.hxx>
42 #include <vcl/imap.hxx>
43 #include <svtools/htmltokn.h>
44 #include <svtools/htmlkywd.hxx>
45 #include <svtools/ctrltool.hxx>
46 #include <unotools/pathoptions.hxx>
47 #include <vcl/svapp.hxx>
48 #include <sfx2/event.hxx>
49 #include <sfx2/docfile.hxx>
51 #include <sfx2/linkmgr.hxx>
52 #include <editeng/kernitem.hxx>
53 #include <editeng/boxitem.hxx>
54 #include <editeng/fhgtitem.hxx>
55 #include <editeng/formatbreakitem.hxx>
56 #include <editeng/postitem.hxx>
57 #include <editeng/wghtitem.hxx>
58 #include <editeng/crossedoutitem.hxx>
59 #include <editeng/udlnitem.hxx>
60 #include <editeng/escapementitem.hxx>
61 #include <editeng/blinkitem.hxx>
62 #include <editeng/ulspitem.hxx>
63 #include <editeng/colritem.hxx>
64 #include <editeng/fontitem.hxx>
65 #include <editeng/adjustitem.hxx>
66 #include <editeng/lrspitem.hxx>
67 #include <editeng/protitem.hxx>
68 #include <editeng/flstitem.hxx>
69 #include <svx/unobrushitemhelper.hxx>
72 #include <charatr.hxx>
74 #include <fmtpdsc.hxx>
75 #include <fmtanchr.hxx>
76 #include <fmtsrnd.hxx>
77 #include <fmtfsize.hxx>
78 #include <fmtclds.hxx>
79 #include <fchrfmt.hxx>
80 #include <fmtinfmt.hxx>
81 #include <fmtfollowtextflow.hxx>
82 #include <fmtornt.hxx>
84 #include <IDocumentUndoRedo.hxx>
85 #include <IDocumentSettingAccess.hxx>
86 #include <IDocumentLayoutAccess.hxx>
87 #include <IDocumentLinksAdministration.hxx>
88 #include <IDocumentRedlineAccess.hxx>
89 #include <IDocumentFieldsAccess.hxx>
90 #include <IDocumentStylePoolAccess.hxx>
91 #include <IDocumentStatistics.hxx>
92 #include <IDocumentState.hxx>
96 #include <poolfmt.hxx>
97 #include <pagedesc.hxx>
100 #include <editsh.hxx>
101 #include <docufld.hxx>
102 #include "swcss1.hxx"
103 #include <fltini.hxx>
104 #include <htmltbl.hxx>
105 #include "htmlnum.hxx"
106 #include "swhtml.hxx"
107 #include "wrthtml.hxx"
108 #include <linkenum.hxx>
109 #include <breakit.hxx>
110 #include <SwAppletImpl.hxx>
112 #include <txatbase.hxx>
114 #include <sfx2/viewfrm.hxx>
115 #include <svx/svdobj.hxx>
116 #include <officecfg/Office/Writer.hxx>
117 #include <comphelper/sequenceashashmap.hxx>
118 #include <comphelper/sequence.hxx>
119 #include <officecfg/Office/Common.hxx>
123 #include <unoframe.hxx>
124 #include "css1atr.hxx"
125 #include <frameformats.hxx>
// File-local constants of the HTML import.
// NOTE(review): FONTSIZE_MASK presumably limits HTML font size values to the
// range 0-7 (the parser keeps seven font heights); confirm at its use sites.
127 #define FONTSIZE_MASK 7
// Escapement settings; the super/sub values alias the DFLT_ESC_* defaults.
129 #define HTML_ESC_PROP 80
130 #define HTML_ESC_SUPER DFLT_ESC_SUPER
131 #define HTML_ESC_SUB DFLT_ESC_SUB
// Numeric ids of the spacer types; these are the values stored in
// aHTMLSpacerTypeTable.
133 #define HTML_SPTYPE_BLOCK 1
134 #define HTML_SPTYPE_HORI 2
135 #define HTML_SPTYPE_VERT 3
137 using editeng::SvxBorderLine
;
138 using namespace ::com::sun::star
;
// Lookup table that maps the keyword of an HTML ALIGN option to an SvxAdjust
// value. "center" and "middle" both map to SvxAdjust::Center, "justify" maps
// to SvxAdjust::Block and "char" is treated like "left". The last entry, with
// a nullptr keyword, terminates the table.
140 // <P ALIGN=xxx>, <Hn ALIGN=xxx>, <TD ALIGN=xxx> etc.
141 HTMLOptionEnum
<SvxAdjust
> const aHTMLPAlignTable
[] =
143 { OOO_STRING_SVTOOLS_HTML_AL_left
, SvxAdjust::Left
},
144 { OOO_STRING_SVTOOLS_HTML_AL_center
, SvxAdjust::Center
},
145 { OOO_STRING_SVTOOLS_HTML_AL_middle
, SvxAdjust::Center
}, // Netscape
146 { OOO_STRING_SVTOOLS_HTML_AL_right
, SvxAdjust::Right
},
147 { OOO_STRING_SVTOOLS_HTML_AL_justify
, SvxAdjust::Block
},
148 { OOO_STRING_SVTOOLS_HTML_AL_char
, SvxAdjust::Left
},
149 { nullptr, SvxAdjust(0) }
// Lookup table that maps a spacer type keyword (block, horizontal, vertical)
// to the matching HTML_SPTYPE_* constant.
153 HTMLOptionEnum
<sal_uInt16
> const aHTMLSpacerTypeTable
[] =
155 { OOO_STRING_SVTOOLS_HTML_SPTYPE_block
, HTML_SPTYPE_BLOCK
},
156 { OOO_STRING_SVTOOLS_HTML_SPTYPE_horizontal
, HTML_SPTYPE_HORI
},
157 { OOO_STRING_SVTOOLS_HTML_SPTYPE_vertical
, HTML_SPTYPE_VERT
},
// Constructs the HTML reader and switches template browse mode on.
161 HTMLReader::HTMLReader()
163 m_bTemplateBrowseMode
= true;
// Determines the template to use when importing into rDoc.
//
// The document's HTML_MODE setting is checked first; as the comment below
// states, a plain "HTML import into Writer" must not load the Writer/Web
// template. Otherwise the template paths are searched for
// "internal/html.oth" (OpenDocument Writer/Web) and then for
// "internal/html.stw" (OpenOffice.org Writer/Web). If neither file is found,
// a debug assertion reports the missing default template.
//
// NOTE(review): presumably returns the template path that was found and an
// empty string otherwise - confirm against the return statements.
166 OUString
HTMLReader::GetTemplateName(SwDoc
& rDoc
) const
168 if (!rDoc
.getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE
))
169 // HTML import into Writer, avoid loading the Writer/Web template.
172 static const OUStringLiteral
sTemplateWithoutExt(u
"internal/html");
173 SvtPathOptions aPathOpt
;
175 // first search for OpenDocument Writer/Web template
176 // OpenDocument Writer/Web template (extension .oth)
177 OUString
sTemplate( sTemplateWithoutExt
+ ".oth" );
178 if (aPathOpt
.SearchFile( sTemplate
, SvtPathOptions::Paths::Template
))
181 // no OpenDocument Writer/Web template found.
182 // search for OpenOffice.org Writer/Web template
183 sTemplate
= sTemplateWithoutExt
+ ".stw";
184 if (aPathOpt
.SearchFile( sTemplate
, SvtPathOptions::Paths::Template
))
187 OSL_ENSURE( false, "The default HTML template cannot be found in the defined template directories!");
// Selects the input for the import: asserts that a medium is set and, when
// the medium is remote or is not a storage, takes the medium's input stream
// as m_pStream.
// NOTE(review): presumably returns true only when a stream could be set up -
// confirm against the return statements.
192 bool HTMLReader::SetStrmStgPtr()
194 OSL_ENSURE( m_pMedium
, "Where is the medium??" );
196 if( m_pMedium
->IsRemote() || !m_pMedium
->IsStorage() )
198 m_pStream
= m_pMedium
->GetInStream();
// Imports HTML from m_pStream into rDoc.
//
//   rDoc     - target document; a local reference keeps it alive while parsing
//   rBaseURL - handed to the SwHTMLParser
//   rPam     - insert position; also receives the HTML page style item
//   rName    - handed to the SwHTMLParser
//
// Steps: apply the filter options, reset the frame formats, set the HTML page
// style (only when the document is not in HTML mode and the namespace is not
// "reqif-xhtml"), then create a SwHTMLParser and run it via CallParser().
//
// Result: ERR_SWG_READ_ERROR accompanies the "HTML-Read without stream"
// assertion. nRet starts as ERRCODE_NONE. A Pending parser state only resets
// the stream error; any state other than Pending or Accepted stores a
// StringErrorInfo (ERR_FORMAT_ROWCOL, text "<line>,<pos>") in nRet.
// NOTE(review): presumably nRet is what the function finally returns - confirm.
205 // Call for the general Reader-Interface
206 ErrCode
HTMLReader::Read( SwDoc
&rDoc
, const OUString
& rBaseURL
, SwPaM
&rPam
, const OUString
& rName
)
208 SetupFilterOptions();
212 OSL_ENSURE( m_pStream
, "HTML-Read without stream" );
213 return ERR_SWG_READ_ERROR
;
218 Reader::ResetFrameFormats( rDoc
);
220 // Set the HTML page style, when it isn't a HTML document,
221 // otherwise it's already set.
222 if( !rDoc
.getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE
) && m_aNamespace
!= "reqif-xhtml" )
224 rDoc
.getIDocumentContentOperations().InsertPoolItem( rPam
, SwFormatPageDesc(
225 rDoc
.getIDocumentStylePoolAccess().GetPageDescFromPool( RES_POOLPAGE_HTML
, false )) );
229 // so nobody steals the document!
230 rtl::Reference
<SwDoc
> xHoldAlive(&rDoc
);
231 ErrCode nRet
= ERRCODE_NONE
;
232 tools::SvRef
<SwHTMLParser
> xParser
= new SwHTMLParser( &rDoc
, rPam
, *m_pStream
,
233 rName
, rBaseURL
, !m_bInsertMode
, m_pMedium
,
235 m_bIgnoreHTMLComments
, m_aNamespace
);
237 SvParserState eState
= xParser
->CallParser();
239 if( SvParserState::Pending
== eState
)
240 m_pStream
->ResetError();
241 else if( SvParserState::Accepted
!= eState
)
// Report the parser's current line and column as "line,pos".
243 const OUString
sErr(OUString::number(static_cast<sal_Int32
>(xParser
->GetLineNr()))
244 + "," + OUString::number(static_cast<sal_Int32
>(xParser
->GetLinePos())));
246 // use the stream as transport for error number
247 nRet
= *new StringErrorInfo( ERR_FORMAT_ROWCOL
, sErr
,
248 DialogMask::ButtonsOk
| DialogMask::MessageError
);
// Sets up the parser state for one HTML import run.
//
//   pD              - NOTE(review): presumably the target document held as
//                     m_xDoc - confirm
//   rCursor         - insert cursor; its mark is deleted and the cursor itself
//                     is reused as m_pPam
//   rIn, pMed       - input stream and medium, forwarded to SfxHTMLParser;
//                     pMed also supplies the jump mark and the load arguments
//   bReadUTF8       - NOTE(review): presumably selects the UTF-8 source
//                     encoding set below - confirm
//   bNoHTMLComments - initial value of m_bIgnoreHTMLComments; the
//                     IgnoreComments configuration entry can force it to true
//   rNamespace      - if not empty it is passed to SetNamespace();
//                     "reqif-xhtml" gets additional handling
//
// Besides member initialisation the body
//   - reads configuration values (unknown-tag handling, font sizes 1-7),
//   - sets the document default font heights for the Western, CJK and CTL
//     font size attributes from m_aFontHeights[2],
//   - remembers the document's HTML_MODE setting in m_bOldIsHTMLMode and
//     switches HTML mode on (the destructor restores it),
//   - creates the CSS1 parser,
//   - splits the medium's URL mark into the jump mark name and its type
//     (region, table, graphic, ...),
//   - reads the "AllowedRTFOLEMimeTypes" load argument into
//     m_aAllowedRTFOLEMimeTypes.
254 SwHTMLParser::SwHTMLParser( SwDoc
* pD
, SwPaM
& rCursor
, SvStream
& rIn
,
258 SfxMedium
* pMed
, bool bReadUTF8
,
259 bool bNoHTMLComments
,
260 const OUString
& rNamespace
)
261 : SfxHTMLParser( rIn
, bReadNewDoc
, pMed
),
262 m_aPathToFile(std::move( aPath
)),
263 m_sBaseURL(std::move( aBaseURL
)),
264 m_xAttrTab(std::make_shared
<HTMLAttrTable
>()),
265 m_pNumRuleInfo( new SwHTMLNumRuleInfo
),
267 m_pActionViewShell( nullptr ),
268 m_pSttNdIdx( nullptr ),
269 m_pFormImpl( nullptr ),
270 m_pImageMap( nullptr ),
271 m_nBaseFontStMin( 0 ),
274 m_nFontStHeadStart( 0 ),
276 m_nMissingImgMaps( 0 ),
279 m_nContextStMin( 0 ),
280 m_nContextStAttrMin( 0 ),
281 m_nSelectEntryCnt( 0 ),
282 m_nOpenParaToken( HtmlTokenId::NONE
),
283 m_eJumpTo( JumpToMarks::NONE
),
287 m_eParaAdjust( SvxAdjust::End
),
288 m_bDocInitialized( false ),
289 m_bSetModEnabled( false ),
290 m_bInFloatingFrame( false ),
292 m_bKeepUnknown( false ),
293 m_bCallNextToken( false ),
294 m_bIgnoreRawData( false ),
295 m_bLBEntrySelected ( false ),
296 m_bTAIgnoreNewPara ( false ),
297 m_bFixMarqueeWidth ( false ),
298 m_bNoParSpace( false ),
299 m_bInNoEmbed( false ),
301 m_bUpdateDocStat( false ),
302 m_bFixSelectWidth( false ),
303 m_bTextArea( false ),
305 m_bInFootEndNoteAnchor( false ),
306 m_bInFootEndNoteSymbol( false ),
307 m_bIgnoreHTMLComments( bNoHTMLComments
),
308 m_bRemoveHidden( false ),
309 m_bBodySeen( false ),
310 m_bReadingHeaderOrFooter( false ),
311 m_bNotifyMacroEventRead( false ),
312 m_isInTableStructure(false),
314 m_nFloatingFrames( 0 ),
316 m_pTempViewFrame(nullptr)
318 // If requested explicitly, then force ignoring of comments (don't create postits for them).
321 if (officecfg::Office::Writer::Filter::Import::HTML::IgnoreComments::get())
322 m_bIgnoreHTMLComments
= true;
323 m_bKeepUnknown
= officecfg::Office::Common::Filter::HTML::Import::UnknownTag::get();
326 m_nEventId
= nullptr;
327 m_bUpperSpace
= m_bViewCreated
= m_bChkJumpMark
= false;
329 m_eScriptLang
= HTMLScriptLanguage::Unknown
;
331 rCursor
.DeleteMark();
332 m_pPam
= &rCursor
; // re-use existing cursor: avoids spurious ~SwContentIndexReg assert
333 memset(m_xAttrTab
.get(), 0, sizeof(HTMLAttrTable
));
335 // Read the font sizes 1-7 from the INI file
// Each configured size is multiplied by 20 before it is stored.
338 m_aFontHeights
[0] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_1::get() * 20;
339 m_aFontHeights
[1] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_2::get() * 20;
340 m_aFontHeights
[2] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_3::get() * 20;
341 m_aFontHeights
[3] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_4::get() * 20;
342 m_aFontHeights
[4] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_5::get() * 20;
343 m_aFontHeights
[5] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_6::get() * 20;
344 m_aFontHeights
[6] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_7::get() * 20;
// Alternative initialisation: all seven entries get the same value, 12 * 20.
348 m_aFontHeights
[0] = m_aFontHeights
[1] = m_aFontHeights
[2] = m_aFontHeights
[3] =
349 m_aFontHeights
[4] = m_aFontHeights
[5] = m_aFontHeights
[6] = 12 * 20;
354 //CJK has different defaults, so a different object should be used for this
355 //RES_CHRATR_CJK_FONTSIZE is a valid value
356 SvxFontHeightItem
aFontHeight(m_aFontHeights
[2], 100, RES_CHRATR_FONTSIZE
);
357 m_xDoc
->SetDefault( aFontHeight
);
358 SvxFontHeightItem
aFontHeightCJK(m_aFontHeights
[2], 100, RES_CHRATR_CJK_FONTSIZE
);
359 m_xDoc
->SetDefault( aFontHeightCJK
);
360 SvxFontHeightItem
aFontHeightCTL(m_aFontHeights
[2], 100, RES_CHRATR_CTL_FONTSIZE
);
361 m_xDoc
->SetDefault( aFontHeightCTL
);
363 // #i18732# - adjust default of option 'FollowTextFlow'
364 // TODO: not sure what the appropriate default for HTML should be?
365 m_xDoc
->SetDefault( SwFormatFollowTextFlow(true) );
368 // Change to HTML mode during the import, so that the right styles are created
369 m_bOldIsHTMLMode
= m_xDoc
->getIDocumentSettingAccess().get(DocumentSettingId::HTML_MODE
);
370 m_xDoc
->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE
, true);
372 m_pCSS1Parser
.reset(new SwCSS1Parser(m_xDoc
.get(), *this, m_aFontHeights
, m_sBaseURL
, IsNewDoc()));
374 m_pCSS1Parser
->SetIgnoreFontFamily( officecfg::Office::Common::Filter::HTML::Import::FontSetting::get() );
378 SetSrcEncoding( RTL_TEXTENCODING_UTF8
);
382 SwDocShell
*pDocSh
= m_xDoc
->GetDocShell();
383 SvKeyValueIterator
*pHeaderAttrs
=
384 pDocSh
->GetHeaderAttributes();
386 SetEncodingByHTTPHeader( pHeaderAttrs
);
388 m_pCSS1Parser
->SetDfltEncoding( osl_getThreadTextEncoding() );
390 SwDocShell
* pDocSh
= m_xDoc
->GetDocShell();
393 m_bViewCreated
= true; // not, load synchronous
395 // a jump mark is present
399 m_sJmpMark
= pMed
->GetURLObject().GetMark();
400 if( !m_sJmpMark
.isEmpty() )
402 m_eJumpTo
= JumpToMarks::Mark
;
// The text after the last cMarkSeparator names the kind of jump target.
403 sal_Int32 nLastPos
= m_sJmpMark
.lastIndexOf( cMarkSeparator
);
404 sal_Int32 nPos
= nLastPos
!= -1 ? nLastPos
: 0;
409 sCmp
= m_sJmpMark
.copy(nPos
+ 1).replaceAll(" ", "");
412 if( !sCmp
.isEmpty() )
414 sCmp
= sCmp
.toAsciiLowerCase();
415 if( sCmp
== "region" )
416 m_eJumpTo
= JumpToMarks::Region
;
417 else if( sCmp
== "table" )
418 m_eJumpTo
= JumpToMarks::Table
;
419 else if( sCmp
== "graphic" )
420 m_eJumpTo
= JumpToMarks::Graphic
;
421 else if( sCmp
== "outline" ||
424 m_eJumpTo
= JumpToMarks::NONE
; // this is nothing valid!
426 // otherwise this is a normal (book)mark
433 m_sJmpMark
= m_sJmpMark
.copy( 0, nPos
);
434 if( m_sJmpMark
.isEmpty() )
435 m_eJumpTo
= JumpToMarks::NONE
;
440 if (!rNamespace
.isEmpty())
442 SetNamespace(rNamespace
);
444 if (rNamespace
== "reqif-xhtml")
448 // Extract load parameters which are specific to this filter.
454 comphelper::SequenceAsHashMap
aLoadMap(pMed
->GetArgs());
455 auto it
= aLoadMap
.find("AllowedRTFOLEMimeTypes");
456 if (it
== aLoadMap
.end())
461 uno::Sequence
<OUString
> aTypes
;
462 it
->second
>>= aTypes
;
463 m_aAllowedRTFOLEMimeTypes
= comphelper::sequenceToContainer
<std::set
<OUString
>>(aTypes
);
// Tears the parser down and restores the document state changed for the
// import.
//
// Order of work:
//   - assert that no Continue is running and no contexts are left, then pop
//     and clear any contexts that remain anyway,
//   - remember and clear the document's "load asynchronous" flag and restore
//     the HTML_MODE setting saved in m_bOldIsHTMLMode,
//   - remove a still pending user event (m_nEventId),
//   - if a doc shell exists: update all links (only when the link update mode
//     is not NEVER, the load was asynchronous and the shell's create mode is
//     not INTERNAL) and call LoadingFinished() while the shell is still
//     loading,
//   - release leftover attributes, the CSS1 parser, numbering info,
//     foot-/endnote data, image maps and the pending stack,
//   - close the temporary view frame and, if m_bRemoveHidden is set, clear
//     SID_HIDDEN from the medium's item set.
466 SwHTMLParser::~SwHTMLParser()
469 OSL_ENSURE( !m_nContinue
, "DTOR in continue!" );
472 OSL_ENSURE(m_aContexts
.empty(), "There are still contexts on the stack");
473 OSL_ENSURE(!m_nContextStMin
, "There are protected contexts");
475 while (!m_aContexts
.empty())
477 std::unique_ptr
<HTMLAttrContext
> xCntxt(PopContext());
478 ClearContext(xCntxt
.get());
481 bool bAsync
= m_xDoc
->IsInLoadAsynchron();
482 m_xDoc
->SetInLoadAsynchron( false );
483 m_xDoc
->getIDocumentSettingAccess().set(DocumentSettingId::HTML_MODE
, m_bOldIsHTMLMode
);
485 if( m_xDoc
->GetDocShell() && m_nEventId
)
486 Application::RemoveUserEvent( m_nEventId
);
488 // the DocumentDetected maybe can delete the DocShells, therefore fetch again
489 if( m_xDoc
->GetDocShell() )
491 // update linked sections
492 sal_uInt16 nLinkMode
= m_xDoc
->getIDocumentSettingAccess().getLinkUpdateMode( true );
493 if( nLinkMode
!= NEVER
&& bAsync
&&
494 SfxObjectCreateMode::INTERNAL
!=m_xDoc
->GetDocShell()->GetCreateMode() )
495 m_xDoc
->getIDocumentLinksAdministration().GetLinkManager().UpdateAllLinks( nLinkMode
== MANUAL
, false, nullptr );
497 if ( m_xDoc
->GetDocShell()->IsLoading() )
500 m_xDoc
->GetDocShell()->LoadingFinished();
506 if( !m_aSetAttrTab
.empty() )
508 OSL_ENSURE( m_aSetAttrTab
.empty(),"There are still attributes on the stack" );
509 for ( const auto& rpAttr
: m_aSetAttrTab
)
511 m_aSetAttrTab
.clear();
514 m_pCSS1Parser
.reset();
515 m_pNumRuleInfo
.reset();
517 m_pFootEndNoteImpl
.reset();
519 OSL_ENSURE(!m_xTable
, "It exists still an open table");
520 m_pImageMaps
.reset();
522 OSL_ENSURE( m_vPendingStack
.empty(),
523 "SwHTMLParser::~SwHTMLParser: Here should not be Pending-Stack anymore" );
524 m_vPendingStack
.clear();
528 if ( m_pTempViewFrame
)
530 m_pTempViewFrame
->DoClose();
532 // the temporary view frame is hidden, so the hidden flag might need to be removed
533 if ( m_bRemoveHidden
&& m_xDoc
.is() && m_xDoc
->GetDocShell() && m_xDoc
->GetDocShell()->GetMedium() )
534 m_xDoc
->GetDocShell()->GetMedium()->GetItemSet()->ClearItem( SID_HIDDEN
);
// Link handler for the user event posted by CallParser().
// If the doc shell is aborting the import, or the parser holds the only
// remaining reference to the document, the parser state is set to Error.
// The asynchronous call link is then invoked with nullptr.
538 IMPL_LINK_NOARG( SwHTMLParser
, AsyncCallback
, void*, void )
542 // #i47907# - If the document has already been destructed,
543 // the parser should be aware of this:
544 if( ( m_xDoc
->GetDocShell() && m_xDoc
->GetDocShell()->IsAbortingImport() )
545 || 1 == m_xDoc
->getReferenceCount() )
547 // was the import aborted by SFX?
548 eState
= SvParserState::Error
;
551 GetAsynchCallLink().Call(nullptr);
// Prepares the document for the import and starts the base class parser.
//
// When inserting into an existing document, the node at the cursor is split
// twice with the index in front of the insert position kept in m_pSttNdIdx,
// the cursor is moved backward, any redline spanning the insert position is
// split, and the standard text collection is applied at the cursor.
//
// If no view has been created yet, AsyncCallback is posted as a user event;
// in the other branch an ImportProgress is created whose range is the stream
// size (found by seeking to the end and back to the beginning).
//
// The parser then listens to the notifier of page descriptor 0 and calls
// HTMLParser::CallParser().
// NOTE(review): presumably returns that call's result (eRet) - confirm.
554 SvParserState
SwHTMLParser::CallParser()
556 // create temporary index on position 0, so it won't be moved!
557 m_pSttNdIdx
= new SwNodeIndex( m_xDoc
->GetNodes() );
558 if( !IsNewDoc() ) // insert into existing document ?
560 const SwPosition
* pPos
= m_pPam
->GetPoint();
562 m_xDoc
->getIDocumentContentOperations().SplitNode( *pPos
, false );
564 *m_pSttNdIdx
= pPos
->GetNodeIndex()-1;
565 m_xDoc
->getIDocumentContentOperations().SplitNode( *pPos
, false );
567 SwPaM
aInsertionRangePam( *pPos
);
569 m_pPam
->Move( fnMoveBackward
);
571 // split any redline over the insertion point
572 aInsertionRangePam
.SetMark();
573 *aInsertionRangePam
.GetPoint() = *m_pPam
->GetPoint();
574 aInsertionRangePam
.Move( fnMoveBackward
);
575 m_xDoc
->getIDocumentRedlineAccess().SplitRedline( aInsertionRangePam
);
577 m_xDoc
->SetTextFormatColl( *m_pPam
,
578 m_pCSS1Parser
->GetTextCollFromPool( RES_POOLCOLL_STANDARD
));
583 if( !m_bViewCreated
)
585 m_nEventId
= Application::PostUserEvent( LINK( this, SwHTMLParser
, AsyncCallback
) );
589 m_bViewCreated
= true;
590 m_nEventId
= nullptr;
593 else // show progress bar
// Seek to the end so that Tell() yields the stream size for the progress range.
595 rInput
.Seek(STREAM_SEEK_TO_END
);
598 m_xProgress
.reset(new ImportProgress(m_xDoc
->GetDocShell(), 0, rInput
.Tell()));
600 rInput
.Seek(STREAM_SEEK_TO_BEGIN
);
604 StartListening(m_xDoc
->GetPageDesc( 0 ).GetNotifier());
606 SvParserState eRet
= HTMLParser::CallParser();
// Tells whether the node at nNodeIdx may be removed, judged by the node
// directly in front of it: returns true if that previous node is a content
// node, or if it is an end node whose start-of-section node is a section
// node.
610 bool SwHTMLParser::CanRemoveNode(SwNodeOffset nNodeIdx
) const
612 const SwNode
*pPrev
= m_xDoc
->GetNodes()[nNodeIdx
- 1];
613 return pPrev
->IsContentNode() || (pPrev
->IsEndNode() && pPrev
->StartOfSectionNode()->IsSectionNode());
// Runs or resumes parsing and, once parsing is no longer pending, finishes
// the import.
//
//   nToken - token to continue with; if the pending stack is not empty, the
//            token on top of that stack is used instead
//
// Outline:
//   1. Assert that the call is not re-entrant; set the Error state if the doc
//      shell is aborting the import.
//   2. Start an action on the view shell. On the first call with a medium and
//      without a created view, switch to the Pending state and mark the
//      document as loading asynchronously.
//   3. Switch off SetModified handling, the OLE link and undo for the
//      duration of the import, remembering their previous state.
//   4. In the Error state, unwind a pending stack through NextToken();
//      otherwise call HTMLParser::Continue().
//   5. If the status is not Pending: insert a collected script as a field,
//      strip a trailing LF, close open numbering, pop the remaining contexts,
//      apply delayed styles, undo the node splits made in CallParser() when
//      inserting into an existing document, remove a useless last paragraph,
//      set the autoload URL for new documents and update the document
//      statistics if requested.
//   6. Restore undo, the OLE link, the modified state and SetModified
//      handling, then end the action.
616 void SwHTMLParser::Continue( HtmlTokenId nToken
)
619 OSL_ENSURE(!m_nContinue
, "Continue in Continue - not supposed to happen");
623 // When the import (of SFX) is aborted, an error will be set but
624 // we still continue, so that we clean up properly.
625 OSL_ENSURE( SvParserState::Error
!=eState
,
626 "SwHTMLParser::Continue: already set an error" );
627 if( m_xDoc
->GetDocShell() && m_xDoc
->GetDocShell()->IsAbortingImport() )
628 eState
= SvParserState::Error
;
630 // Fetch SwViewShell from document, save it and set as current.
631 SwViewShell
*pInitVSh
= CallStartAction();
633 if( SvParserState::Error
!= eState
&& GetMedium() && !m_bViewCreated
)
635 // At first call first return, show document and wait for callback
637 // At this point in CallParser only one digit was read and
638 // a SaveState(0) was called.
639 eState
= SvParserState::Pending
;
640 m_bViewCreated
= true;
641 m_xDoc
->SetInLoadAsynchron( true );
// Remember whether SetModified was enabled on the doc shell and switch it off.
650 m_bSetModEnabled
= false;
651 if( m_xDoc
->GetDocShell() )
653 m_bSetModEnabled
= m_xDoc
->GetDocShell()->IsEnableSetModified();
654 if( m_bSetModEnabled
)
656 m_xDoc
->GetDocShell()->EnableSetModified( false );
660 // during import don't call OLE-Modified
661 Link
<bool,void> aOLELink( m_xDoc
->GetOle2Link() );
662 m_xDoc
->SetOle2Link( Link
<bool,void>() );
664 bool bModified
= m_xDoc
->getIDocumentState().IsModified();
665 bool const bWasUndo
= m_xDoc
->GetIDocumentUndoRedo().DoesUndo();
666 m_xDoc
->GetIDocumentUndoRedo().DoUndo(false);
668 // When the import will be aborted, don't call Continue anymore.
669 // If a Pending-Stack exists make sure the stack is ended with a call
671 if( SvParserState::Error
== eState
)
673 OSL_ENSURE( m_vPendingStack
.empty() || m_vPendingStack
.back().nToken
!= HtmlTokenId::NONE
,
674 "SwHTMLParser::Continue: Pending-Stack without Token" );
675 if( !m_vPendingStack
.empty() && m_vPendingStack
.back().nToken
!= HtmlTokenId::NONE
)
676 NextToken( m_vPendingStack
.back().nToken
);
677 OSL_ENSURE( m_vPendingStack
.empty(),
678 "SwHTMLParser::Continue: There is again a Pending-Stack" );
682 HTMLParser::Continue( !m_vPendingStack
.empty() ? m_vPendingStack
.back().nToken
: nToken
);
685 // disable progress bar again
688 bool bLFStripped
= false;
689 if( SvParserState::Pending
!= GetStatus() )
691 // set the last attributes yet
693 if( !m_aScriptSource
.isEmpty() )
695 SwScriptFieldType
*pType
=
696 static_cast<SwScriptFieldType
*>(m_xDoc
->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Script
));
698 SwScriptField
aField( pType
, m_aScriptType
, m_aScriptSource
,
700 InsertAttr( SwFormatField( aField
), false );
705 if( m_pAppletImpl
->GetApplet().is() )
711 // maybe remove an existing LF after the last paragraph
713 bLFStripped
= StripTrailingLF() > 0;
715 // close still open numbering
716 while( GetNumInfo().GetNumRule() )
717 EndNumberBulletList();
719 OSL_ENSURE( !m_nContextStMin
, "There are protected contexts" );
720 // try this twice, first normally to let m_nContextStMin decrease
721 // naturally and get contexts popped in desired order, and if that
723 for (int i
= 0; i
< 2; ++i
)
725 while (m_aContexts
.size() > m_nContextStMin
)
727 std::unique_ptr
<HTMLAttrContext
> xCntxt(PopContext());
729 EndContext(xCntxt
.get());
731 if (!m_nContextStMin
)
733 OSL_ENSURE(!m_nContextStMin
, "There are still protected contexts");
737 m_aParaAttrs
.clear();
741 // set the first delayed styles
742 m_pCSS1Parser
->SetDelayedStyles();
745 // again correct the start
746 if( !IsNewDoc() && m_pSttNdIdx
->GetIndex() )
748 SwTextNode
* pTextNode
= m_pSttNdIdx
->GetNode().GetTextNode();
749 SwNodeIndex
aNxtIdx( *m_pSttNdIdx
);
750 if( pTextNode
&& pTextNode
->CanJoinNext( &aNxtIdx
))
752 const sal_Int32 nStt
= pTextNode
->GetText().getLength();
753 // when the cursor is still in the node, then set him at the end
754 if( m_pPam
->GetPoint()->GetNode() == aNxtIdx
.GetNode() )
756 m_pPam
->GetPoint()->Assign( *pTextNode
, nStt
);
759 #if OSL_DEBUG_LEVEL > 0
760 // !!! shouldn't be possible, or ??
761 OSL_ENSURE( m_pSttNdIdx
->GetIndex()+1 != m_pPam
->GetBound().GetNodeIndex(),
762 "Pam.Bound1 is still in the node" );
763 OSL_ENSURE( m_pSttNdIdx
->GetIndex()+1 != m_pPam
->GetBound( false ).GetNodeIndex(),
764 "Pam.Bound2 is still in the node" );
766 if( m_pSttNdIdx
->GetIndex()+1 == m_pPam
->GetBound().GetNodeIndex() )
768 const sal_Int32 nCntPos
= m_pPam
->GetBound().GetContentIndex();
769 m_pPam
->GetBound().SetContent(
770 pTextNode
->GetText().getLength() + nCntPos
);
772 if( m_pSttNdIdx
->GetIndex()+1 == m_pPam
->GetBound( false ).GetNodeIndex() )
774 const sal_Int32 nCntPos
= m_pPam
->GetBound( false ).GetContentIndex();
775 m_pPam
->GetBound( false ).SetContent(
776 pTextNode
->GetText().getLength() + nCntPos
);
779 // Keep character attribute!
780 SwTextNode
* pDelNd
= aNxtIdx
.GetNode().GetTextNode();
781 if (pTextNode
->GetText().getLength())
782 pDelNd
->FormatToTextAttr( pTextNode
);
784 pTextNode
->ChgFormatColl( pDelNd
->GetTextColl() );
785 pTextNode
->JoinNext();
790 if( SvParserState::Accepted
== eState
)
792 if( m_nMissingImgMaps
)
794 // Some Image-Map relations are still missing.
795 // Maybe now the Image-Maps are there?
799 // now remove the last useless paragraph
800 SwPosition
* pPos
= m_pPam
->GetPoint();
801 if( !pPos
->GetContentIndex() && !bLFStripped
)
803 SwTextNode
* pCurrentNd
;
804 SwNodeOffset nNodeIdx
= pPos
->GetNodeIndex();
806 bool bHasFlysOrMarks
=
807 HasCurrentParaFlys() || HasCurrentParaBookmarks( true );
811 if (!m_pPam
->GetPoint()->GetContentIndex() && CanRemoveNode(nNodeIdx
))
813 SwContentNode
* pCNd
= m_pPam
->GetPointContentNode();
814 if( pCNd
&& pCNd
->StartOfSectionIndex()+2 <
815 pCNd
->EndOfSectionIndex() && !bHasFlysOrMarks
)
817 SwViewShell
*pVSh
= CheckActionViewShell();
818 SwCursorShell
*pCursorSh
= dynamic_cast<SwCursorShell
*>( pVSh
);
820 pCursorSh
->GetCursor()->GetPoint()
821 ->GetNodeIndex() == nNodeIdx
)
823 pCursorSh
->MovePara(GoPrevPara
, fnParaEnd
);
824 pCursorSh
->SetMark();
825 pCursorSh
->ClearMark();
827 SwNode
& rDelNode
= m_pPam
->GetPoint()->GetNode();
828 // move so we don't have a dangling SwContentIndex to the deleted node
829 m_pPam
->GetPoint()->Adjust(SwNodeOffset(1));
830 if (m_pPam
->HasMark())
831 m_pPam
->GetMark()->Adjust(SwNodeOffset(1));
832 m_xDoc
->GetNodes().Delete( rDelNode
);
836 else if( nullptr != ( pCurrentNd
= m_xDoc
->GetNodes()[ nNodeIdx
]->GetTextNode()) && !bHasFlysOrMarks
)
838 if( pCurrentNd
->CanJoinNext( pPos
))
840 SwTextNode
* pNextNd
= pPos
->GetNode().GetTextNode();
841 m_pPam
->SetMark(); m_pPam
->DeleteMark();
844 else if (pCurrentNd
->GetText().isEmpty())
846 m_pPam
->SetMark(); m_pPam
->DeleteMark();
847 SwNode
& rDelNode
= pPos
->GetNode();
848 // move so we don't have a dangling SwContentIndex to the deleted node
849 m_pPam
->GetPoint()->Adjust(SwNodeOffset(+1));
850 m_xDoc
->GetNodes().Delete( rDelNode
);
851 m_pPam
->Move( fnMoveBackward
);
856 // annul the SplitNode from the beginning
857 else if( !IsNewDoc() )
859 if( pPos
->GetContentIndex() ) // then there was no <p> at the end
860 m_pPam
->Move( fnMoveForward
, GoInNode
); // therefore to the next
861 SwTextNode
* pTextNode
= pPos
->GetNode().GetTextNode();
862 SwNodeIndex
aPrvIdx( pPos
->GetNode() );
863 if( pTextNode
&& pTextNode
->CanJoinPrev( &aPrvIdx
) &&
864 *m_pSttNdIdx
<= aPrvIdx
)
866 // Normally here should take place a JoinNext, but all cursors and
867 // so are registered in pTextNode, so that it MUST remain.
869 // Convert paragraph to character attribute, from Prev adopt
870 // the paragraph attribute and the template!
871 SwTextNode
* pPrev
= aPrvIdx
.GetNode().GetTextNode();
872 pTextNode
->ChgFormatColl( pPrev
->GetTextColl() );
873 pTextNode
->FormatToTextAttr( pPrev
);
874 pTextNode
->ResetAllAttr();
876 if( pPrev
->HasSwAttrSet() )
877 pTextNode
->SetAttr( *pPrev
->GetpSwAttrSet() );
879 if( &m_pPam
->GetBound().GetNode() == pPrev
)
880 m_pPam
->GetBound().nContent
.Assign( pTextNode
, 0 );
881 if( &m_pPam
->GetBound(false).GetNode() == pPrev
)
882 m_pPam
->GetBound(false).nContent
.Assign( pTextNode
, 0 );
884 pTextNode
->JoinPrev();
888 // adjust AutoLoad in DocumentProperties
889 if (!bFuzzing
&& IsNewDoc())
891 SwDocShell
*pDocShell(m_xDoc
->GetDocShell());
892 OSL_ENSURE(pDocShell
, "no SwDocShell");
894 uno::Reference
<document::XDocumentPropertiesSupplier
> xDPS(
895 pDocShell
->GetModel(), uno::UNO_QUERY_THROW
);
896 uno::Reference
<document::XDocumentProperties
> xDocProps(
897 xDPS
->getDocumentProperties());
898 OSL_ENSURE(xDocProps
.is(), "DocumentProperties is null");
// An autoload interval without a URL gets the path of the imported file.
899 if ( xDocProps
.is() && (xDocProps
->getAutoloadSecs() > 0) &&
900 (xDocProps
->getAutoloadURL().isEmpty()) )
902 xDocProps
->setAutoloadURL(m_aPathToFile
);
907 if( m_bUpdateDocStat
)
909 m_xDoc
->getIDocumentStatistics().UpdateDocStat( false, true );
913 if( SvParserState::Pending
!= GetStatus() )
916 m_pSttNdIdx
= nullptr;
919 // should the parser be the last one who hold the document, then nothing
920 // has to be done anymore, document will be destroyed shortly!
921 if( 1 < m_xDoc
->getReferenceCount() )
925 m_xDoc
->GetIDocumentUndoRedo().DelAllUndoObj();
926 m_xDoc
->GetIDocumentUndoRedo().DoUndo(true);
930 // When at the beginning of Continue no Shell was available,
931 // it's possible in the meantime one was created.
932 // In that case the bWasUndo flag is wrong and we must
934 SwViewShell
*pTmpVSh
= CheckActionViewShell();
937 m_xDoc
->GetIDocumentUndoRedo().DoUndo(true);
941 m_xDoc
->SetOle2Link( aOLELink
);
943 m_xDoc
->getIDocumentState().ResetModified();
944 if( m_bSetModEnabled
&& m_xDoc
->GetDocShell() )
946 m_xDoc
->GetDocShell()->EnableSetModified();
947 m_bSetModEnabled
= false; // this is unnecessary here
951 // When the Document-SwViewShell still exists and an Action is open
952 // (doesn't have to be by abort), end the Action, disconnect from Shell
953 // and finally reconstruct the old Shell.
954 CallEndAction( true );
// Hint handler for the objects this parser listens to; the only hint id
// tested here is SfxHintId::Dying.
// NOTE(review): presumably the parser stops listening and aborts when the
// observed object dies - confirm against the branch body.
961 void SwHTMLParser::Notify(const SfxHint
& rHint
)
963 if(rHint
.GetId() == SfxHintId::Dying
)
// Marks the document as initialised; calling it a second time triggers a
// debug assertion. An open action is ended via CallEndAction(true) and undo
// is switched off on the document.
970 void SwHTMLParser::DocumentDetected()
972 OSL_ENSURE( !m_bDocInitialized
, "DocumentDetected called multiple times" );
973 m_bDocInitialized
= true;
979 CallEndAction( true );
981 m_xDoc
->GetIDocumentUndoRedo().DoUndo(false);
982 // For DocumentDetected in general a SwViewShell is created.
983 // But it also can be created later, in case the UI is captured.
988 // is called for every token that is recognised in CallParser
989 void SwHTMLParser::NextToken( HtmlTokenId nToken
)
991 if( ( m_xDoc
->GetDocShell() && m_xDoc
->GetDocShell()->IsAbortingImport() )
992 || 1 == m_xDoc
->getReferenceCount() )
994 // Was the import cancelled by SFX? If a pending stack
996 eState
= SvParserState::Error
;
997 OSL_ENSURE( m_vPendingStack
.empty() || m_vPendingStack
.back().nToken
!= HtmlTokenId::NONE
,
998 "SwHTMLParser::NextToken: Pending-Stack without token" );
999 if( 1 == m_xDoc
->getReferenceCount() || m_vPendingStack
.empty() )
1003 #if OSL_DEBUG_LEVEL > 0
1004 if( !m_vPendingStack
.empty() )
1008 // tables are read by recursive method calls
1009 case HtmlTokenId::TABLE_ON
:
1010 // For CSS declarations we might have to wait
1011 // for a file download to finish
1012 case HtmlTokenId::LINK
:
1013 // For controls we might have to set the size.
1014 case HtmlTokenId::INPUT
:
1015 case HtmlTokenId::TEXTAREA_ON
:
1016 case HtmlTokenId::SELECT_ON
:
1017 case HtmlTokenId::SELECT_OFF
:
1020 OSL_ENSURE( m_vPendingStack
.empty(), "Unknown token for Pending-Stack" );
1026 // The following special cases have to be treated before the
1027 // filter detection, because Netscape doesn't reference the content
1028 // of the title for filter detection either.
1029 if( m_vPendingStack
.empty() )
1035 case HtmlTokenId::TITLE_OFF
:
1037 OUString sTitle
= m_sTitle
.makeStringAndClear();
1038 if( IsNewDoc() && !sTitle
.isEmpty() )
1040 if( m_xDoc
->GetDocShell() ) {
1041 uno::Reference
<document::XDocumentPropertiesSupplier
>
1042 xDPS(m_xDoc
->GetDocShell()->GetModel(),
1043 uno::UNO_QUERY_THROW
);
1044 uno::Reference
<document::XDocumentProperties
> xDocProps(
1045 xDPS
->getDocumentProperties());
1046 OSL_ENSURE(xDocProps
.is(), "no DocumentProperties");
1047 if (xDocProps
.is()) {
1048 xDocProps
->setTitle(sTitle
);
1051 m_xDoc
->GetDocShell()->SetTitle(sTitle
);
1058 case HtmlTokenId::NONBREAKSPACE
:
1059 m_sTitle
.append(" ");
1062 case HtmlTokenId::SOFTHYPH
:
1063 m_sTitle
.append("-");
1066 case HtmlTokenId::TEXTTOKEN
:
1067 m_sTitle
.append(aToken
);
1071 m_sTitle
.append("<");
1072 if( (nToken
>= HtmlTokenId::ONOFF_START
) && isOffToken(nToken
) )
1073 m_sTitle
.append("/");
1074 m_sTitle
.append(sSaveToken
);
1075 if( !aToken
.isEmpty() )
1077 m_sTitle
.append(" ");
1078 m_sTitle
.append(aToken
);
1080 m_sTitle
.append(">");
1088 // Find out what type of document it is if we don't know already.
1089 // For Controls this has to be finished before the control is inserted
1090 // because for inserting a View is needed.
1091 if( !m_bDocInitialized
)
1094 bool bGetIDOption
= false, bInsertUnknown
= false;
1095 bool bUpperSpaceSave
= m_bUpperSpace
;
1096 m_bUpperSpace
= false;
1098 // The following special cases may or have to be treated after the
1100 if( m_vPendingStack
.empty() )
1102 if( m_bInFloatingFrame
)
1104 // <SCRIPT> is ignored here (from us), because it is ignored in
1106 if( HtmlTokenId::IFRAME_OFF
== nToken
)
1108 m_bCallNextToken
= false;
1109 m_bInFloatingFrame
= false;
1114 else if( m_bInNoEmbed
)
1118 case HtmlTokenId::NOEMBED_OFF
:
1119 m_aContents
= convertLineEnd(m_aContents
, GetSystemLineEnd());
1120 InsertComment( m_aContents
, OOO_STRING_SVTOOLS_HTML_noembed
);
1121 m_aContents
.clear();
1122 m_bCallNextToken
= false;
1123 m_bInNoEmbed
= false;
1126 case HtmlTokenId::RAWDATA
:
1127 InsertCommentText( OOO_STRING_SVTOOLS_HTML_noembed
);
1131 OSL_ENSURE( false, "SwHTMLParser::NextToken: invalid tag" );
1137 else if( m_pAppletImpl
)
1139 // in an applet only <PARAM> tags and the </APPLET> tag
1140 // are of interest for us (for the moment)
1141 // <SCRIPT> is ignored here (from Netscape)!
1145 case HtmlTokenId::APPLET_OFF
:
1146 m_bCallNextToken
= false;
1149 case HtmlTokenId::OBJECT_OFF
:
1150 m_bCallNextToken
= false;
1153 case HtmlTokenId::PARAM
:
1161 else if( m_bTextArea
)
1163 // in a TextArea everything up to </TEXTAREA> is inserted as text.
1164 // <SCRIPT> is ignored here (from Netscape)!
1168 case HtmlTokenId::TEXTAREA_OFF
:
1169 m_bCallNextToken
= false;
1174 InsertTextAreaText( nToken
);
1180 else if( m_bSelect
)
1182 // HAS to be treated after bNoScript!
1185 case HtmlTokenId::SELECT_OFF
:
1186 m_bCallNextToken
= false;
1190 case HtmlTokenId::OPTION
:
1191 InsertSelectOption();
1194 case HtmlTokenId::TEXTTOKEN
:
1198 case HtmlTokenId::INPUT
:
1199 case HtmlTokenId::SCRIPT_ON
:
1200 case HtmlTokenId::SCRIPT_OFF
:
1201 case HtmlTokenId::NOSCRIPT_ON
:
1202 case HtmlTokenId::NOSCRIPT_OFF
:
1203 case HtmlTokenId::RAWDATA
:
1204 // treat in normal switch
1212 else if( m_pMarquee
)
1214 // in a TextArea everything up to </TEXTAREA> is inserted as text.
1215 // The <SCRIPT> tags are ignored from MS-IE, we ignore the whole
1219 case HtmlTokenId::MARQUEE_OFF
:
1220 m_bCallNextToken
= false;
1224 case HtmlTokenId::TEXTTOKEN
:
1225 InsertMarqueeText();
1232 else if( m_bInField
)
1236 case HtmlTokenId::SDFIELD_OFF
:
1237 m_bCallNextToken
= false;
1241 case HtmlTokenId::TEXTTOKEN
:
1249 else if( m_bInFootEndNoteAnchor
|| m_bInFootEndNoteSymbol
)
1253 case HtmlTokenId::ANCHOR_OFF
:
1255 m_bCallNextToken
= false;
1258 case HtmlTokenId::TEXTTOKEN
:
1259 InsertFootEndNoteText();
1265 else if( !m_aUnknownToken
.isEmpty() )
1267 // Paste content of unknown tags.
1268 // (but surely if we are not in the header section) fdo#36080 fdo#34666
1269 if (!aToken
.isEmpty() && !IsInHeader() )
1271 if( !m_bDocInitialized
)
1273 m_xDoc
->getIDocumentContentOperations().InsertString( *m_pPam
, aToken
.toString());
1275 // if there are temporary paragraph attributes and the
1276 // paragraph isn't empty then the paragraph attributes
1278 m_aParaAttrs
.clear();
1283 // Unknown token in the header are only closed by a matching
1284 // end-token, </HEAD> or <BODY>. Text inside is ignored.
1287 case HtmlTokenId::UNKNOWNCONTROL_OFF
:
1288 if( m_aUnknownToken
!= sSaveToken
)
1291 case HtmlTokenId::FRAMESET_ON
:
1292 case HtmlTokenId::HEAD_OFF
:
1293 case HtmlTokenId::BODY_ON
:
1294 case HtmlTokenId::IMAGE
: // Don't know why Netscape acts this way.
1295 m_aUnknownToken
.clear();
1297 case HtmlTokenId::TEXTTOKEN
:
1300 m_aUnknownToken
.clear();
1308 case HtmlTokenId::BODY_ON
:
1312 if( !m_aStyleSource
.isEmpty() )
1314 m_pCSS1Parser
->ParseStyleSheet( m_aStyleSource
);
1315 m_aStyleSource
.clear();
1319 InsertBodyOptions();
1320 // If there is a template for the first or the right page,
1322 const SwPageDesc
*pPageDesc
= nullptr;
1323 if( m_pCSS1Parser
->IsSetFirstPageDesc() )
1324 pPageDesc
= m_pCSS1Parser
->GetFirstPageDesc();
1325 else if( m_pCSS1Parser
->IsSetRightPageDesc() )
1326 pPageDesc
= m_pCSS1Parser
->GetRightPageDesc();
1330 m_xDoc
->getIDocumentContentOperations().InsertPoolItem( *m_pPam
, SwFormatPageDesc( pPageDesc
) );
1336 case HtmlTokenId::LINK
:
1340 case HtmlTokenId::BASE
:
1342 const HTMLOptions
& rHTMLOptions
= GetOptions();
1343 for (size_t i
= rHTMLOptions
.size(); i
; )
1345 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
1346 switch( rOption
.GetToken() )
1348 case HtmlOptionId::HREF
:
1349 m_sBaseURL
= rOption
.GetString();
1351 case HtmlOptionId::TARGET
:
1354 SwDocShell
*pDocShell(m_xDoc
->GetDocShell());
1355 OSL_ENSURE(pDocShell
, "no SwDocShell");
1357 uno::Reference
<document::XDocumentPropertiesSupplier
> xDPS(
1358 pDocShell
->GetModel(), uno::UNO_QUERY_THROW
);
1359 uno::Reference
<document::XDocumentProperties
>
1360 xDocProps(xDPS
->getDocumentProperties());
1361 OSL_ENSURE(xDocProps
.is(),"no DocumentProperties");
1362 if (xDocProps
.is()) {
1363 xDocProps
->setDefaultTarget(
1364 rOption
.GetString());
1375 case HtmlTokenId::META
:
1377 SvKeyValueIterator
*pHTTPHeader
= nullptr;
1380 SwDocShell
*pDocSh
= m_xDoc
->GetDocShell();
1382 pHTTPHeader
= pDocSh
->GetHeaderAttributes();
1384 SwDocShell
*pDocShell(m_xDoc
->GetDocShell());
1385 OSL_ENSURE(pDocShell
, "no SwDocShell");
1388 uno::Reference
<document::XDocumentProperties
> xDocProps
;
1391 const uno::Reference
<document::XDocumentPropertiesSupplier
>
1392 xDPS( pDocShell
->GetModel(), uno::UNO_QUERY_THROW
);
1393 xDocProps
= xDPS
->getDocumentProperties();
1394 OSL_ENSURE(xDocProps
.is(), "DocumentProperties is null");
1396 ParseMetaOptions( xDocProps
, pHTTPHeader
);
1401 case HtmlTokenId::TITLE_ON
:
1405 case HtmlTokenId::SCRIPT_ON
:
1409 case HtmlTokenId::SCRIPT_OFF
:
1413 case HtmlTokenId::NOSCRIPT_ON
:
1414 case HtmlTokenId::NOSCRIPT_OFF
:
1415 bInsertUnknown
= true;
1418 case HtmlTokenId::STYLE_ON
:
1422 case HtmlTokenId::STYLE_OFF
:
1426 case HtmlTokenId::RAWDATA
:
1427 if( !m_bIgnoreRawData
)
1429 if( IsReadScript() )
1433 else if( IsReadStyle() )
1435 if( !m_aStyleSource
.isEmpty() )
1436 m_aStyleSource
+= "\n";
1437 m_aStyleSource
+= aToken
;
1442 case HtmlTokenId::OBJECT_ON
:
1449 #if HAVE_FEATURE_JAVA
1451 m_bCallNextToken
= m_pAppletImpl
!=nullptr && m_xTable
;
1455 case HtmlTokenId::OBJECT_OFF
:
1456 if (!m_aEmbeds
.empty())
1460 case HtmlTokenId::APPLET_ON
:
1461 #if HAVE_FEATURE_JAVA
1463 m_bCallNextToken
= m_pAppletImpl
!=nullptr && m_xTable
;
1467 case HtmlTokenId::IFRAME_ON
:
1468 if (bFuzzing
&& m_nFloatingFrames
> 64)
1469 SAL_WARN("sw.html", "Not importing any more FloatingFrames for fuzzing performance");
1472 InsertFloatingFrame();
1473 m_bCallNextToken
= m_bInFloatingFrame
&& m_xTable
;
1477 case HtmlTokenId::LINEBREAK
:
1484 bGetIDOption
= true;
1485 // <BR>s in <PRE> resemble true LFs, hence no break
1488 case HtmlTokenId::NEWPARA
:
1489 // CR in PRE/LISTING/XMP
1491 if( HtmlTokenId::NEWPARA
==nToken
||
1492 m_pPam
->GetPoint()->GetContentIndex() )
1494 AppendTextNode(); // there is no LF at this place
1495 // therefore it will cause no problems
1500 m_xProgress
->Update(rInput
.Tell());
1504 case HtmlTokenId::NONBREAKSPACE
:
1505 m_xDoc
->getIDocumentContentOperations().InsertString( *m_pPam
, OUString(CHAR_HARDBLANK
) );
1508 case HtmlTokenId::SOFTHYPH
:
1509 m_xDoc
->getIDocumentContentOperations().InsertString( *m_pPam
, OUString(CHAR_SOFTHYPHEN
) );
1512 case HtmlTokenId::LINEFEEDCHAR
:
1513 if( m_pPam
->GetPoint()->GetContentIndex() )
1515 if (!m_xTable
&& !m_xDoc
->IsInHeaderFooter(m_pPam
->GetPoint()->GetNode()))
1517 NewAttr(m_xAttrTab
, &m_xAttrTab
->pBreak
, SvxFormatBreakItem(SvxBreak::PageBefore
, RES_BREAK
));
1518 EndAttr( m_xAttrTab
->pBreak
, false );
1522 case HtmlTokenId::TEXTTOKEN
:
1523 case HtmlTokenId::CDATA
:
1524 // insert string without spanning attributes at the end.
1525 if( !aToken
.isEmpty() && ' '==aToken
[0] && !IsReadPRE() )
1527 sal_Int32 nPos
= m_pPam
->GetPoint()->GetContentIndex();
1528 const SwTextNode
* pTextNode
= nPos
? m_pPam
->GetPoint()->GetNode().GetTextNode() : nullptr;
1531 const OUString
& rText
= pTextNode
->GetText();
1532 sal_Unicode cLast
= rText
[--nPos
];
1533 if( ' ' == cLast
|| '\x0a' == cLast
)
1534 aToken
.remove(0, 1);
1537 aToken
.remove(0, 1);
1539 if( aToken
.isEmpty() )
1541 m_bUpperSpace
= bUpperSpaceSave
;
1546 if( !aToken
.isEmpty() )
1548 if( !m_bDocInitialized
)
1551 if (!m_aEmbeds
.empty())
1553 // The text token is inside an OLE object, which means
1555 SwOLENode
* pOLENode
= m_aEmbeds
.top();
1558 // <object> is mapped to an image -> ignore.
1562 if (SwFlyFrameFormat
* pFormat
1563 = dynamic_cast<SwFlyFrameFormat
*>(pOLENode
->GetFlyFormat()))
1565 if (SdrObject
* pObject
= SwXFrame::GetOrCreateSdrObject(*pFormat
))
1567 pObject
->SetTitle(pObject
->GetTitle() + aToken
);
1573 m_xDoc
->getIDocumentContentOperations().InsertString( *m_pPam
, aToken
.toString());
1575 // if there are temporary paragraph attributes and the
1576 // paragraph isn't empty then the paragraph attributes
1578 m_aParaAttrs
.clear();
1584 case HtmlTokenId::HORZRULE
:
1588 case HtmlTokenId::IMAGE
:
1590 // if only the parser references the doc, we can break and set
1592 if( 1 == m_xDoc
->getReferenceCount() )
1594 eState
= SvParserState::Error
;
1598 case HtmlTokenId::SPACER
:
1602 case HtmlTokenId::EMBED
:
1606 case HtmlTokenId::NOEMBED_ON
:
1607 m_bInNoEmbed
= true;
1608 m_bCallNextToken
= bool(m_xTable
);
1609 ReadRawData( OOO_STRING_SVTOOLS_HTML_noembed
);
1612 case HtmlTokenId::DEFLIST_ON
:
1613 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1617 case HtmlTokenId::DEFLIST_OFF
:
1618 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1620 EndDefListItem( HtmlTokenId::NONE
);
1624 case HtmlTokenId::DD_ON
:
1625 case HtmlTokenId::DT_ON
:
1626 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1628 EndDefListItem();// close <DD>/<DT> and set no template
1629 NewDefListItem( nToken
);
1632 case HtmlTokenId::DD_OFF
:
1633 case HtmlTokenId::DT_OFF
:
1634 // c.f. HtmlTokenId::LI_OFF
1635 // Actually we should close a DD/DT now.
1636 // But neither Netscape nor Microsoft do this and so don't we.
1637 EndDefListItem( nToken
);
1641 case HtmlTokenId::DIVISION_ON
:
1642 case HtmlTokenId::CENTER_ON
:
1643 if (!m_isInTableStructure
)
1645 if (m_nOpenParaToken
!= HtmlTokenId::NONE
)
1648 m_nOpenParaToken
= HtmlTokenId::NONE
;
1652 NewDivision( nToken
);
1656 case HtmlTokenId::DIVISION_OFF
:
1657 case HtmlTokenId::CENTER_OFF
:
1658 if (!m_isInTableStructure
)
1660 if (m_nOpenParaToken
!= HtmlTokenId::NONE
)
1663 m_nOpenParaToken
= HtmlTokenId::NONE
;
1671 case HtmlTokenId::MULTICOL_ON
:
1672 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1677 case HtmlTokenId::MULTICOL_OFF
:
1678 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1680 EndTag( HtmlTokenId::MULTICOL_ON
);
1683 case HtmlTokenId::MARQUEE_ON
:
1685 m_bCallNextToken
= m_pMarquee
!=nullptr && m_xTable
;
1688 case HtmlTokenId::FORM_ON
:
1691 case HtmlTokenId::FORM_OFF
:
1696 case HtmlTokenId::PARABREAK_ON
:
1697 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1702 case HtmlTokenId::PARABREAK_OFF
:
1706 case HtmlTokenId::ADDRESS_ON
:
1707 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1709 NewTextFormatColl(HtmlTokenId::ADDRESS_ON
, RES_POOLCOLL_SEND_ADDRESS
);
1712 case HtmlTokenId::ADDRESS_OFF
:
1713 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1715 EndTextFormatColl( HtmlTokenId::ADDRESS_OFF
);
1718 case HtmlTokenId::BLOCKQUOTE_ON
:
1719 case HtmlTokenId::BLOCKQUOTE30_ON
:
1720 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1722 NewTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON
, RES_POOLCOLL_HTML_BLOCKQUOTE
);
1725 case HtmlTokenId::BLOCKQUOTE_OFF
:
1726 case HtmlTokenId::BLOCKQUOTE30_OFF
:
1727 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1729 EndTextFormatColl( HtmlTokenId::BLOCKQUOTE_ON
);
1732 case HtmlTokenId::PREFORMTXT_ON
:
1733 case HtmlTokenId::LISTING_ON
:
1734 case HtmlTokenId::XMP_ON
:
1735 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1737 NewTextFormatColl( nToken
, RES_POOLCOLL_HTML_PRE
);
1740 case HtmlTokenId::PREFORMTXT_OFF
:
1741 m_bNoParSpace
= true; // the last PRE-paragraph gets a spacing
1742 EndTextFormatColl( HtmlTokenId::PREFORMTXT_OFF
);
1745 case HtmlTokenId::LISTING_OFF
:
1746 case HtmlTokenId::XMP_OFF
:
1747 EndTextFormatColl( nToken
);
1750 case HtmlTokenId::HEAD1_ON
:
1751 case HtmlTokenId::HEAD2_ON
:
1752 case HtmlTokenId::HEAD3_ON
:
1753 case HtmlTokenId::HEAD4_ON
:
1754 case HtmlTokenId::HEAD5_ON
:
1755 case HtmlTokenId::HEAD6_ON
:
1756 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1759 m_nOpenParaToken
= HtmlTokenId::NONE
;
1763 NewHeading( nToken
);
1766 case HtmlTokenId::HEAD1_OFF
:
1767 case HtmlTokenId::HEAD2_OFF
:
1768 case HtmlTokenId::HEAD3_OFF
:
1769 case HtmlTokenId::HEAD4_OFF
:
1770 case HtmlTokenId::HEAD5_OFF
:
1771 case HtmlTokenId::HEAD6_OFF
:
1775 case HtmlTokenId::TABLE_ON
:
1776 if( !m_vPendingStack
.empty() )
1777 BuildTable( SvxAdjust::End
);
1780 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1782 OSL_ENSURE(!m_xTable
, "table in table not allowed here");
1783 if( !m_xTable
&& (IsNewDoc() || !m_pPam
->GetPointNode().FindTableNode()) &&
1784 (m_pPam
->GetPoint()->GetNodeIndex() >
1785 m_xDoc
->GetNodes().GetEndOfExtras().GetIndex() ||
1786 !m_pPam
->GetPointNode().FindFootnoteStartNode() ) )
1788 if ( m_nParaCnt
< 5 )
1789 Show(); // show what we have up to here
1791 SvxAdjust eAdjust
= m_xAttrTab
->pAdjust
1792 ? static_cast<const SvxAdjustItem
&>(m_xAttrTab
->pAdjust
->GetItem()).
1795 BuildTable( eAdjust
);
1798 bInsertUnknown
= m_bKeepUnknown
;
1803 case HtmlTokenId::DIRLIST_ON
:
1804 case HtmlTokenId::MENULIST_ON
:
1805 case HtmlTokenId::ORDERLIST_ON
:
1806 case HtmlTokenId::UNORDERLIST_ON
:
1807 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1809 NewNumberBulletList( nToken
);
1812 case HtmlTokenId::DIRLIST_OFF
:
1813 case HtmlTokenId::MENULIST_OFF
:
1814 case HtmlTokenId::ORDERLIST_OFF
:
1815 case HtmlTokenId::UNORDERLIST_OFF
:
1816 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
1818 EndNumberBulletListItem( HtmlTokenId::NONE
, true );
1819 EndNumberBulletList( nToken
);
1822 case HtmlTokenId::LI_ON
:
1823 case HtmlTokenId::LISTHEADER_ON
:
1824 if( m_nOpenParaToken
!= HtmlTokenId::NONE
&&
1825 (m_pPam
->GetPoint()->GetContentIndex()
1826 || HtmlTokenId::PARABREAK_ON
==m_nOpenParaToken
) )
1828 // only finish paragraph for <P><LI>, not for <DD><LI>
1832 if (bFuzzing
&& m_nListItems
> 1024)
1834 SAL_WARN("sw.html", "skipping remaining bullet import for performance during fuzzing");
1838 EndNumberBulletListItem( HtmlTokenId::NONE
, false );// close <LI>/<LH> and don't set a template
1839 NewNumberBulletListItem( nToken
);
1845 case HtmlTokenId::LI_OFF
:
1846 case HtmlTokenId::LISTHEADER_OFF
:
1847 EndNumberBulletListItem( nToken
, false );
1851 case HtmlTokenId::ITALIC_ON
:
1853 SvxPostureItem
aPosture( ITALIC_NORMAL
, RES_CHRATR_POSTURE
);
1854 SvxPostureItem
aPostureCJK( ITALIC_NORMAL
, RES_CHRATR_CJK_POSTURE
);
1855 SvxPostureItem
aPostureCTL( ITALIC_NORMAL
, RES_CHRATR_CTL_POSTURE
);
1856 NewStdAttr( HtmlTokenId::ITALIC_ON
,
1857 &m_xAttrTab
->pItalic
, aPosture
,
1858 &m_xAttrTab
->pItalicCJK
, &aPostureCJK
,
1859 &m_xAttrTab
->pItalicCTL
, &aPostureCTL
);
1863 case HtmlTokenId::BOLD_ON
:
1865 SvxWeightItem
aWeight( WEIGHT_BOLD
, RES_CHRATR_WEIGHT
);
1866 SvxWeightItem
aWeightCJK( WEIGHT_BOLD
, RES_CHRATR_CJK_WEIGHT
);
1867 SvxWeightItem
aWeightCTL( WEIGHT_BOLD
, RES_CHRATR_CTL_WEIGHT
);
1868 NewStdAttr( HtmlTokenId::BOLD_ON
,
1869 &m_xAttrTab
->pBold
, aWeight
,
1870 &m_xAttrTab
->pBoldCJK
, &aWeightCJK
,
1871 &m_xAttrTab
->pBoldCTL
, &aWeightCTL
);
1875 case HtmlTokenId::STRIKE_ON
:
1876 case HtmlTokenId::STRIKETHROUGH_ON
:
1878 NewStdAttr( HtmlTokenId::STRIKE_ON
, &m_xAttrTab
->pStrike
,
1879 SvxCrossedOutItem(STRIKEOUT_SINGLE
, RES_CHRATR_CROSSEDOUT
) );
1883 case HtmlTokenId::UNDERLINE_ON
:
1885 NewStdAttr( HtmlTokenId::UNDERLINE_ON
, &m_xAttrTab
->pUnderline
,
1886 SvxUnderlineItem(LINESTYLE_SINGLE
, RES_CHRATR_UNDERLINE
) );
1890 case HtmlTokenId::SUPERSCRIPT_ON
:
1892 NewStdAttr( HtmlTokenId::SUPERSCRIPT_ON
, &m_xAttrTab
->pEscapement
,
1893 SvxEscapementItem(HTML_ESC_SUPER
,HTML_ESC_PROP
, RES_CHRATR_ESCAPEMENT
) );
1897 case HtmlTokenId::SUBSCRIPT_ON
:
1899 NewStdAttr( HtmlTokenId::SUBSCRIPT_ON
, &m_xAttrTab
->pEscapement
,
1900 SvxEscapementItem(HTML_ESC_SUB
,HTML_ESC_PROP
, RES_CHRATR_ESCAPEMENT
) );
1904 case HtmlTokenId::BLINK_ON
:
1906 NewStdAttr( HtmlTokenId::BLINK_ON
, &m_xAttrTab
->pBlink
,
1907 SvxBlinkItem( true, RES_CHRATR_BLINK
) );
1911 case HtmlTokenId::SPAN_ON
:
1912 NewStdAttr( HtmlTokenId::SPAN_ON
);
1915 case HtmlTokenId::ITALIC_OFF
:
1916 case HtmlTokenId::BOLD_OFF
:
1917 case HtmlTokenId::STRIKE_OFF
:
1918 case HtmlTokenId::UNDERLINE_OFF
:
1919 case HtmlTokenId::SUPERSCRIPT_OFF
:
1920 case HtmlTokenId::SUBSCRIPT_OFF
:
1921 case HtmlTokenId::BLINK_OFF
:
1922 case HtmlTokenId::SPAN_OFF
:
1926 case HtmlTokenId::STRIKETHROUGH_OFF
:
1927 EndTag( HtmlTokenId::STRIKE_OFF
);
1930 case HtmlTokenId::BASEFONT_ON
:
1933 case HtmlTokenId::BASEFONT_OFF
:
1936 case HtmlTokenId::FONT_ON
:
1937 case HtmlTokenId::BIGPRINT_ON
:
1938 case HtmlTokenId::SMALLPRINT_ON
:
1939 NewFontAttr( nToken
);
1941 case HtmlTokenId::FONT_OFF
:
1942 case HtmlTokenId::BIGPRINT_OFF
:
1943 case HtmlTokenId::SMALLPRINT_OFF
:
1944 EndFontAttr( nToken
);
1947 case HtmlTokenId::EMPHASIS_ON
:
1948 case HtmlTokenId::CITATION_ON
:
1949 case HtmlTokenId::STRONG_ON
:
1950 case HtmlTokenId::CODE_ON
:
1951 case HtmlTokenId::SAMPLE_ON
:
1952 case HtmlTokenId::KEYBOARD_ON
:
1953 case HtmlTokenId::VARIABLE_ON
:
1954 case HtmlTokenId::DEFINSTANCE_ON
:
1955 case HtmlTokenId::SHORTQUOTE_ON
:
1956 case HtmlTokenId::LANGUAGE_ON
:
1957 case HtmlTokenId::AUTHOR_ON
:
1958 case HtmlTokenId::PERSON_ON
:
1959 case HtmlTokenId::ACRONYM_ON
:
1960 case HtmlTokenId::ABBREVIATION_ON
:
1961 case HtmlTokenId::INSERTEDTEXT_ON
:
1962 case HtmlTokenId::DELETEDTEXT_ON
:
1964 case HtmlTokenId::TELETYPE_ON
:
1965 NewCharFormat( nToken
);
1968 case HtmlTokenId::SDFIELD_ON
:
1970 m_bCallNextToken
= m_bInField
&& m_xTable
;
1973 case HtmlTokenId::EMPHASIS_OFF
:
1974 case HtmlTokenId::CITATION_OFF
:
1975 case HtmlTokenId::STRONG_OFF
:
1976 case HtmlTokenId::CODE_OFF
:
1977 case HtmlTokenId::SAMPLE_OFF
:
1978 case HtmlTokenId::KEYBOARD_OFF
:
1979 case HtmlTokenId::VARIABLE_OFF
:
1980 case HtmlTokenId::DEFINSTANCE_OFF
:
1981 case HtmlTokenId::SHORTQUOTE_OFF
:
1982 case HtmlTokenId::LANGUAGE_OFF
:
1983 case HtmlTokenId::AUTHOR_OFF
:
1984 case HtmlTokenId::PERSON_OFF
:
1985 case HtmlTokenId::ACRONYM_OFF
:
1986 case HtmlTokenId::ABBREVIATION_OFF
:
1987 case HtmlTokenId::INSERTEDTEXT_OFF
:
1988 case HtmlTokenId::DELETEDTEXT_OFF
:
1990 case HtmlTokenId::TELETYPE_OFF
:
1994 case HtmlTokenId::HEAD_OFF
:
1995 if( !m_aStyleSource
.isEmpty() )
1997 m_pCSS1Parser
->ParseStyleSheet( m_aStyleSource
);
1998 m_aStyleSource
.clear();
2002 case HtmlTokenId::DOCTYPE
:
2003 case HtmlTokenId::BODY_OFF
:
2004 case HtmlTokenId::HTML_OFF
:
2005 case HtmlTokenId::HEAD_ON
:
2006 case HtmlTokenId::TITLE_OFF
:
2007 break; // don't evaluate further???
2008 case HtmlTokenId::HTML_ON
:
2010 const HTMLOptions
& rHTMLOptions
= GetOptions();
2011 for (size_t i
= rHTMLOptions
.size(); i
; )
2013 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
2014 if( HtmlOptionId::DIR == rOption
.GetToken() )
2016 const OUString
& rDir
= rOption
.GetString();
2017 SfxItemSet
aItemSet( m_xDoc
->GetAttrPool(),
2018 m_pCSS1Parser
->GetWhichMap() );
2019 SvxCSS1PropertyInfo aPropInfo
;
2021 ParseStyleOptions( aDummy
, aDummy
, aDummy
, aItemSet
,
2022 aPropInfo
, nullptr, &rDir
);
2024 m_pCSS1Parser
->SetPageDescAttrs( nullptr, &aItemSet
);
2031 case HtmlTokenId::INPUT
:
2035 case HtmlTokenId::TEXTAREA_ON
:
2037 m_bCallNextToken
= m_bTextArea
&& m_xTable
;
2040 case HtmlTokenId::SELECT_ON
:
2042 m_bCallNextToken
= m_bSelect
&& m_xTable
;
2045 case HtmlTokenId::ANCHOR_ON
:
2049 case HtmlTokenId::ANCHOR_OFF
:
2053 case HtmlTokenId::COMMENT
:
2054 if( ( aToken
.getLength() > 5 ) && ( ! m_bIgnoreHTMLComments
) )
2056 // insert as Post-It
2057 // If there are no space characters right behind
2058 // the <!-- and on front of the -->, leave the comment untouched.
2059 if( ' ' == aToken
[ 3 ] &&
2060 ' ' == aToken
[ aToken
.getLength()-3 ] )
2062 std::u16string_view
aComment( aToken
.subView( 3, aToken
.getLength()-5 ) );
2063 InsertComment(OUString(comphelper::string::strip(aComment
, ' ')));
2067 OUString aComment
= "<" + aToken
+ ">";
2068 InsertComment( aComment
);
2073 case HtmlTokenId::MAP_ON
:
2074 // Image Maps are read asynchronously: At first only an image map is created
2075 // Areas are processed later. Nevertheless the
2076 // ImageMap is inserted into the IMap-Array, because it might be used
2078 m_pImageMap
= new ImageMap
;
2079 if( ParseMapOptions( m_pImageMap
) )
2082 m_pImageMaps
.reset( new ImageMaps
);
2083 m_pImageMaps
->push_back(std::unique_ptr
<ImageMap
>(m_pImageMap
));
2088 m_pImageMap
= nullptr;
2092 case HtmlTokenId::MAP_OFF
:
2093 // there is no ImageMap anymore (don't delete IMap, because it's
2094 // already contained in the array!)
2095 m_pImageMap
= nullptr;
2098 case HtmlTokenId::AREA
:
2100 ParseAreaOptions( m_pImageMap
, m_sBaseURL
, SvMacroItemId::OnMouseOver
,
2101 SvMacroItemId::OnMouseOut
);
2104 case HtmlTokenId::FRAMESET_ON
:
2105 bInsertUnknown
= m_bKeepUnknown
;
2108 case HtmlTokenId::NOFRAMES_ON
:
2111 bInsertUnknown
= m_bKeepUnknown
;
2114 case HtmlTokenId::UNKNOWNCONTROL_ON
:
2115 // Ignore content of unknown token in the header, if the token
2116 // does not start with a '!'.
2117 // (but judging from the code, also if does not start with a '%')
2118 // (and also if we're not somewhere we consider PRE)
2119 if( IsInHeader() && !IsReadPRE() && m_aUnknownToken
.isEmpty() &&
2120 !sSaveToken
.isEmpty() && '!' != sSaveToken
[0] &&
2121 '%' != sSaveToken
[0] )
2122 m_aUnknownToken
= sSaveToken
;
2126 bInsertUnknown
= m_bKeepUnknown
;
2133 if( bInsertUnknown
)
2135 OUStringBuffer
aComment("HTML: <");
2136 if( (nToken
>= HtmlTokenId::ONOFF_START
) && isOffToken(nToken
) )
2137 aComment
.append("/");
2138 aComment
.append(sSaveToken
);
2139 if( !aToken
.isEmpty() )
2142 aComment
.append(" " + aToken
);
2144 aComment
.append(">");
2145 InsertComment( aComment
.makeStringAndClear() );
2148 // if there are temporary paragraph attributes and the
2149 // paragraph isn't empty then the paragraph attributes are final.
2150 if( !m_aParaAttrs
.empty() && m_pPam
->GetPoint()->GetContentIndex() )
2151 m_aParaAttrs
.clear();
// Classifies a character attribute by the script group it belongs to.
//   rAttr            - attribute whose item Which() id is examined.
//   rScriptDependent - (out) set to true for the font / font size / language /
//                      posture / weight ids of the three groups listed below.
//                      The last visible assignment sets it to false
//                      (presumably the default branch; the label itself is
//                      not visible in this extract).
//   rScriptType      - (out) i18n::ScriptType::LATIN, ASIAN or COMPLEX for the
//                      matching group. No assignment to it is visible on the
//                      "false" path, so callers should only read it when
//                      rScriptDependent came back true.
2154 static void lcl_swhtml_getItemInfo( const HTMLAttr
& rAttr
,
2155 bool& rScriptDependent
,
2156 sal_uInt16
& rScriptType
)
2158 switch( rAttr
.GetItem().Which() )
// Western ids -> LATIN
2160 case RES_CHRATR_FONT
:
2161 case RES_CHRATR_FONTSIZE
:
2162 case RES_CHRATR_LANGUAGE
:
2163 case RES_CHRATR_POSTURE
:
2164 case RES_CHRATR_WEIGHT
:
2165 rScriptType
= i18n::ScriptType::LATIN
;
2166 rScriptDependent
= true;
// CJK ids -> ASIAN
2168 case RES_CHRATR_CJK_FONT
:
2169 case RES_CHRATR_CJK_FONTSIZE
:
2170 case RES_CHRATR_CJK_LANGUAGE
:
2171 case RES_CHRATR_CJK_POSTURE
:
2172 case RES_CHRATR_CJK_WEIGHT
:
2173 rScriptType
= i18n::ScriptType::ASIAN
;
2174 rScriptDependent
= true;
// CTL ids -> COMPLEX
2176 case RES_CHRATR_CTL_FONT
:
2177 case RES_CHRATR_CTL_FONTSIZE
:
2178 case RES_CHRATR_CTL_LANGUAGE
:
2179 case RES_CHRATR_CTL_POSTURE
:
2180 case RES_CHRATR_CTL_WEIGHT
:
2181 rScriptType
= i18n::ScriptType::COMPLEX
;
2182 rScriptDependent
= true;
// every other id: not script dependent (rScriptType is left as passed in)
2185 rScriptDependent
= false;
// Finishes the current paragraph: appends a new text node at the PaM point
// and moves the still-open HTML attributes over to it.
//   eMode      - controls how the lower paragraph spacing of the finished
//                paragraph is treated; AM_SPACE, AM_NOSPACE and
//                AM_SOFTNOSPACE are the modes tested in this function.
//   bUpdateNum - not referenced in the lines visible in this extract;
//                presumably gates the numbering update near the end
//                (TODO confirm against the full source).
// Returns bool; presumably the result of the document's AppendTextNode()
// stored in bRet (the return statement is not visible in this extract).
2190 bool SwHTMLParser::AppendTextNode( SwHTMLAppendMode eMode
, bool bUpdateNum
)
2192 // A hard line break at the end always must be removed.
2193 // A second one we replace with paragraph spacing.
2194 sal_Int32 nLFStripped
= StripTrailingLF();
// More than one stripped LF in a "no space" mode; the statement guarded by
// this condition is not visible in this extract.
2195 if( (AM_NOSPACE
==eMode
|| AM_SOFTNOSPACE
==eMode
) && nLFStripped
> 1 )
2198 // the hard attributes of this paragraph will never be invalid again
2199 m_aParaAttrs
.clear();
// Only AM_SPACE and AM_NOSPACE inspect the current text node; for every
// other mode pTextNode is nullptr.
2201 SwTextNode
*pTextNode
= (AM_SPACE
==eMode
|| AM_NOSPACE
==eMode
) ?
2202 m_pPam
->GetPoint()->GetNode().GetTextNode() : nullptr;
// Upper/lower spacing currently effective on the node.
2206 const SvxULSpaceItem
& rULSpace
=
2207 pTextNode
->SwContentNode::GetAttr( RES_UL_SPACE
);
// bChange: AM_NOSPACE wants lower spacing 0, so a change is needed when it
// is > 0; otherwise a change is needed when it is 0.
2209 bool bChange
= AM_NOSPACE
==eMode
? rULSpace
.GetLower() > 0
2210 : rULSpace
.GetLower() == 0;
// Spacing defined by the paragraph's format collection.
2214 const SvxULSpaceItem
& rCollULSpace
=
2215 pTextNode
->GetAnyFormatColl().GetULSpace();
// bMayReset: the collection's lower spacing already has the zero/non-zero
// state that eMode asks for.
2217 bool bMayReset
= AM_NOSPACE
==eMode
? rCollULSpace
.GetLower() == 0
2218 : rCollULSpace
.GetLower() > 0;
// (head of this condition is not visible here) If the upper spacing matches
// the collection's as well, the RES_UL_SPACE attribute on the node is reset.
2221 rCollULSpace
.GetUpper() == rULSpace
.GetUpper() )
2223 pTextNode
->ResetAttr( RES_UL_SPACE
);
// Otherwise an item keeping the upper spacing and using 0 (AM_NOSPACE) or
// HTML_PARSPACE as lower spacing is built; the call receiving it is not
// visible in this extract (presumably SetAttr on the node).
2228 SvxULSpaceItem( rULSpace
.GetUpper(),
2229 AM_NOSPACE
==eMode
? 0 : HTML_PARSPACE
, RES_UL_SPACE
) );
// Remember whether this paragraph was ended in a "no space" mode;
// AddParSpace() checks this flag.
2233 m_bNoParSpace
= AM_NOSPACE
==eMode
|| AM_SOFTNOSPACE
==eMode
;
// Copy of the position before the new node is appended.
2235 SwPosition
aOldPos( *m_pPam
->GetPoint() );
2237 bool bRet
= m_xDoc
->getIDocumentContentOperations().AppendTextNode( *m_pPam
->GetPoint() );
2239 // split character attributes and maybe set none,
2240 // which are set for the whole paragraph
2241 const sal_Int32 nEndCnt
= aOldPos
.GetContentIndex();
2242 const SwPosition
& rPos
= *m_pPam
->GetPoint();
// The attribute table is walked as a flat array of HTMLAttr* slots.
// NOTE(review): this relies on HTMLAttrTable consisting only of HTMLAttr*
// members -- confirm against its declaration.
2244 HTMLAttr
** pHTMLAttributes
= reinterpret_cast<HTMLAttr
**>(m_xAttrTab
.get());
2245 for (auto nCnt
= sizeof(HTMLAttrTable
) / sizeof(HTMLAttr
*); nCnt
--; ++pHTMLAttributes
)
2247 HTMLAttr
*pAttr
= *pHTMLAttributes
;
// Only items with a which-id below RES_PARATR_BEGIN are handled here.
2248 if( pAttr
&& pAttr
->GetItem().Which() < RES_PARATR_BEGIN
)
2250 bool bWholePara
= false;
2254 HTMLAttr
*pNext
= pAttr
->GetNext();
// The attribute started in an earlier paragraph, or in the old paragraph
// at a position other than its end.
2255 if( pAttr
->GetStartParagraphIdx() < aOldPos
.GetNodeIndex() ||
2257 pAttr
->GetStartParagraph() == aOldPos
.GetNode() &&
2258 pAttr
->GetStartContent() != nEndCnt
) )
// Started at content index 0 of the old paragraph (the assignment target is
// not visible here; presumably bWholePara).
2261 pAttr
->GetStartParagraph() == aOldPos
.GetNode() &&
2262 pAttr
->GetStartContent() == 0;
2264 sal_Int32 nStt
= pAttr
->m_nStartContent
;
2265 bool bScript
= false;
// nScriptItem is intentionally left uninitialised; lcl_swhtml_getItemInfo
// only assigns it for script dependent items.
2266 sal_uInt16 nScriptItem
;
2267 bool bInsert
= true;
2268 lcl_swhtml_getItemInfo( *pAttr
, bScript
,
2270 // set previous part
2273 const SwTextNode
*pTextNd
=
2274 pAttr
->GetStartParagraph().GetNode().GetTextNode();
2275 OSL_ENSURE( pTextNd
, "No text node" );
2278 const OUString
& rText
= pTextNd
->GetText();
// Script type of the text at the attribute start and the end of that run.
2279 sal_uInt16 nScriptText
=
2280 g_pBreakIt
->GetBreakIter()->getScriptType(
2281 rText
, pAttr
->GetStartContent() );
2282 sal_Int32 nScriptEnd
= g_pBreakIt
->GetBreakIter()
2283 ->endOfScript( rText
, nStt
, nScriptText
);
// Walk the script runs that end before the paragraph end.
2284 while (nScriptEnd
< nEndCnt
&& nScriptEnd
!= -1)
// Only a run whose script matches the item's script gets a clone of the
// attribute covering [nStt, nScriptEnd).
2286 if( nScriptItem
== nScriptText
)
2288 HTMLAttr
*pSetAttr
=
2289 pAttr
->Clone( aOldPos
.GetNode(), nScriptEnd
);
2290 pSetAttr
->m_nStartContent
= nStt
;
2291 pSetAttr
->ClearPrev();
// Without an outer attribute (or for whole-paragraph attributes) the clone
// is queued in m_aSetAttrTab, at the front if m_bInsAtStart is set;
// otherwise it is chained into pNext's Prev list.
2292 if( !pNext
|| bWholePara
)
2294 if (pSetAttr
->m_bInsAtStart
)
2295 m_aSetAttrTab
.push_front( pSetAttr
);
2297 m_aSetAttrTab
.push_back( pSetAttr
);
2300 pNext
->InsertPrev( pSetAttr
);
// Advance to the next script run (the nStt update and the arguments of this
// call are on lines not visible in this extract).
2303 nScriptText
= g_pBreakIt
->GetBreakIter()->getScriptType(
2305 nScriptEnd
= g_pBreakIt
->GetBreakIter()
2306 ->endOfScript( rText
, nStt
, nScriptText
);
// The remaining tail is only set if its script matches the item's script.
2308 bInsert
= nScriptItem
== nScriptText
;
// Clone covering the remaining part up to the old paragraph end.
2313 HTMLAttr
*pSetAttr
=
2314 pAttr
->Clone( aOldPos
.GetNode(), nEndCnt
);
2315 pSetAttr
->m_nStartContent
= nStt
;
2317 // When the attribute is for the whole paragraph, the outer
2318 // attributes aren't effective anymore. Hence it may not be inserted
2319 // in the Prev-List of an outer attribute, because that won't be
2320 // set. That leads to shifting when fields are used.
2321 if( !pNext
|| bWholePara
)
2323 if (pSetAttr
->m_bInsAtStart
)
2324 m_aSetAttrTab
.push_front( pSetAttr
);
2326 m_aSetAttrTab
.push_back( pSetAttr
);
2329 pNext
->InsertPrev( pSetAttr
);
2333 HTMLAttr
*pPrev
= pAttr
->GetPrev();
2336 // the previous attributes must be set anyway
2337 if( !pNext
|| bWholePara
)
2339 if (pPrev
->m_bInsAtStart
)
2340 m_aSetAttrTab
.push_front( pPrev
);
2342 m_aSetAttrTab
.push_back( pPrev
);
2345 pNext
->InsertPrev( pPrev
);
// The open attribute continues from the start of the new paragraph.
2351 pAttr
->SetStart( rPos
);
// Numbering: inside a list the current level is fetched; the other visible
// branch removes RES_PARATR_NUMRULE from the new paragraph. (What is done
// with nLvl is not visible in this extract.)
2359 if( GetNumInfo().GetDepth() )
2361 sal_uInt8 nLvl
= GetNumInfo().GetLevel();
2365 m_pPam
->GetPointNode().GetTextNode()->ResetAttr( RES_PARATR_NUMRULE
);
2368 // We must set the attribute of the paragraph before now (because of JavaScript)
2371 // Now it is time to get rid of all script dependent hints that are
2372 // equal to the settings in the style
2373 SwTextNode
*pTextNd
= aOldPos
.GetNode().GetTextNode();
2374 OSL_ENSURE( pTextNd
, "There is the txt node" );
// 0 if there is no text node or it has no hints.
2375 size_t nCntAttr
= (pTextNd
&& pTextNd
->GetpSwpHints())
2376 ? pTextNd
->GetSwpHints().Count() : 0;
2379 // These are the end position of all script dependent hints.
2380 // If we find a hint that starts before the current end position,
2381 // we have to set it. If we find a hint that starts behind or at
2382 // that position, we have to take the hint value into account.
2383 // If it is equal to the style, or in fact the paragraph value
2384 // for that hint, the hint is removed. Otherwise its end position
// 15 slots: one per script dependent which-id listed in the cases below
// (5 properties x 3 script groups). The nIdx/bFont assignments of the
// individual cases are on lines not visible in this extract.
2386 sal_Int32 aEndPos
[15] =
2387 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
// NOTE(review): pTextNd is dereferenced here; presumably guarded by a check
// of nCntAttr on a line not visible in this extract -- confirm.
2388 SwpHints
& rHints
= pTextNd
->GetSwpHints();
2389 for( size_t i
=0; i
< nCntAttr
; i
++ )
2391 SwTextAttr
*pHt
= rHints
.Get( i
);
2392 sal_uInt16 nWhich
= pHt
->Which();
2397 case RES_CHRATR_FONT
:
2401 case RES_CHRATR_FONTSIZE
:
2404 case RES_CHRATR_LANGUAGE
:
2407 case RES_CHRATR_POSTURE
:
2410 case RES_CHRATR_WEIGHT
:
2413 case RES_CHRATR_CJK_FONT
:
2417 case RES_CHRATR_CJK_FONTSIZE
:
2420 case RES_CHRATR_CJK_LANGUAGE
:
2423 case RES_CHRATR_CJK_POSTURE
:
2426 case RES_CHRATR_CJK_WEIGHT
:
2429 case RES_CHRATR_CTL_FONT
:
2433 case RES_CHRATR_CTL_FONTSIZE
:
2436 case RES_CHRATR_CTL_LANGUAGE
:
2439 case RES_CHRATR_CTL_POSTURE
:
2442 case RES_CHRATR_CTL_WEIGHT
:
2446 // Skip to next attribute
2449 const sal_Int32 nStt
= pHt
->GetStart();
// The hint starts at or behind the end of the last differing hint of the
// same kind, so its value has to be compared with the paragraph's value.
2450 if( nStt
>= aEndPos
[nIdx
] )
2452 const SfxPoolItem
& rItem
=
2453 static_cast<const SwContentNode
*>(pTextNd
)->GetAttr( nWhich
);
// Font items are compared with swhtml_css1atr_equalFontItems, all other
// items with operator==.
2454 if( bFont
? swhtml_css1atr_equalFontItems(rItem
,pHt
->GetAttr())
2455 : rItem
== pHt
->GetAttr() )
2457 // The hint is the same as set in the paragraph and
2458 // therefore, it can be deleted
2459 // CAUTION!!! This WILL delete the hint and it MAY
2460 // also delete the SwpHints!!! To avoid any trouble
2461 // we leave the loop immediately if this is the last
2463 pTextNd
->DeleteAttribute( pHt
);
2471 // The hint is different. Therefore all hints within that
2472 // hint have to be ignored.
2473 aEndPos
[nIdx
] = pHt
->GetEnd() ? *pHt
->GetEnd() : nStt
;
2478 // The hint starts before another one ends.
2479 // The hint in this case is not deleted
2480 OSL_ENSURE( pHt
->GetEnd() && *pHt
->GetEnd() <= aEndPos
[nIdx
],
2481 "hints aren't nested properly!" );
// Outside of tables the paragraph countdown is decremented; the action taken
// when it reaches zero is not visible in this extract (presumably Show()).
2486 if (!m_xTable
&& !--m_nParaCnt
)
// Gives the paragraph in front of the current PaM point a lower paragraph
// spacing if it was ended without one (m_bNoParSpace set). Depending on the
// hints found on that paragraph the spacing used is HTML_CTL_PARSPACE,
// HTML_CJK_PARSPACE or HTML_PARSPACE.
// Takes no parameters and returns nothing; works on m_pPam / m_xDoc.
2492 void SwHTMLParser::AddParSpace()
2494 //If it already has ParSpace, return
2495 if( !m_bNoParSpace
)
// Consume the flag so the spacing is added only once.
2498 m_bNoParSpace
= false;
// Index of the node directly before the node the PaM point is in.
2500 SwNodeOffset nNdIdx
= m_pPam
->GetPoint()->GetNodeIndex() - 1;
// GetTextNode() result; a null check is presumably on a line not visible in
// this extract -- TODO confirm.
2502 SwTextNode
*pTextNode
= m_xDoc
->GetNodes()[nNdIdx
]->GetTextNode();
// Note: despite the r-prefix this is declared by value, i.e. a copy of the
// item (no '&' in the declaration).
2506 SvxULSpaceItem rULSpace
=
2507 pTextNode
->SwContentNode::GetAttr( RES_UL_SPACE
);
// A non-zero lower spacing is already present (the guarded statement is not
// visible here; presumably an early return, matching the comment above).
2508 if( rULSpace
.GetLower() )
2511 const SvxULSpaceItem
& rCollULSpace
=
2512 pTextNode
->GetAnyFormatColl().GetULSpace();
// The collection already provides a lower spacing and the same upper
// spacing: resetting the RES_UL_SPACE attribute is enough.
2513 if( rCollULSpace
.GetLower() &&
2514 rCollULSpace
.GetUpper() == rULSpace
.GetUpper() )
2516 pTextNode
->ResetAttr( RES_UL_SPACE
);
2520 //What I do here, is that I examine the attributes, and if
2521 //I find out, that it's CJK/CTL, then I set the paragraph space
2522 //to the value set in HTML_CJK_PARSPACE/HTML_CTL_PARSPACE.
2524 bool bIsCJK
= false;
2525 bool bIsCTL
= false;
// 0 if the node has no hints.
2527 const size_t nCntAttr
= pTextNode
->GetpSwpHints()
2528 ? pTextNode
->GetSwpHints().Count() : 0;
2530 for(size_t i
= 0; i
< nCntAttr
; ++i
)
2532 SwTextAttr
*const pHt
= pTextNode
->GetSwpHints().Get(i
);
2533 sal_uInt16
const nWhich
= pHt
->Which();
// Any CJK font / size / language / posture / weight hint (the statement this
// guards is not visible here; presumably it sets bIsCJK).
2534 if (RES_CHRATR_CJK_FONT
== nWhich
||
2535 RES_CHRATR_CJK_FONTSIZE
== nWhich
||
2536 RES_CHRATR_CJK_LANGUAGE
== nWhich
||
2537 RES_CHRATR_CJK_POSTURE
== nWhich
||
2538 RES_CHRATR_CJK_WEIGHT
== nWhich
)
// Any CTL font / size / language / posture / weight hint (presumably sets
// bIsCTL).
2543 if (RES_CHRATR_CTL_FONT
== nWhich
||
2544 RES_CHRATR_CTL_FONTSIZE
== nWhich
||
2545 RES_CHRATR_CTL_LANGUAGE
== nWhich
||
2546 RES_CHRATR_CTL_POSTURE
== nWhich
||
2547 RES_CHRATR_CTL_WEIGHT
== nWhich
)
// Three alternative items, each keeping the current upper spacing: CTL, CJK
// and default lower spacing. The branch conditions and the calls receiving
// these items are on lines not visible in this extract.
2557 SvxULSpaceItem( rULSpace
.GetUpper(), HTML_CTL_PARSPACE
, RES_UL_SPACE
) );
2562 SvxULSpaceItem( rULSpace
.GetUpper(), HTML_CJK_PARSPACE
, RES_UL_SPACE
) );
2565 SvxULSpaceItem( rULSpace
.GetUpper(), HTML_PARSPACE
, RES_UL_SPACE
) );
// Interrupts the import so that the part of the document read so far can be
// formatted and shown: the running action is ended, the application gets a
// chance to reschedule, and a new action is started afterwards. If the
// document shell reports an aborted import, or the parser holds the only
// reference to the document, the parser state is set to Error. Finally
// m_nParaCnt is recomputed depending on whether the current node is still in
// the visible area of the view shell.
2570 void SwHTMLParser::Show()
2573 // - an EndAction is called, so the document is formatted
2574 // - a Reschedule is called,
2575 // - the own View-Shell is set again
2576 // - and a StartAction is called
2578 OSL_ENSURE( SvParserState::Working
==eState
, "Show not in working state - That can go wrong" );
2579 SwViewShell
*pOldVSh
= CallEndAction();
2581 Application::Reschedule();
2583 if( ( m_xDoc
->GetDocShell() && m_xDoc
->GetDocShell()->IsAbortingImport() )
2584 || 1 == m_xDoc
->getReferenceCount() )
2586 // was the import aborted by SFX?
2587 eState
= SvParserState::Error
;
2590 // Fetch the SwViewShell again, as it could be destroyed in Reschedule.
2591 SwViewShell
*pVSh
= CallStartAction( pOldVSh
);
2593 // is the current node not visible anymore, then we use a bigger increment
2596 m_nParaCnt
= (m_pPam
->GetPoint()->GetNode().IsInVisibleArea(pVSh
))
// Updates the progress display with the current read position of the input
// stream and lets the application reschedule. As in Show(), an aborted import
// or a document that is referenced only by the parser puts the parser into
// the Error state. If the view shell reports an invalid rectangle afterwards,
// the action is ended and started again without the extra checks (both
// check flags are passed as false).
// NOTE(review): the progress update and the reschedule look like alternative
// branches; the selecting condition is not visible in this view -- confirm.
2601 void SwHTMLParser::ShowStatline()
2604 // - a Reschedule is called, so it can be scrolled
2605 // - the own View-Shell is set again
2606 // - a StartAction/EndAction is called, when there was scrolling.
2608 OSL_ENSURE( SvParserState::Working
==eState
, "ShowStatLine not in working state - That can go wrong" );
2613 m_xProgress
->Update(rInput
.Tell());
2614 CheckActionViewShell();
2618 Application::Reschedule();
2620 if( ( m_xDoc
->GetDocShell() && m_xDoc
->GetDocShell()->IsAbortingImport() )
2621 || 1 == m_xDoc
->getReferenceCount() )
2622 // was the import aborted by SFX?
2623 eState
= SvParserState::Error
;
2625 SwViewShell
*pVSh
= CheckActionViewShell();
2626 if( pVSh
&& pVSh
->HasInvalidRect() )
2628 CallEndAction( false, false );
2629 CallStartAction( pVSh
, false );
// Starts an action on a view shell and remembers that shell in
// m_pActionViewShell.
// pVSh:    the shell to use. If it is null, or if bChkPtr is set, the
//          document's current view shell is fetched from the layout access
//          instead.
// bChkPtr: forces the lookup of the current view shell even when pVSh is
//          given.
// Returns the shell stored in m_pActionViewShell, which may be null.
// An SwEditShell gets StartAction() called through the derived type; the
// call on the plain view shell is also present below.
// NOTE(review): the two StartAction() calls are presumably an if/else pair
// -- the else line is not visible in this view.
2634 SwViewShell
*SwHTMLParser::CallStartAction( SwViewShell
*pVSh
, bool bChkPtr
)
2636 OSL_ENSURE( !m_pActionViewShell
, "CallStartAction: SwViewShell already set" );
2638 if( !pVSh
|| bChkPtr
)
2640 #if OSL_DEBUG_LEVEL > 0
// debug only: keep the passed shell for the consistency check below
2641 SwViewShell
*pOldVSh
= pVSh
;
2643 pVSh
= m_xDoc
->getIDocumentLayoutAccess().GetCurrentViewShell();
2644 #if OSL_DEBUG_LEVEL > 0
2645 OSL_ENSURE( !pVSh
|| !pOldVSh
|| pOldVSh
== pVSh
, "CallStartAction: Who swapped the SwViewShell?" );
2646 if( pOldVSh
&& !pVSh
)
2650 m_pActionViewShell
= pVSh
;
2652 if( m_pActionViewShell
)
2654 if( auto pEditShell
= dynamic_cast< SwEditShell
*>( m_pActionViewShell
) )
2655 pEditShell
->StartAction();
2657 m_pActionViewShell
->StartAction();
2660 return m_pActionViewShell
;
// Ends the action on the view shell remembered in m_pActionViewShell and
// clears that member afterwards.
// bChkAction: when set, nothing is ended unless the shell reports a pending
//             action (ActionPend()); the early return then hands back
//             m_pActionViewShell unchanged.
// bChkPtr:    NOTE(review): its use is not visible in this view; presumably
//             it guards the comparison with the document's current view
//             shell -- confirm.
// For an SwEditShell the view is locked around EndAction() so that it does
// not move, and a pending jump to a mark (m_bChkJumpMark) is executed if the
// visible area still starts at the document border.
// If the parser holds the only reference to the document, the parser state
// is set to Error.
2663 SwViewShell
*SwHTMLParser::CallEndAction( bool bChkAction
, bool bChkPtr
)
2667 SwViewShell
*pVSh
= m_xDoc
->getIDocumentLayoutAccess().GetCurrentViewShell();
2668 OSL_ENSURE( !pVSh
|| m_pActionViewShell
== pVSh
,
2669 "CallEndAction: Who swapped the SwViewShell?" );
2670 #if OSL_DEBUG_LEVEL > 0
2671 if( m_pActionViewShell
&& !pVSh
)
// a different current shell invalidates the remembered one
2674 if( pVSh
!= m_pActionViewShell
)
2675 m_pActionViewShell
= nullptr;
2678 if( !m_pActionViewShell
|| (bChkAction
&& !m_pActionViewShell
->ActionPend()) )
2679 return m_pActionViewShell
;
2681 if (SwEditShell
* pEditShell
= dynamic_cast<SwEditShell
*>(m_pActionViewShell
))
2683 // Already scrolled?, then make sure that the view doesn't move!
2684 const bool bOldLock
= m_pActionViewShell
->IsViewLocked();
2685 m_pActionViewShell
->LockView( true );
2686 pEditShell
->EndAction();
2687 m_pActionViewShell
->LockView( bOldLock
);
2689 // m_bChkJumpMark is only set when the object was also found
2690 if( m_bChkJumpMark
)
2692 const Point
aVisSttPos( DOCUMENTBORDER
, DOCUMENTBORDER
);
2693 if( GetMedium() && aVisSttPos
== m_pActionViewShell
->VisArea().Pos() )
2694 ::JumpToSwMark( m_pActionViewShell
,
2695 GetMedium()->GetURLObject().GetMark() );
2696 m_bChkJumpMark
= false;
2700 m_pActionViewShell
->EndAction();
2702 // if the parser holds the last reference to the document, then we can
2703 // abort here and set an error.
2704 if( 1 == m_xDoc
->getReferenceCount() )
2706 eState
= SvParserState::Error
;
// keep the shell in a local before the member is cleared
2709 SwViewShell
*pVSh
= m_pActionViewShell
;
2710 m_pActionViewShell
= nullptr;
// Checks that the view shell remembered in m_pActionViewShell is still the
// document's current view shell. If the two differ, m_pActionViewShell is
// reset to nullptr. Returns m_pActionViewShell (null after a reset).
2715 SwViewShell
*SwHTMLParser::CheckActionViewShell()
2717 SwViewShell
*pVSh
= m_xDoc
->getIDocumentLayoutAccess().GetCurrentViewShell();
2718 OSL_ENSURE( !pVSh
|| m_pActionViewShell
== pVSh
,
2719 "CheckActionViewShell: Who has swapped SwViewShell?" );
2720 #if OSL_DEBUG_LEVEL > 0
2721 if( m_pActionViewShell
&& !pVSh
)
2724 if( pVSh
!= m_pActionViewShell
)
2725 m_pActionViewShell
= nullptr;
2727 return m_pActionViewShell
;
// Stores the frame format to watch and starts listening to its notifier.
// pFrameFormat is dereferenced right away, so it must not be null.
2730 SwHTMLFrameFormatListener::SwHTMLFrameFormatListener(SwFrameFormat
* pFrameFormat
)
2731 : m_pFrameFormat(pFrameFormat
)
2733 StartListening(m_pFrameFormat
->GetNotifier());
// Hint callback: when a Dying hint arrives, the stored frame format pointer
// is cleared. Other hints are ignored.
2736 void SwHTMLFrameFormatListener::Notify(const SfxHint
& rHint
)
2738 if (rHint
.GetId() == SfxHintId::Dying
)
2739 m_pFrameFormat
= nullptr;
// Sets the attributes collected in m_aSetAttrTab into the document.
// The table is walked from back to front. For each entry bSetAttr decides
// whether it can be set now; such an entry is erased from the table and its
// previous attribute (GetPrev()) is fetched as well. Bookmarks, fields,
// backgrounds and margin items are handled in dedicated case labels, all
// other items are inserted with SetAttrMode::DONTREPLACE.
// Afterwards the fly frames queued in m_aMoveFlyFrames are re-anchored as
// at-character flys where applicable, and finally the fields gathered in
// aFields are inserted.
// bChkEnd:      NOTE(review): presumably selects which of the two bSetAttr /
//               bMoveFly computations below is used -- the selecting lines
//               are not visible in this view, confirm.
// bBeforeTable: the call happens right before a table is inserted; see the
//               comment on attributes ending in the current node below.
// pPostIts:     when non-null, Postit and Script fields are put at the front
//               of this deque; other fields are collected in aFields.
2742 void SwHTMLParser::SetAttr_( bool bChkEnd
, bool bBeforeTable
,
2743 std::deque
<std::unique_ptr
<HTMLAttr
>> *pPostIts
)
2745 SwPaM
aAttrPam( *m_pPam
->GetPoint() );
2746 const SwPosition
& rEndPos
= *m_pPam
->GetPoint();
2747 const sal_Int32 nEndCnt
= m_pPam
->GetPoint()->GetContentIndex();
2749 SwContentNode
* pCNd
;
2751 std::vector
<std::unique_ptr
<HTMLAttr
>> aFields
;
// iterate backwards so that erasing entry n does not disturb the walk
2753 for( auto n
= m_aSetAttrTab
.size(); n
; )
2755 pAttr
= m_aSetAttrTab
[ --n
];
2756 sal_uInt16 nWhich
= pAttr
->m_pItem
->Which();
2758 SwNodeOffset nEndParaIdx
= pAttr
->GetEndParagraphIdx();
2762 // Set character attribute with end early on, so set them still in
2763 // the current paragraph (because of JavaScript and various "chats"(?)).
2764 // This shouldn't be done for attributes which are used for
2765 // the whole paragraph, because they could be from a paragraph style
2766 // which can't be set. Because the attributes are inserted with
2767 // SETATTR_DONTREPLACE, they should be able to be set later.
2768 bSetAttr
= ( nEndParaIdx
< rEndPos
.GetNodeIndex() &&
2769 ((RES_MARGIN_FIRSTLINE
!= nWhich
&& RES_MARGIN_TEXTLEFT
!= nWhich
) || !GetNumInfo().GetNumRule()) ) ||
2770 ( !pAttr
->IsLikePara() &&
2771 nEndParaIdx
== rEndPos
.GetNodeIndex() &&
2772 pAttr
->GetEndContent() < nEndCnt
&&
2773 (isCHRATR(nWhich
) || isTXTATR_WITHEND(nWhich
)) ) ||
2775 nEndParaIdx
== rEndPos
.GetNodeIndex() &&
2776 !pAttr
->GetEndContent() );
2780 // Attributes in body nodes array section shouldn't be set if we are in a
2781 // special nodes array section, but vice versa it's possible.
2782 SwNodeOffset nEndOfIcons
= m_xDoc
->GetNodes().GetEndOfExtras().GetIndex();
2783 bSetAttr
= nEndParaIdx
< rEndPos
.GetNodeIndex() ||
2784 rEndPos
.GetNodeIndex() > nEndOfIcons
||
2785 nEndParaIdx
<= nEndOfIcons
;
2790 // The attribute shouldn't be in the list of temporary paragraph
2791 // attributes, because then it would be deleted.
2792 while( !m_aParaAttrs
.empty() )
2794 OSL_ENSURE( pAttr
!= m_aParaAttrs
.back(),
2795 "SetAttr: Attribute must not yet be set" );
2796 m_aParaAttrs
.pop_back();
2800 m_aSetAttrTab
.erase( m_aSetAttrTab
.begin() + n
);
2804 HTMLAttr
*pPrev
= pAttr
->GetPrev();
2805 if( !pAttr
->m_bValid
)
2807 // invalid attributes can be deleted
2813 pCNd
= pAttr
->m_nStartPara
.GetNode().GetContentNode();
2816 // because of the awful deleting of nodes an index can also
2817 // point to an end node :-(
2818 if ( (pAttr
->GetStartParagraph() == pAttr
->GetEndParagraph()) &&
2819 !isTXTATR_NOEND(nWhich
) )
2821 // when the end index also points to the node, we don't
2822 // need to set attributes anymore, except if it's a text attribute.
2827 pCNd
= m_xDoc
->GetNodes().GoNext( &(pAttr
->m_nStartPara
) );
2829 pAttr
->m_nStartContent
= 0;
2832 OSL_ENSURE( false, "SetAttr: GoNext() failed!" );
2839 // because of the deleting of BRs the start index can also
2840 // point behind the end of the text
2841 if( pAttr
->m_nStartContent
> pCNd
->Len() )
2842 pAttr
->m_nStartContent
= pCNd
->Len();
2843 aAttrPam
.GetPoint()->Assign( *pCNd
, pAttr
->m_nStartContent
);
2846 if ( (pAttr
->GetStartParagraph() != pAttr
->GetEndParagraph()) &&
2847 !isTXTATR_NOEND(nWhich
) )
2849 pCNd
= pAttr
->m_nEndPara
.GetNode().GetContentNode();
2852 pCNd
= SwNodes::GoPrevious( &(pAttr
->m_nEndPara
) );
2854 pAttr
->m_nEndContent
= pCNd
->Len();
2857 OSL_ENSURE( false, "SetAttr: GoPrevious() failed!" );
2858 aAttrPam
.DeleteMark();
2865 else if( pAttr
->IsLikePara() )
2867 pAttr
->m_nEndContent
= pCNd
->Len();
2870 // because of the deleting of BRs the end index can also
2871 // point behind the end of the text
2872 if( pAttr
->m_nEndContent
> pCNd
->Len() )
2873 pAttr
->m_nEndContent
= pCNd
->Len();
2875 aAttrPam
.GetPoint()->Assign( *pCNd
, pAttr
->m_nEndContent
);
2877 aAttrPam
.GetPoint()->GetNodeIndex() ==
2878 rEndPos
.GetNodeIndex() )
2880 // If we're before inserting a table and the attribute ends
2881 // in the current node, then we must end it in the previous
2882 // node or discard it, if it starts in that node.
2883 if( nWhich
!= RES_BREAK
&& nWhich
!= RES_PAGEDESC
&&
2884 !isTXTATR_NOEND(nWhich
) )
2886 if( aAttrPam
.GetMark()->GetNodeIndex() !=
2887 rEndPos
.GetNodeIndex() )
2889 OSL_ENSURE( !aAttrPam
.GetPoint()->GetContentIndex(),
2890 "Content-Position before table not 0???" );
2891 aAttrPam
.Move( fnMoveBackward
);
2895 aAttrPam
.DeleteMark();
2905 case RES_FLTR_BOOKMARK
: // insert bookmark
2907 const OUString
sName( static_cast<SfxStringItem
*>(pAttr
->m_pItem
.get())->GetValue() );
2908 IDocumentMarkAccess
* const pMarkAccess
= m_xDoc
->getIDocumentMarkAccess();
2909 IDocumentMarkAccess::const_iterator_t ppBkmk
= pMarkAccess
->findMark( sName
);
2910 if( ppBkmk
!= pMarkAccess
->getAllMarksEnd() &&
2911 (*ppBkmk
)->GetMarkStart() == *aAttrPam
.GetPoint() )
2912 break; // do not generate duplicates on this position
2913 aAttrPam
.DeleteMark();
2914 const ::sw::mark::IMark
* const pNewMark
= pMarkAccess
->makeMark(
2917 IDocumentMarkAccess::MarkType::BOOKMARK
,
2918 ::sw::mark::InsertMode::New
);
// the new bookmark is the jump target that was asked for
2921 if( JumpToMarks::Mark
== m_eJumpTo
&& pNewMark
->GetName() == m_sJmpMark
)
2923 m_bChkJumpMark
= true;
2924 m_eJumpTo
= JumpToMarks::NONE
;
2928 case RES_TXTATR_FIELD
:
2929 case RES_TXTATR_ANNOTATION
:
2930 case RES_TXTATR_INPUTFIELD
:
2932 SwFieldIds nFieldWhich
=
2934 ? static_cast<const SwFormatField
*>(pAttr
->m_pItem
.get())->GetField()->GetTyp()->Which()
2935 : SwFieldIds::Database
;
2936 if( pPostIts
&& (SwFieldIds::Postit
== nFieldWhich
||
2937 SwFieldIds::Script
== nFieldWhich
) )
2939 pPostIts
->emplace_front( pAttr
);
2943 aFields
.emplace_back( pAttr
);
2946 aAttrPam
.DeleteMark();
2950 // tdf#94088 expand RES_BACKGROUND to the new fill attribute
2951 // definitions in the range [XATTR_FILL_FIRST .. XATTR_FILL_LAST].
2952 // This is the right place in the future if the adapted fill attributes
2953 // may be handled more directly in HTML import to handle them.
2954 case RES_BACKGROUND
:
2956 const SvxBrushItem
& rBrush
= static_cast< SvxBrushItem
& >(*pAttr
->m_pItem
);
2957 SfxItemSetFixed
<XATTR_FILL_FIRST
, XATTR_FILL_LAST
> aNewSet(m_xDoc
->GetAttrPool());
2959 setSvxBrushItemAsFillAttributesToTargetSet(rBrush
, aNewSet
);
2960 m_xDoc
->getIDocumentContentOperations().InsertItemSet(aAttrPam
, aNewSet
, SetAttrMode::DONTREPLACE
);
2968 case RES_MARGIN_FIRSTLINE
:
2969 case RES_MARGIN_TEXTLEFT
:
2970 case RES_MARGIN_RIGHT
:
2971 if( aAttrPam
.GetPoint()->GetNodeIndex() ==
2972 aAttrPam
.GetMark()->GetNodeIndex())
2974 // because of numbering set this attribute directly at node
2975 pCNd
->SetAttr( *pAttr
->m_pItem
);
2979 "LRSpace set over multiple paragraphs!" );
2980 [[fallthrough
]]; // (shouldn't reach this point anyway)
2983 // maybe jump to a bookmark
2984 if( RES_TXTATR_INETFMT
== nWhich
&&
2985 JumpToMarks::Mark
== m_eJumpTo
&&
2986 m_sJmpMark
== static_cast<SwFormatINetFormat
*>(pAttr
->m_pItem
.get())->GetName() )
2988 m_bChkJumpMark
= true;
2989 m_eJumpTo
= JumpToMarks::NONE
;
2992 m_xDoc
->getIDocumentContentOperations().InsertPoolItem( aAttrPam
, *pAttr
->m_pItem
, SetAttrMode::DONTREPLACE
);
2994 aAttrPam
.DeleteMark();
// second pass: fly frames whose anchoring was postponed
3002 for( auto n
= m_aMoveFlyFrames
.size(); n
; )
3004 SwFrameFormat
*pFrameFormat
= m_aMoveFlyFrames
[--n
]->GetFrameFormat();
3007 SAL_WARN("sw.html", "SwFrameFormat deleted during import");
// m_aMoveFlyFrames and m_aMoveFlyCnts are parallel; erase the same index
3008 m_aMoveFlyFrames
.erase( m_aMoveFlyFrames
.begin() + n
);
3009 m_aMoveFlyCnts
.erase( m_aMoveFlyCnts
.begin() + n
);
3013 const SwFormatAnchor
& rAnchor
= pFrameFormat
->GetAnchor();
3014 OSL_ENSURE( RndStdIds::FLY_AT_PARA
== rAnchor
.GetAnchorId(),
3015 "Only At-Para flys need special handling" );
3016 SwNodeOffset nFlyParaIdx
= rAnchor
.GetAnchorNode()->GetIndex();
3020 bMoveFly
= nFlyParaIdx
< rEndPos
.GetNodeIndex() ||
3021 ( nFlyParaIdx
== rEndPos
.GetNodeIndex() &&
3022 m_aMoveFlyCnts
[n
] < nEndCnt
);
3026 SwNodeOffset nEndOfIcons
= m_xDoc
->GetNodes().GetEndOfExtras().GetIndex();
3027 bMoveFly
= nFlyParaIdx
< rEndPos
.GetNodeIndex() ||
3028 rEndPos
.GetNodeIndex() > nEndOfIcons
||
3029 nFlyParaIdx
<= nEndOfIcons
;
// re-anchor the fly at the remembered character position
3033 pFrameFormat
->DelFrames();
3034 *aAttrPam
.GetPoint() = *rAnchor
.GetContentAnchor();
3035 aAttrPam
.GetPoint()->SetContent( m_aMoveFlyCnts
[n
] );
3036 SwFormatAnchor
aAnchor( rAnchor
);
3037 aAnchor
.SetType( RndStdIds::FLY_AT_CHAR
);
3038 aAnchor
.SetAnchor( aAttrPam
.GetPoint() );
3039 pFrameFormat
->SetFormatAttr( aAnchor
);
3041 const SwFormatHoriOrient
& rHoriOri
= pFrameFormat
->GetHoriOrient();
3042 if( text::HoriOrientation::LEFT
== rHoriOri
.GetHoriOrient() )
3044 SwFormatHoriOrient
aHoriOri( rHoriOri
);
3045 aHoriOri
.SetRelationOrient( text::RelOrientation::CHAR
);
3046 pFrameFormat
->SetFormatAttr( aHoriOri
);
3048 const SwFormatVertOrient
& rVertOri
= pFrameFormat
->GetVertOrient();
3049 if( text::VertOrientation::TOP
== rVertOri
.GetVertOrient() )
3051 SwFormatVertOrient
aVertOri( rVertOri
);
3052 aVertOri
.SetRelationOrient( text::RelOrientation::CHAR
);
3053 pFrameFormat
->SetFormatAttr( aVertOri
);
3056 pFrameFormat
->MakeFrames();
3057 m_aMoveFlyFrames
.erase( m_aMoveFlyFrames
.begin() + n
);
3058 m_aMoveFlyCnts
.erase( m_aMoveFlyCnts
.begin() + n
);
// last pass: insert the fields collected above at their start positions
3061 for (auto & field
: aFields
)
3063 pCNd
= field
->m_nStartPara
.GetNode().GetContentNode();
3064 aAttrPam
.GetPoint()->Assign( *pCNd
, field
->m_nStartContent
);
3067 aAttrPam
.GetPoint()->GetNodeIndex() == rEndPos
.GetNodeIndex() )
3069 OSL_ENSURE( !bBeforeTable
, "Aha, the case does occur" );
3070 OSL_ENSURE( !aAttrPam
.GetPoint()->GetContentIndex(),
3071 "Content-Position before table not 0???" );
3073 aAttrPam
.Move( fnMoveBackward
);
3076 m_xDoc
->getIDocumentContentOperations().InsertPoolItem( aAttrPam
, *field
->m_pItem
);
// Opens a new attribute for rItem at the current PaM position.
// rAttrTable: attribute table handed to the new HTMLAttr.
// ppAttr:     slot of the attribute table the new attribute belongs to.
// rItem:      the item to be set.
// The new HTMLAttr is either linked to the attribute already found in
// *ppAttr through InsertNext(), or stored directly in *ppAttr.
// NOTE(review): the condition choosing between the two is not visible in
// this view; presumably it tests whether *ppAttr is already set -- confirm.
3083 void SwHTMLParser::NewAttr(const std::shared_ptr
<HTMLAttrTable
>& rAttrTable
, HTMLAttr
**ppAttr
, const SfxPoolItem
& rItem
)
3085 // Font height and font colour as well as escape attributes may not be
3086 // combined. Therefore they're saved in a list and in it the last opened
3087 // attribute is at the beginning and count is always one. For all other
3088 // attributes count is just incremented.
3091 HTMLAttr
*pAttr
= new HTMLAttr(*m_pPam
->GetPoint(), rItem
, ppAttr
, rAttrTable
);
3092 pAttr
->InsertNext( *ppAttr
);
3096 (*ppAttr
) = new HTMLAttr(*m_pPam
->GetPoint(), rItem
, ppAttr
, rAttrTable
);
// Ends the attribute pAttr at the current PaM position.
// The end position is taken from the PaM point. For items with a which id of
// at least RES_PARATR_BEGIN that end at content index 0 of a paragraph other
// than their start paragraph, the PaM is first moved back one position; the
// forward move at the end of the function presumably undoes this.
// A closed attribute is queued in m_aSetAttrTab (at the front when
// m_bInsAtStart is set, at the back otherwise) or, while a later opened
// attribute of the same list is still open, appended to that attribute's
// Prev list through InsertPrev(). Attributes for script dependent items that
// start in the end paragraph are split at the script boundaries of the text.
// bChkEmpty: when false, the attribute is treated as having a range even if
//            start and end position are identical.
// NOTE(review): the return statements are not visible in this view, so the
// meaning of the bool result is not documented here.
3099 bool SwHTMLParser::EndAttr( HTMLAttr
* pAttr
, bool bChkEmpty
)
3103 // The list header is saved in the attribute.
3104 HTMLAttr
**ppHead
= pAttr
->m_ppHead
;
3106 OSL_ENSURE( ppHead
, "No list header attribute found!" );
3108 // save the current position as end position
3109 const SwPosition
* pEndPos
= m_pPam
->GetPoint();
3110 sal_Int32 nEndCnt
= m_pPam
->GetPoint()->GetContentIndex();
3112 // Is the last started or an earlier started attribute being ended?
3113 HTMLAttr
*pLast
= nullptr;
3114 if( ppHead
&& pAttr
!= *ppHead
)
3116 // The last started attribute isn't being ended
3118 // Then we look for attribute which was started immediately afterwards,
3119 // which has also not yet been ended (otherwise it would no longer be
3122 while( pLast
&& pLast
->GetNext() != pAttr
)
3123 pLast
= pLast
->GetNext();
3125 OSL_ENSURE( pLast
, "Attribute not found in own list!" );
3128 bool bMoveBack
= false;
3129 sal_uInt16 nWhich
= pAttr
->m_pItem
->Which();
3130 if( !nEndCnt
&& RES_PARATR_BEGIN
<= nWhich
&&
3131 pEndPos
->GetNodeIndex() != pAttr
->GetStartParagraph().GetIndex() )
3133 // Then move back one position in the content!
3134 bMoveBack
= m_pPam
->Move( fnMoveBackward
);
3135 nEndCnt
= m_pPam
->GetPoint()->GetContentIndex();
3138 // now end the attribute
3139 HTMLAttr
*pNext
= pAttr
->GetNext();
3142 sal_uInt16 nScriptItem
= 0;
3143 bool bScript
= false;
3144 // does it have a non-empty range?
3145 if( !bChkEmpty
|| (RES_PARATR_BEGIN
<= nWhich
&& bMoveBack
) ||
3146 RES_PAGEDESC
== nWhich
|| RES_BREAK
== nWhich
||
3147 pEndPos
->GetNodeIndex() != pAttr
->GetStartParagraph().GetIndex() ||
3148 nEndCnt
!= pAttr
->GetStartContent() )
3151 // We do some optimization for script dependent attributes here.
3152 if( pEndPos
->GetNodeIndex() == pAttr
->GetStartParagraph().GetIndex() )
3154 lcl_swhtml_getItemInfo( *pAttr
, bScript
, nScriptItem
);
3162 const SwTextNode
*pTextNd
= (bInsert
&& bScript
) ?
3163 pAttr
->GetStartParagraph().GetNode().GetTextNode() :
3168 const OUString
& rText
= pTextNd
->GetText();
3169 sal_uInt16 nScriptText
= g_pBreakIt
->GetBreakIter()->getScriptType(
3170 rText
, pAttr
->GetStartContent() );
3171 sal_Int32 nScriptEnd
= g_pBreakIt
->GetBreakIter()
3172 ->endOfScript( rText
, pAttr
->GetStartContent(), nScriptText
);
// walk over the script runs that end before the attribute's end
3173 while (nScriptEnd
< nEndCnt
&& nScriptEnd
!= -1)
3175 if( nScriptItem
== nScriptText
)
// clone the attribute for the run that matches the item's script
3177 HTMLAttr
*pSetAttr
= pAttr
->Clone( pEndPos
->GetNode(), nScriptEnd
);
3178 pSetAttr
->ClearPrev();
3180 pNext
->InsertPrev( pSetAttr
);
3183 if (pSetAttr
->m_bInsAtStart
)
3184 m_aSetAttrTab
.push_front( pSetAttr
);
3186 m_aSetAttrTab
.push_back( pSetAttr
);
// continue with the next script run
3189 pAttr
->m_nStartContent
= nScriptEnd
;
3190 nScriptText
= g_pBreakIt
->GetBreakIter()->getScriptType(
3191 rText
, nScriptEnd
);
3192 nScriptEnd
= g_pBreakIt
->GetBreakIter()
3193 ->endOfScript( rText
, nScriptEnd
, nScriptText
);
3195 bInsert
= nScriptItem
== nScriptText
;
3199 pAttr
->m_nEndPara
= pEndPos
->GetNode();
3200 pAttr
->m_nEndContent
= nEndCnt
;
3201 pAttr
->m_bInsAtStart
= RES_TXTATR_INETFMT
!= nWhich
&&
3202 RES_TXTATR_CHARFMT
!= nWhich
;
3206 // No open attributes of that type exists any longer, so all
3207 // can be set. Except they depend on another attribute, then
3208 // they're appended there.
3209 if (pAttr
->m_bInsAtStart
)
3210 m_aSetAttrTab
.push_front( pAttr
);
3212 m_aSetAttrTab
.push_back( pAttr
);
3216 // There are other open attributes of that type,
3217 // therefore the setting must be postponed.
3218 // Hence the current attribute is added at the end
3219 // of the Prev-List of the successor.
3220 pNext
->InsertPrev( pAttr
);
3225 // Then don't insert, but delete. Because of the "faking" of styles
3226 // by hard attributing there can be also other empty attributes in the
3227 // Prev-List, which must be set anyway.
3228 HTMLAttr
*pPrev
= pAttr
->GetPrev();
3234 // The previous attributes must be set anyway.
3236 pNext
->InsertPrev( pPrev
);
3239 if (pPrev
->m_bInsAtStart
)
3240 m_aSetAttrTab
.push_front( pPrev
);
3242 m_aSetAttrTab
.push_back( pPrev
);
3248 // If the first attribute of the list was set, then the list header
3249 // must be corrected as well.
3251 pLast
->m_pNext
= pNext
;
3256 m_pPam
->Move( fnMoveForward
);
// Removes the attribute pAttr from its list without setting it.
// Attributes hanging on pAttr's Prev list are still passed on: either to the
// Prev list of pAttr's successor (InsertPrev()) or into m_aSetAttrTab, at
// the front or back depending on m_bInsAtStart. The list is re-linked around
// pAttr afterwards (pLast->m_pNext = pNext).
// The attribute table referenced by pAttr is kept alive through a local
// shared_ptr copy until the end of the function.
3261 void SwHTMLParser::DeleteAttr( HTMLAttr
* pAttr
)
3263 // preliminary paragraph attributes are not allowed here, they could
3264 // be set here and then the pointers become invalid!
3265 OSL_ENSURE(m_aParaAttrs
.empty(),
3266 "Danger: there are non-final paragraph attributes");
3267 m_aParaAttrs
.clear();
3269 // The list header is saved in the attribute
3270 HTMLAttr
**ppHead
= pAttr
->m_ppHead
;
3272 OSL_ENSURE( ppHead
, "no list header attribute found!" );
3274 // Is the last started or an earlier started attribute being removed?
3275 HTMLAttr
*pLast
= nullptr;
3276 if( ppHead
&& pAttr
!= *ppHead
)
3278 // The last started attribute isn't being removed
3280 // Then we look for attribute which was started immediately afterwards,
3281 // which has also not yet been ended (otherwise it would no longer be
3284 while( pLast
&& pLast
->GetNext() != pAttr
)
3285 pLast
= pLast
->GetNext();
3287 OSL_ENSURE( pLast
, "Attribute not found in own list!" );
3290 // now delete the attribute
3291 HTMLAttr
*pNext
= pAttr
->GetNext();
3292 HTMLAttr
*pPrev
= pAttr
->GetPrev();
3293 //hold ref to xAttrTab until end of scope to ensure *ppHead validity
3294 std::shared_ptr
<HTMLAttrTable
> xKeepAlive(pAttr
->m_xAttrTab
);
3299 // The previous attributes must be set anyway.
3301 pNext
->InsertPrev( pPrev
);
3304 if (pPrev
->m_bInsAtStart
)
3305 m_aSetAttrTab
.push_front( pPrev
);
3307 m_aSetAttrTab
.push_back( pPrev
);
3311 // If the first attribute of the list was deleted, then the list header
3312 // must be corrected as well.
3314 pLast
->m_pNext
= pNext
;
// Moves all open attributes from the parser's attribute table (m_xAttrTab)
// into rNewAttrTab. Both tables are accessed as plain arrays of HTMLAttr*
// with sizeof(HTMLAttrTable) / sizeof(HTMLAttr*) entries. Each slot is
// copied, the attributes reachable through GetNext() get their head
// re-pointed to the new slot and table, and the source slot is cleared.
3319 void SwHTMLParser::SaveAttrTab(std::shared_ptr
<HTMLAttrTable
> const & rNewAttrTab
)
3321 // preliminary paragraph attributes are not allowed here, they could
3322 // be set here and then the pointers become invalid!
3323 OSL_ENSURE(m_aParaAttrs
.empty(),
3324 "Danger: there are non-final paragraph attributes");
3325 m_aParaAttrs
.clear();
3327 HTMLAttr
** pHTMLAttributes
= reinterpret_cast<HTMLAttr
**>(m_xAttrTab
.get());
3328 HTMLAttr
** pSaveAttributes
= reinterpret_cast<HTMLAttr
**>(rNewAttrTab
.get());
// walk both tables slot by slot in parallel
3330 for (auto nCnt
= sizeof(HTMLAttrTable
) / sizeof(HTMLAttr
*); nCnt
--; ++pHTMLAttributes
, ++pSaveAttributes
)
3332 *pSaveAttributes
= *pHTMLAttributes
;
3334 HTMLAttr
*pAttr
= *pSaveAttributes
;
3337 pAttr
->SetHead(pSaveAttributes
, rNewAttrTab
);
3338 pAttr
= pAttr
->GetNext();
3341 *pHTMLAttributes
= nullptr;
// Closes all attributes that are still open in m_xAttrTab and re-opens them
// in rNewAttrTab, starting at the current PaM position.
// For each open attribute that starts before the computed end position, a
// clone ending at that position is queued for setting (in the successor's
// Prev list or in m_aSetAttrTab). The original attribute is then reset to
// the PaM point and appended to the matching slot of rNewAttrTab. The slots
// of m_xAttrTab are cleared.
// NOTE(review): the rest of the parameter list and the condition around the
// end position adjustment are not visible in this view -- confirm against
// the complete function.
3345 void SwHTMLParser::SplitAttrTab( std::shared_ptr
<HTMLAttrTable
> const & rNewAttrTab
,
3348 // preliminary paragraph attributes are not allowed here, they could
3349 // be set here and then the pointers become invalid!
3350 OSL_ENSURE(m_aParaAttrs
.empty(),
3351 "Danger: there are non-final paragraph attributes");
3352 m_aParaAttrs
.clear();
3354 SwNodeIndex
nEndIdx( m_pPam
->GetPoint()->GetNode() );
3356 // close all still open attributes and re-open them after the table
3357 HTMLAttr
** pHTMLAttributes
= reinterpret_cast<HTMLAttr
**>(m_xAttrTab
.get());
3358 HTMLAttr
** pSaveAttributes
= reinterpret_cast<HTMLAttr
**>(rNewAttrTab
.get());
3359 bool bSetAttr
= true;
3360 const sal_Int32 nSttCnt
= m_pPam
->GetPoint()->GetContentIndex();
3361 sal_Int32 nEndCnt
= nSttCnt
;
3365 SwNodeOffset nOldEnd
= nEndIdx
.GetIndex();
3366 SwNodeOffset nTmpIdx
;
// nTmpIdx ends up as the index of a section end node; it is compared
// with the moved end index below
3367 if( ( nTmpIdx
= m_xDoc
->GetNodes().GetEndOfExtras().GetIndex()) >= nOldEnd
||
3368 ( nTmpIdx
= m_xDoc
->GetNodes().GetEndOfAutotext().GetIndex()) >= nOldEnd
)
3370 nTmpIdx
= m_xDoc
->GetNodes().GetEndOfInserts().GetIndex();
3372 SwContentNode
* pCNd
= SwNodes::GoPrevious(&nEndIdx
);
3374 // Don't set attributes, when the PaM was moved outside of the content area.
3375 bSetAttr
= pCNd
&& nTmpIdx
< nEndIdx
.GetIndex();
3377 nEndCnt
= (bSetAttr
? pCNd
->Len() : 0);
3379 for (auto nCnt
= sizeof(HTMLAttrTable
) / sizeof(HTMLAttr
*); nCnt
--; (++pHTMLAttributes
, ++pSaveAttributes
))
3381 HTMLAttr
*pAttr
= *pHTMLAttributes
;
3382 *pSaveAttributes
= nullptr;
3385 HTMLAttr
*pNext
= pAttr
->GetNext();
3386 HTMLAttr
*pPrev
= pAttr
->GetPrev();
3389 ( pAttr
->GetStartParagraphIdx() < nEndIdx
.GetIndex() ||
3390 (pAttr
->GetStartParagraph() == nEndIdx
&&
3391 pAttr
->GetStartContent() != nEndCnt
) ) )
3393 // The attribute must be set before the list. We need the
3394 // original and therefore we clone it, because pointer to the
3395 // attribute exist in the other contexts. The Next-List is lost
3396 // in doing so, but the Previous-List is preserved.
3397 HTMLAttr
*pSetAttr
= pAttr
->Clone( nEndIdx
.GetNode(), nEndCnt
);
3400 pNext
->InsertPrev( pSetAttr
);
3403 if (pSetAttr
->m_bInsAtStart
)
3404 m_aSetAttrTab
.push_front( pSetAttr
);
3406 m_aSetAttrTab
.push_back( pSetAttr
);
3411 // If the attribute doesn't need to be set before the table, then
3412 // the previous attributes must still be set.
3414 pNext
->InsertPrev( pPrev
);
3417 if (pPrev
->m_bInsAtStart
)
3418 m_aSetAttrTab
.push_front( pPrev
);
3420 m_aSetAttrTab
.push_back( pPrev
);
3424 // set the start of the attribute anew and break link
3425 pAttr
->Reset(m_pPam
->GetPoint()->GetNode(), nSttCnt
, pSaveAttributes
, rNewAttrTab
);
3427 if (*pSaveAttributes
)
// append pAttr behind the last attribute already stored in this slot
3429 HTMLAttr
*pSAttr
= *pSaveAttributes
;
3430 while( pSAttr
->GetNext() )
3431 pSAttr
= pSAttr
->GetNext();
3432 pSAttr
->InsertNext( pAttr
);
3435 *pSaveAttributes
= pAttr
;
3440 *pHTMLAttributes
= nullptr;
// Counterpart of SaveAttrTab(): moves all attributes from rNewAttrTab back
// into the parser's attribute table (m_xAttrTab). Each slot is copied, the
// attributes reachable through GetNext() get their head re-pointed to the
// slot in m_xAttrTab, and the slot in rNewAttrTab is cleared. The target
// slots are expected to be empty (asserted).
3444 void SwHTMLParser::RestoreAttrTab(std::shared_ptr
<HTMLAttrTable
> const & rNewAttrTab
)
3446 // preliminary paragraph attributes are not allowed here, they could
3447 // be set here and then the pointers become invalid!
3448 OSL_ENSURE(m_aParaAttrs
.empty(),
3449 "Danger: there are non-final paragraph attributes");
3450 m_aParaAttrs
.clear();
3452 HTMLAttr
** pHTMLAttributes
= reinterpret_cast<HTMLAttr
**>(m_xAttrTab
.get());
3453 HTMLAttr
** pSaveAttributes
= reinterpret_cast<HTMLAttr
**>(rNewAttrTab
.get());
// walk both tables slot by slot in parallel
3455 for (auto nCnt
= sizeof(HTMLAttrTable
) / sizeof(HTMLAttr
*); nCnt
--; ++pHTMLAttributes
, ++pSaveAttributes
)
3457 OSL_ENSURE(!*pHTMLAttributes
, "The attribute table is not empty!");
3459 *pHTMLAttributes
= *pSaveAttributes
;
3461 HTMLAttr
*pAttr
= *pHTMLAttributes
;
3464 OSL_ENSURE( !pAttr
->GetPrev() || !pAttr
->GetPrev()->m_ppHead
,
3465 "Previous attribute has still a header" );
3466 pAttr
->SetHead(pHTMLAttributes
, m_xAttrTab
);
3467 pAttr
= pAttr
->GetNext();
3470 *pSaveAttributes
= nullptr;
// Creates a stand-alone HTMLAttr for rItem at the current PaM position (no
// list head, empty attribute table pointer) and queues it in m_aSetAttrTab.
// bInsAtStart: NOTE(review): presumably selects push_front over push_back;
//              the selecting line is not visible in this view -- confirm.
3474 void SwHTMLParser::InsertAttr( const SfxPoolItem
& rItem
, bool bInsAtStart
)
3476 HTMLAttr
* pTmp
= new HTMLAttr(*m_pPam
->GetPoint(), rItem
, nullptr, std::shared_ptr
<HTMLAttrTable
>());
3478 m_aSetAttrTab
.push_front( pTmp
);
3480 m_aSetAttrTab
.push_back( pTmp
);
// Takes over a deque of attributes (passed by value) and re-inserts the item
// of each one, front to back, through InsertAttr( item, false ). Each
// element is moved out of the deque into a local unique_ptr first.
3483 void SwHTMLParser::InsertAttrs( std::deque
<std::unique_ptr
<HTMLAttr
>> rAttrs
)
3485 while( !rAttrs
.empty() )
3487 std::unique_ptr
<HTMLAttr
> pAttr
= std::move(rAttrs
.front());
3488 InsertAttr( pAttr
->GetItem(), false );
// Handles a start tag that only carries the standard options ID, STYLE,
// CLASS, LANG and DIR. The options are read back to front, a new
// HTMLAttrContext is created for nToken, style options (if any) are parsed
// into an item set and inserted, and the context is pushed.
// For a SPAN_ON token with a class, CreateContainer() is tried first;
// DoPositioning() is only called when that does not apply or returns false.
3493 void SwHTMLParser::NewStdAttr( HtmlTokenId nToken
)
3495 OUString aId
, aStyle
, aLang
, aDir
;
3498 const HTMLOptions
& rHTMLOptions
= GetOptions();
3499 for (size_t i
= rHTMLOptions
.size(); i
; )
3501 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
3502 switch( rOption
.GetToken() )
3504 case HtmlOptionId::ID
:
3505 aId
= rOption
.GetString();
3507 case HtmlOptionId::STYLE
:
3508 aStyle
= rOption
.GetString();
3510 case HtmlOptionId::CLASS
:
3511 aClass
= rOption
.GetString();
3513 case HtmlOptionId::LANG
:
3514 aLang
= rOption
.GetString();
3516 case HtmlOptionId::DIR:
3517 aDir
= rOption
.GetString();
3523 // create a new context
3524 std::unique_ptr
<HTMLAttrContext
> xCntxt(new HTMLAttrContext(nToken
));
3527 if( HasStyleOptions( aStyle
, aId
, aClass
, &aLang
, &aDir
) )
3529 SfxItemSet
aItemSet( m_xDoc
->GetAttrPool(), m_pCSS1Parser
->GetWhichMap() );
3530 SvxCSS1PropertyInfo aPropInfo
;
3532 if( ParseStyleOptions( aStyle
, aId
, aClass
, aItemSet
, aPropInfo
, &aLang
, &aDir
) )
3534 if( HtmlTokenId::SPAN_ON
!= nToken
|| aClass
.isEmpty() ||
3535 !CreateContainer( aClass
, aItemSet
, aPropInfo
, xCntxt
.get() ) )
3536 DoPositioning( aItemSet
, aPropInfo
, xCntxt
.get() );
3537 InsertAttrs( aItemSet
, aPropInfo
, xCntxt
.get(), true );
3542 PushContext(xCntxt
);
// Handles a start tag that sets up to three items in addition to the
// standard options ID, STYLE, CLASS, LANG and DIR.
// nToken:           token the new context is created for.
// ppAttr / rItem:   attribute table slot and item that are always set.
// ppAttr2 / pItem2: optional second slot and item.
// ppAttr3 / pItem3: optional third slot and item.
// With style options present, the items are put into an item set together
// with the parsed style and inserted through InsertAttrs(). The code also
// contains direct InsertAttr() calls for the individual slots.
// NOTE(review): the null checks for pItem2/pItem3 and the else branch are
// not visible in this view -- confirm against the complete function.
3545 void SwHTMLParser::NewStdAttr( HtmlTokenId nToken
,
3546 HTMLAttr
**ppAttr
, const SfxPoolItem
& rItem
,
3547 HTMLAttr
**ppAttr2
, const SfxPoolItem
*pItem2
,
3548 HTMLAttr
**ppAttr3
, const SfxPoolItem
*pItem3
)
3550 OUString aId
, aStyle
, aClass
, aLang
, aDir
;
3552 const HTMLOptions
& rHTMLOptions
= GetOptions();
3553 for (size_t i
= rHTMLOptions
.size(); i
; )
3555 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
3556 switch( rOption
.GetToken() )
3558 case HtmlOptionId::ID
:
3559 aId
= rOption
.GetString();
3561 case HtmlOptionId::STYLE
:
3562 aStyle
= rOption
.GetString();
3564 case HtmlOptionId::CLASS
:
3565 aClass
= rOption
.GetString();
3567 case HtmlOptionId::LANG
:
3568 aLang
= rOption
.GetString();
3570 case HtmlOptionId::DIR:
3571 aDir
= rOption
.GetString();
3577 // create a new context
3578 std::unique_ptr
<HTMLAttrContext
> xCntxt(new HTMLAttrContext(nToken
));
3581 if( HasStyleOptions( aStyle
, aId
, aClass
, &aLang
, &aDir
) )
3583 SfxItemSet
aItemSet( m_xDoc
->GetAttrPool(), m_pCSS1Parser
->GetWhichMap() );
3584 SvxCSS1PropertyInfo aPropInfo
;
3586 aItemSet
.Put( rItem
);
3588 aItemSet
.Put( *pItem2
);
3590 aItemSet
.Put( *pItem3
);
3592 if( ParseStyleOptions( aStyle
, aId
, aClass
, aItemSet
, aPropInfo
, &aLang
, &aDir
) )
3593 DoPositioning( aItemSet
, aPropInfo
, xCntxt
.get() );
3595 InsertAttrs( aItemSet
, aPropInfo
, xCntxt
.get(), true );
3599 InsertAttr( ppAttr
,rItem
, xCntxt
.get() );
3602 OSL_ENSURE( ppAttr2
, "missing table entry for item2" );
3603 InsertAttr( ppAttr2
, *pItem2
, xCntxt
.get() );
3607 OSL_ENSURE( ppAttr3
, "missing table entry for item3" );
3608 InsertAttr( ppAttr3
, *pItem3
, xCntxt
.get() );
3613 PushContext(xCntxt
);
// Handles an end tag: pops the context that belongs to the start token of
// nToken (getOnToken()) and ends the attributes of that context.
3616 void SwHTMLParser::EndTag( HtmlTokenId nToken
)
3619 std::unique_ptr
<HTMLAttrContext
> xCntxt(PopContext(getOnToken(nToken
)));
3622 // and maybe end the attributes
3623 EndContext(xCntxt
.get());
// Handles the start of a BASEFONT element. Reads the SIZE option (default 3)
// and the standard options ID, STYLE, CLASS, LANG and DIR, creates a context
// for BASEFONT_ON and sets the font height m_aFontHeights[nSize-1] for the
// Western, CJK and CTL font size items -- through an item set when style
// options are present, or through the individual attribute table slots.
// Finally the context is pushed and nSize is appended to m_aBaseFontStack.
// NOTE(review): nSize is used as index nSize-1 into m_aFontHeights; any
// range limiting of the SIZE value is not visible in this view -- confirm.
3627 void SwHTMLParser::NewBasefontAttr()
3629 OUString aId
, aStyle
, aClass
, aLang
, aDir
;
3630 sal_uInt16 nSize
= 3;
3632 const HTMLOptions
& rHTMLOptions
= GetOptions();
3633 for (size_t i
= rHTMLOptions
.size(); i
; )
3635 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
3636 switch( rOption
.GetToken() )
3638 case HtmlOptionId::SIZE
:
3639 nSize
= o3tl::narrowing
<sal_uInt16
>(rOption
.GetNumber());
3641 case HtmlOptionId::ID
:
3642 aId
= rOption
.GetString();
3644 case HtmlOptionId::STYLE
:
3645 aStyle
= rOption
.GetString();
3647 case HtmlOptionId::CLASS
:
3648 aClass
= rOption
.GetString();
3650 case HtmlOptionId::LANG
:
3651 aLang
= rOption
.GetString();
3653 case HtmlOptionId::DIR:
3654 aDir
= rOption
.GetString();
3666 // create a new context
3667 std::unique_ptr
<HTMLAttrContext
> xCntxt(new HTMLAttrContext(HtmlTokenId::BASEFONT_ON
));
3670 if( HasStyleOptions( aStyle
, aId
, aClass
, &aLang
, &aDir
) )
3672 SfxItemSet
aItemSet( m_xDoc
->GetAttrPool(), m_pCSS1Parser
->GetWhichMap() );
3673 SvxCSS1PropertyInfo aPropInfo
;
3675 //CJK has different defaults
3676 SvxFontHeightItem
aFontHeight( m_aFontHeights
[nSize
-1], 100, RES_CHRATR_FONTSIZE
);
3677 aItemSet
.Put( aFontHeight
);
3678 SvxFontHeightItem
aFontHeightCJK( m_aFontHeights
[nSize
-1], 100, RES_CHRATR_CJK_FONTSIZE
);
3679 aItemSet
.Put( aFontHeightCJK
);
3680 //Complex type can contain so many types of letters,
3681 //that it's not really worthy to bother, IMO.
3682 //Still, I have set a default.
3683 SvxFontHeightItem
aFontHeightCTL( m_aFontHeights
[nSize
-1], 100, RES_CHRATR_CTL_FONTSIZE
);
3684 aItemSet
.Put( aFontHeightCTL
);
3686 if( ParseStyleOptions( aStyle
, aId
, aClass
, aItemSet
, aPropInfo
, &aLang
, &aDir
) )
3687 DoPositioning( aItemSet
, aPropInfo
, xCntxt
.get() );
3689 InsertAttrs( aItemSet
, aPropInfo
, xCntxt
.get(), true );
3693 SvxFontHeightItem
aFontHeight( m_aFontHeights
[nSize
-1], 100, RES_CHRATR_FONTSIZE
);
3694 InsertAttr( &m_xAttrTab
->pFontHeight
, aFontHeight
, xCntxt
.get() );
3695 SvxFontHeightItem
aFontHeightCJK( m_aFontHeights
[nSize
-1], 100, RES_CHRATR_CJK_FONTSIZE
);
3696 InsertAttr( &m_xAttrTab
->pFontHeightCJK
, aFontHeightCJK
, xCntxt
.get() );
3697 SvxFontHeightItem
aFontHeightCTL( m_aFontHeights
[nSize
-1], 100, RES_CHRATR_CTL_FONTSIZE
);
3698 InsertAttr( &m_xAttrTab
->pFontHeightCTL
, aFontHeightCTL
, xCntxt
.get() );
3702 PushContext(xCntxt
);
3704 // save the font size
3705 m_aBaseFontStack
.push_back( nSize
);
3708 void SwHTMLParser::EndBasefontAttr()
3710 EndTag( HtmlTokenId::BASEFONT_ON
);
3712 // avoid stack underflow in tables
3713 if( m_aBaseFontStack
.size() > m_nBaseFontStMin
)
3714 m_aBaseFontStack
.erase( m_aBaseFontStack
.begin() + m_aBaseFontStack
.size() - 1 );
3717 void SwHTMLParser::NewFontAttr( HtmlTokenId nToken
)
3719 sal_uInt16 nBaseSize
=
3720 ( m_aBaseFontStack
.size() > m_nBaseFontStMin
3721 ? (m_aBaseFontStack
[m_aBaseFontStack
.size()-1] & FONTSIZE_MASK
)
3723 sal_uInt16 nFontSize
=
3724 ( m_aFontStack
.size() > m_nFontStMin
3725 ? (m_aFontStack
[m_aFontStack
.size()-1] & FONTSIZE_MASK
)
3728 OUString aFace
, aId
, aStyle
, aClass
, aLang
, aDir
;
3730 sal_uLong nFontHeight
= 0; // actual font height to set
3731 sal_uInt16 nSize
= 0; // font height in Netscape notation (1-7)
3732 bool bColor
= false;
3734 const HTMLOptions
& rHTMLOptions
= GetOptions();
3735 for (size_t i
= rHTMLOptions
.size(); i
; )
3737 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
3738 switch( rOption
.GetToken() )
3740 case HtmlOptionId::SIZE
:
3741 if( HtmlTokenId::FONT_ON
==nToken
&& !rOption
.GetString().isEmpty() )
3744 if( '+' == rOption
.GetString()[0] ||
3745 '-' == rOption
.GetString()[0] )
3746 nSSize
= o3tl::saturating_add
<sal_Int32
>(nBaseSize
, rOption
.GetSNumber());
3748 nSSize
= static_cast<sal_Int32
>(rOption
.GetNumber());
3752 else if( nSSize
> 7 )
3755 nSize
= o3tl::narrowing
<sal_uInt16
>(nSSize
);
3756 nFontHeight
= m_aFontHeights
[nSize
-1];
3759 case HtmlOptionId::COLOR
:
3760 if( HtmlTokenId::FONT_ON
==nToken
)
3762 rOption
.GetColor( aColor
);
3766 case HtmlOptionId::FACE
:
3767 if( HtmlTokenId::FONT_ON
==nToken
)
3768 aFace
= rOption
.GetString();
3770 case HtmlOptionId::ID
:
3771 aId
= rOption
.GetString();
3773 case HtmlOptionId::STYLE
:
3774 aStyle
= rOption
.GetString();
3776 case HtmlOptionId::CLASS
:
3777 aClass
= rOption
.GetString();
3779 case HtmlOptionId::LANG
:
3780 aLang
= rOption
.GetString();
3782 case HtmlOptionId::DIR:
3783 aDir
= rOption
.GetString();
3789 if( HtmlTokenId::FONT_ON
!= nToken
)
3791 // HTML_BIGPRINT_ON or HTML_SMALLPRINT_ON
3793 // In headings the current heading sets the font height
3794 // and not BASEFONT.
3795 const SwFormatColl
*pColl
= GetCurrFormatColl();
3796 sal_uInt16 nPoolId
= pColl
? pColl
->GetPoolFormatId() : 0;
3797 if( nPoolId
>=RES_POOLCOLL_HEADLINE1
&&
3798 nPoolId
<=RES_POOLCOLL_HEADLINE6
)
3800 // If the font height in the heading wasn't changed yet,
3801 // then take the one from the style.
3802 if( m_nFontStHeadStart
==m_aFontStack
.size() )
3803 nFontSize
= static_cast< sal_uInt16
>(6 - (nPoolId
- RES_POOLCOLL_HEADLINE1
));
3808 if( HtmlTokenId::BIGPRINT_ON
== nToken
)
3809 nSize
= ( nFontSize
<7 ? nFontSize
+1 : 7 );
3811 nSize
= ( nFontSize
>1 ? nFontSize
-1 : 1 );
3813 // If possible in headlines we fetch the new font height
3815 if( nPoolId
&& nSize
>=1 && nSize
<=6 )
3817 m_pCSS1Parser
->GetTextCollFromPool(
3818 RES_POOLCOLL_HEADLINE1
+6-nSize
)->GetSize().GetHeight();
3820 nFontHeight
= m_aFontHeights
[nSize
-1];
3823 OSL_ENSURE( !nSize
== !nFontHeight
, "HTML-Font-Size != Font-Height" );
3826 const OUString aStyleName
;
3827 FontFamily eFamily
= FAMILY_DONTKNOW
; // family and pitch,
3828 FontPitch ePitch
= PITCH_DONTKNOW
; // if not found
3829 rtl_TextEncoding eEnc
= osl_getThreadTextEncoding();
3831 if( !aFace
.isEmpty() && !m_pCSS1Parser
->IsIgnoreFontFamily() )
3833 const FontList
*pFList
= nullptr;
3834 SwDocShell
*pDocSh
= m_xDoc
->GetDocShell();
3837 const SvxFontListItem
*pFListItem
=
3838 static_cast<const SvxFontListItem
*>(pDocSh
->GetItem(SID_ATTR_CHAR_FONTLIST
));
3840 pFList
= pFListItem
->GetFontList();
3843 bool bFound
= false;
3844 sal_Int32 nStrPos
= 0;
3845 while( nStrPos
!= -1 )
3847 OUString aFName
= aFace
.getToken( 0, ',', nStrPos
);
3848 aFName
= comphelper::string::strip(aFName
, ' ');
3849 if( !aFName
.isEmpty() )
3851 if( !bFound
&& pFList
)
3853 sal_Handle hFont
= pFList
->GetFirstFontMetric( aFName
);
3854 if( nullptr != hFont
)
3856 const FontMetric
& rFMetric
= FontList::GetFontMetric( hFont
);
3857 if( RTL_TEXTENCODING_DONTKNOW
!= rFMetric
.GetCharSet() )
3860 if( RTL_TEXTENCODING_SYMBOL
== rFMetric
.GetCharSet() )
3861 eEnc
= RTL_TEXTENCODING_SYMBOL
;
3865 if( !aFontName
.isEmpty() )
3867 aFontName
+= aFName
;
3872 // create a new context
3873 std::unique_ptr
<HTMLAttrContext
> xCntxt(new HTMLAttrContext(nToken
));
3876 if( HasStyleOptions( aStyle
, aId
, aClass
, &aLang
, &aDir
) )
3878 SfxItemSet
aItemSet( m_xDoc
->GetAttrPool(), m_pCSS1Parser
->GetWhichMap() );
3879 SvxCSS1PropertyInfo aPropInfo
;
3883 SvxFontHeightItem
aFontHeight( nFontHeight
, 100, RES_CHRATR_FONTSIZE
);
3884 aItemSet
.Put( aFontHeight
);
3885 SvxFontHeightItem
aFontHeightCJK( nFontHeight
, 100, RES_CHRATR_CJK_FONTSIZE
);
3886 aItemSet
.Put( aFontHeightCJK
);
3887 SvxFontHeightItem
aFontHeightCTL( nFontHeight
, 100, RES_CHRATR_CTL_FONTSIZE
);
3888 aItemSet
.Put( aFontHeightCTL
);
3891 aItemSet
.Put( SvxColorItem(aColor
, RES_CHRATR_COLOR
) );
3892 if( !aFontName
.isEmpty() )
3894 SvxFontItem
aFont( eFamily
, aFontName
, aStyleName
, ePitch
, eEnc
, RES_CHRATR_FONT
);
3895 aItemSet
.Put( aFont
);
3896 SvxFontItem
aFontCJK( eFamily
, aFontName
, aStyleName
, ePitch
, eEnc
, RES_CHRATR_CJK_FONT
);
3897 aItemSet
.Put( aFontCJK
);
3898 SvxFontItem
aFontCTL( eFamily
, aFontName
, aStyleName
, ePitch
, eEnc
, RES_CHRATR_CTL_FONT
);
3899 aItemSet
.Put( aFontCTL
);
3902 if( ParseStyleOptions( aStyle
, aId
, aClass
, aItemSet
, aPropInfo
, &aLang
, &aDir
) )
3903 DoPositioning( aItemSet
, aPropInfo
, xCntxt
.get() );
3905 InsertAttrs( aItemSet
, aPropInfo
, xCntxt
.get(), true );
3911 SvxFontHeightItem
aFontHeight( nFontHeight
, 100, RES_CHRATR_FONTSIZE
);
3912 InsertAttr( &m_xAttrTab
->pFontHeight
, aFontHeight
, xCntxt
.get() );
3913 SvxFontHeightItem
aFontHeightCJK( nFontHeight
, 100, RES_CHRATR_CJK_FONTSIZE
);
3914 InsertAttr( &m_xAttrTab
->pFontHeight
, aFontHeightCJK
, xCntxt
.get() );
3915 SvxFontHeightItem
aFontHeightCTL( nFontHeight
, 100, RES_CHRATR_CTL_FONTSIZE
);
3916 InsertAttr( &m_xAttrTab
->pFontHeight
, aFontHeightCTL
, xCntxt
.get() );
3919 InsertAttr( &m_xAttrTab
->pFontColor
, SvxColorItem(aColor
, RES_CHRATR_COLOR
), xCntxt
.get() );
3920 if( !aFontName
.isEmpty() )
3922 SvxFontItem
aFont( eFamily
, aFontName
, aStyleName
, ePitch
, eEnc
, RES_CHRATR_FONT
);
3923 InsertAttr( &m_xAttrTab
->pFont
, aFont
, xCntxt
.get() );
3924 SvxFontItem
aFontCJK( eFamily
, aFontName
, aStyleName
, ePitch
, eEnc
, RES_CHRATR_CJK_FONT
);
3925 InsertAttr( &m_xAttrTab
->pFont
, aFontCJK
, xCntxt
.get() );
3926 SvxFontItem
aFontCTL( eFamily
, aFontName
, aStyleName
, ePitch
, eEnc
, RES_CHRATR_CTL_FONT
);
3927 InsertAttr( &m_xAttrTab
->pFont
, aFontCTL
, xCntxt
.get() );
3932 PushContext(xCntxt
);
3934 m_aFontStack
.push_back( nSize
);
3937 void SwHTMLParser::EndFontAttr( HtmlTokenId nToken
)
3941 // avoid stack underflow in tables
3942 if( m_aFontStack
.size() > m_nFontStMin
)
3943 m_aFontStack
.erase( m_aFontStack
.begin() + m_aFontStack
.size() - 1 );
3946 void SwHTMLParser::NewPara()
3948 if( m_pPam
->GetPoint()->GetContentIndex() )
3949 AppendTextNode( AM_SPACE
);
3953 m_eParaAdjust
= SvxAdjust::End
;
3954 OUString aId
, aStyle
, aClass
, aLang
, aDir
;
3956 const HTMLOptions
& rHTMLOptions
= GetOptions();
3957 for (size_t i
= rHTMLOptions
.size(); i
; )
3959 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
3960 switch( rOption
.GetToken() )
3962 case HtmlOptionId::ID
:
3963 aId
= rOption
.GetString();
3965 case HtmlOptionId::ALIGN
:
3966 m_eParaAdjust
= rOption
.GetEnum( aHTMLPAlignTable
, m_eParaAdjust
);
3968 case HtmlOptionId::STYLE
:
3969 aStyle
= rOption
.GetString();
3971 case HtmlOptionId::CLASS
:
3972 aClass
= rOption
.GetString();
3974 case HtmlOptionId::LANG
:
3975 aLang
= rOption
.GetString();
3977 case HtmlOptionId::DIR:
3978 aDir
= rOption
.GetString();
3984 // create a new context
3985 std::unique_ptr
<HTMLAttrContext
> xCntxt(
3986 !aClass
.isEmpty() ? new HTMLAttrContext( HtmlTokenId::PARABREAK_ON
,
3987 RES_POOLCOLL_TEXT
, aClass
)
3988 : new HTMLAttrContext( HtmlTokenId::PARABREAK_ON
));
3990 // parse styles (Don't consider class. This is only possible as long as none of
3991 // the CSS1 properties of the class must be formatted hard!!!)
3992 if (HasStyleOptions(aStyle
, aId
, {}, &aLang
, &aDir
))
3994 SfxItemSet
aItemSet( m_xDoc
->GetAttrPool(), m_pCSS1Parser
->GetWhichMap() );
3995 SvxCSS1PropertyInfo aPropInfo
;
3997 if (ParseStyleOptions(aStyle
, aId
, OUString(), aItemSet
, aPropInfo
, &aLang
, &aDir
))
3999 OSL_ENSURE( aClass
.isEmpty() || !m_pCSS1Parser
->GetClass( aClass
),
4000 "Class is not considered" );
4001 DoPositioning( aItemSet
, aPropInfo
, xCntxt
.get() );
4002 InsertAttrs( aItemSet
, aPropInfo
, xCntxt
.get() );
4006 if( SvxAdjust::End
!= m_eParaAdjust
)
4007 InsertAttr( &m_xAttrTab
->pAdjust
, SvxAdjustItem(m_eParaAdjust
, RES_PARATR_ADJUST
), xCntxt
.get() );
4009 // and push on stack
4010 PushContext( xCntxt
);
4012 // set the current style or its attributes
4013 SetTextCollAttrs( !aClass
.isEmpty() ? m_aContexts
.back().get() : nullptr );
4018 OSL_ENSURE( m_nOpenParaToken
== HtmlTokenId::NONE
, "Now an open paragraph element will be lost." );
4019 m_nOpenParaToken
= HtmlTokenId::PARABREAK_ON
;
4022 void SwHTMLParser::EndPara( bool bReal
)
4024 if (HtmlTokenId::LI_ON
==m_nOpenParaToken
&& m_xTable
)
4026 #if OSL_DEBUG_LEVEL > 0
4027 const SwNumRule
*pNumRule
= m_pPam
->GetPointNode().GetTextNode()->GetNumRule();
4028 OSL_ENSURE( pNumRule
, "Where is the NumRule" );
4032 // Netscape skips empty paragraphs, we do the same; unless in XHTML mode, which prefers mapping
4033 // the source document to the doc model 1:1 if possible.
4036 if (m_pPam
->GetPoint()->GetContentIndex() || m_bXHTML
)
4037 AppendTextNode( AM_SPACE
);
4042 // If a DD or DT was open, it's an implied definition list,
4043 // which must be closed now.
4044 if( (m_nOpenParaToken
== HtmlTokenId::DT_ON
|| m_nOpenParaToken
== HtmlTokenId::DD_ON
) &&
4050 // Pop the context of the stack. It can also be from an
4051 // implied opened definition list.
4052 std::unique_ptr
<HTMLAttrContext
> xCntxt(
4053 PopContext( m_nOpenParaToken
!= HtmlTokenId::NONE
? getOnToken(m_nOpenParaToken
) : HtmlTokenId::PARABREAK_ON
));
4058 EndContext(xCntxt
.get());
4059 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4063 // reset the existing style
4067 m_nOpenParaToken
= HtmlTokenId::NONE
;
4070 void SwHTMLParser::NewHeading( HtmlTokenId nToken
)
4072 m_eParaAdjust
= SvxAdjust::End
;
4074 OUString aId
, aStyle
, aClass
, aLang
, aDir
;
4076 const HTMLOptions
& rHTMLOptions
= GetOptions();
4077 for (size_t i
= rHTMLOptions
.size(); i
; )
4079 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
4080 switch( rOption
.GetToken() )
4082 case HtmlOptionId::ID
:
4083 aId
= rOption
.GetString();
4085 case HtmlOptionId::ALIGN
:
4086 m_eParaAdjust
= rOption
.GetEnum( aHTMLPAlignTable
, m_eParaAdjust
);
4088 case HtmlOptionId::STYLE
:
4089 aStyle
= rOption
.GetString();
4091 case HtmlOptionId::CLASS
:
4092 aClass
= rOption
.GetString();
4094 case HtmlOptionId::LANG
:
4095 aLang
= rOption
.GetString();
4097 case HtmlOptionId::DIR:
4098 aDir
= rOption
.GetString();
4104 // open a new paragraph
4105 if( m_pPam
->GetPoint()->GetContentIndex() )
4106 AppendTextNode( AM_SPACE
);
4110 // search for the matching style
4111 sal_uInt16 nTextColl
;
4114 case HtmlTokenId::HEAD1_ON
: nTextColl
= RES_POOLCOLL_HEADLINE1
; break;
4115 case HtmlTokenId::HEAD2_ON
: nTextColl
= RES_POOLCOLL_HEADLINE2
; break;
4116 case HtmlTokenId::HEAD3_ON
: nTextColl
= RES_POOLCOLL_HEADLINE3
; break;
4117 case HtmlTokenId::HEAD4_ON
: nTextColl
= RES_POOLCOLL_HEADLINE4
; break;
4118 case HtmlTokenId::HEAD5_ON
: nTextColl
= RES_POOLCOLL_HEADLINE5
; break;
4119 case HtmlTokenId::HEAD6_ON
: nTextColl
= RES_POOLCOLL_HEADLINE6
; break;
4120 default: nTextColl
= RES_POOLCOLL_STANDARD
; break;
4123 // create the context
4124 std::unique_ptr
<HTMLAttrContext
> xCntxt(new HTMLAttrContext(nToken
, nTextColl
, aClass
));
4126 // parse styles (regarding class see also NewPara)
4127 if (HasStyleOptions(aStyle
, aId
, {}, &aLang
, &aDir
))
4129 SfxItemSet
aItemSet( m_xDoc
->GetAttrPool(), m_pCSS1Parser
->GetWhichMap() );
4130 SvxCSS1PropertyInfo aPropInfo
;
4132 if (ParseStyleOptions(aStyle
, aId
, OUString(), aItemSet
, aPropInfo
, &aLang
, &aDir
))
4134 OSL_ENSURE( aClass
.isEmpty() || !m_pCSS1Parser
->GetClass( aClass
),
4135 "Class is not considered" );
4136 DoPositioning( aItemSet
, aPropInfo
, xCntxt
.get() );
4137 InsertAttrs( aItemSet
, aPropInfo
, xCntxt
.get() );
4141 if( SvxAdjust::End
!= m_eParaAdjust
)
4142 InsertAttr( &m_xAttrTab
->pAdjust
, SvxAdjustItem(m_eParaAdjust
, RES_PARATR_ADJUST
), xCntxt
.get() );
4144 // and push on stack
4145 PushContext(xCntxt
);
4147 // set the current style or its attributes
4148 SetTextCollAttrs(m_aContexts
.back().get());
4150 m_nFontStHeadStart
= m_aFontStack
.size();
4156 void SwHTMLParser::EndHeading()
4158 // open a new paragraph
4159 if( m_pPam
->GetPoint()->GetContentIndex() )
4160 AppendTextNode( AM_SPACE
);
4164 // search context matching the token and fetch it from stack
4165 std::unique_ptr
<HTMLAttrContext
> xCntxt
;
4166 auto nPos
= m_aContexts
.size();
4167 while( !xCntxt
&& nPos
>m_nContextStMin
)
4169 switch( m_aContexts
[--nPos
]->GetToken() )
4171 case HtmlTokenId::HEAD1_ON
:
4172 case HtmlTokenId::HEAD2_ON
:
4173 case HtmlTokenId::HEAD3_ON
:
4174 case HtmlTokenId::HEAD4_ON
:
4175 case HtmlTokenId::HEAD5_ON
:
4176 case HtmlTokenId::HEAD6_ON
:
4177 xCntxt
= std::move(m_aContexts
[nPos
]);
4178 m_aContexts
.erase( m_aContexts
.begin() + nPos
);
4184 // and now end attributes
4187 EndContext(xCntxt
.get());
4188 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4192 // reset existing style
4195 m_nFontStHeadStart
= m_nFontStMin
;
4198 void SwHTMLParser::NewTextFormatColl( HtmlTokenId nToken
, sal_uInt16 nColl
)
4200 OUString aId
, aStyle
, aClass
, aLang
, aDir
;
4202 const HTMLOptions
& rHTMLOptions
= GetOptions();
4203 for (size_t i
= rHTMLOptions
.size(); i
; )
4205 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
4206 switch( rOption
.GetToken() )
4208 case HtmlOptionId::ID
:
4209 aId
= rOption
.GetString();
4211 case HtmlOptionId::STYLE
:
4212 aStyle
= rOption
.GetString();
4214 case HtmlOptionId::CLASS
:
4215 aClass
= rOption
.GetString();
4217 case HtmlOptionId::LANG
:
4218 aLang
= rOption
.GetString();
4220 case HtmlOptionId::DIR:
4221 aDir
= rOption
.GetString();
4227 // open a new paragraph
4228 SwHTMLAppendMode eMode
= AM_NORMAL
;
4231 case HtmlTokenId::LISTING_ON
:
4232 case HtmlTokenId::XMP_ON
:
4233 // These both tags will be mapped to the PRE style. For the case that a
4234 // a CLASS exists we will delete it so that we don't get the CLASS of
4238 case HtmlTokenId::BLOCKQUOTE_ON
:
4239 case HtmlTokenId::BLOCKQUOTE30_ON
:
4240 case HtmlTokenId::PREFORMTXT_ON
:
4243 case HtmlTokenId::ADDRESS_ON
:
4244 eMode
= AM_NOSPACE
; // ADDRESS can follow on a <P> without </P>
4246 case HtmlTokenId::DT_ON
:
4247 case HtmlTokenId::DD_ON
:
4248 eMode
= AM_SOFTNOSPACE
;
4251 OSL_ENSURE( false, "unknown style" );
4254 if( m_pPam
->GetPoint()->GetContentIndex() )
4255 AppendTextNode( eMode
);
4256 else if( AM_SPACE
==eMode
)
4259 // ... and save in a context
4260 std::unique_ptr
<HTMLAttrContext
> xCntxt(new HTMLAttrContext(nToken
, nColl
, aClass
));
4262 // parse styles (regarding class see also NewPara)
4263 if (HasStyleOptions(aStyle
, aId
, {}, &aLang
, &aDir
))
4265 SfxItemSet
aItemSet( m_xDoc
->GetAttrPool(), m_pCSS1Parser
->GetWhichMap() );
4266 SvxCSS1PropertyInfo aPropInfo
;
4268 if (ParseStyleOptions(aStyle
, aId
, OUString(), aItemSet
, aPropInfo
, &aLang
, &aDir
))
4270 OSL_ENSURE( aClass
.isEmpty() || !m_pCSS1Parser
->GetClass( aClass
),
4271 "Class is not considered" );
4272 DoPositioning( aItemSet
, aPropInfo
, xCntxt
.get() );
4273 InsertAttrs( aItemSet
, aPropInfo
, xCntxt
.get() );
4277 PushContext(xCntxt
);
4279 // set the new style
4280 SetTextCollAttrs(m_aContexts
.back().get());
4282 // update progress bar
4286 void SwHTMLParser::EndTextFormatColl( HtmlTokenId nToken
)
4288 SwHTMLAppendMode eMode
= AM_NORMAL
;
4289 switch( getOnToken(nToken
) )
4291 case HtmlTokenId::BLOCKQUOTE_ON
:
4292 case HtmlTokenId::BLOCKQUOTE30_ON
:
4293 case HtmlTokenId::PREFORMTXT_ON
:
4294 case HtmlTokenId::LISTING_ON
:
4295 case HtmlTokenId::XMP_ON
:
4298 case HtmlTokenId::ADDRESS_ON
:
4299 case HtmlTokenId::DT_ON
:
4300 case HtmlTokenId::DD_ON
:
4301 eMode
= AM_SOFTNOSPACE
;
4304 OSL_ENSURE( false, "unknown style" );
4307 if( m_pPam
->GetPoint()->GetContentIndex() )
4308 AppendTextNode( eMode
);
4309 else if( AM_SPACE
==eMode
)
4312 // pop current context of stack
4313 std::unique_ptr
<HTMLAttrContext
> xCntxt(PopContext(getOnToken(nToken
)));
4315 // and now end attributes
4318 EndContext(xCntxt
.get());
4319 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4323 // reset existing style
4327 void SwHTMLParser::NewDefList()
4329 OUString aId
, aStyle
, aClass
, aLang
, aDir
;
4331 const HTMLOptions
& rHTMLOptions
= GetOptions();
4332 for (size_t i
= rHTMLOptions
.size(); i
; )
4334 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
4335 switch( rOption
.GetToken() )
4337 case HtmlOptionId::ID
:
4338 aId
= rOption
.GetString();
4340 case HtmlOptionId::STYLE
:
4341 aStyle
= rOption
.GetString();
4343 case HtmlOptionId::CLASS
:
4344 aClass
= rOption
.GetString();
4346 case HtmlOptionId::LANG
:
4347 aLang
= rOption
.GetString();
4349 case HtmlOptionId::DIR:
4350 aDir
= rOption
.GetString();
4356 // open a new paragraph
4357 bool bSpace
= (GetNumInfo().GetDepth() + m_nDefListDeep
) == 0;
4358 if( m_pPam
->GetPoint()->GetContentIndex() )
4359 AppendTextNode( bSpace
? AM_SPACE
: AM_SOFTNOSPACE
);
4366 bool bInDD
= false, bNotInDD
= false;
4367 auto nPos
= m_aContexts
.size();
4368 while( !bInDD
&& !bNotInDD
&& nPos
>m_nContextStMin
)
4370 HtmlTokenId nCntxtToken
= m_aContexts
[--nPos
]->GetToken();
4371 switch( nCntxtToken
)
4373 case HtmlTokenId::DEFLIST_ON
:
4374 case HtmlTokenId::DIRLIST_ON
:
4375 case HtmlTokenId::MENULIST_ON
:
4376 case HtmlTokenId::ORDERLIST_ON
:
4377 case HtmlTokenId::UNORDERLIST_ON
:
4380 case HtmlTokenId::DD_ON
:
4387 // ... and save in a context
4388 std::unique_ptr
<HTMLAttrContext
> xCntxt(new HTMLAttrContext(HtmlTokenId::DEFLIST_ON
));
4390 // in it save also the margins
4391 sal_uInt16 nLeft
=0, nRight
=0;
4393 GetMarginsFromContext( nLeft
, nRight
, nIndent
);
4395 // The indentation, which already results from a DL, correlates with a DT
4396 // on the current level and this correlates to a DD from the previous level.
4397 // For a level >=2 we must add DD distance.
4398 if( !bInDD
&& m_nDefListDeep
> 1 )
4401 // and the one of the DT-style of the current level
4402 SvxTextLeftMarginItem
const& rTextLeftMargin
=
4403 m_pCSS1Parser
->GetTextFormatColl(RES_POOLCOLL_HTML_DD
, OUString())
4404 ->GetTextLeftMargin();
4405 nLeft
= nLeft
+ static_cast<sal_uInt16
>(rTextLeftMargin
.GetTextLeft());
4408 xCntxt
->SetMargins( nLeft
, nRight
, nIndent
);
4411 if( HasStyleOptions( aStyle
, aId
, aClass
, &aLang
, &aDir
) )
4413 SfxItemSet
aItemSet( m_xDoc
->GetAttrPool(), m_pCSS1Parser
->GetWhichMap() );
4414 SvxCSS1PropertyInfo aPropInfo
;
4416 if( ParseStyleOptions( aStyle
, aId
, aClass
, aItemSet
, aPropInfo
, &aLang
, &aDir
) )
4418 DoPositioning( aItemSet
, aPropInfo
, xCntxt
.get() );
4419 InsertAttrs( aItemSet
, aPropInfo
, xCntxt
.get() );
4423 PushContext(xCntxt
);
4425 // set the attributes of the new style
4426 if( m_nDefListDeep
> 1 )
4427 SetTextCollAttrs(m_aContexts
.back().get());
4430 void SwHTMLParser::EndDefList()
4432 bool bSpace
= (GetNumInfo().GetDepth() + m_nDefListDeep
) == 1;
4433 if( m_pPam
->GetPoint()->GetContentIndex() )
4434 AppendTextNode( bSpace
? AM_SPACE
: AM_SOFTNOSPACE
);
4439 if( m_nDefListDeep
> 0 )
4442 // pop current context of stack
4443 std::unique_ptr
<HTMLAttrContext
> xCntxt(PopContext(HtmlTokenId::DEFLIST_ON
));
4445 // and now end attributes
4448 EndContext(xCntxt
.get());
4449 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4457 void SwHTMLParser::NewDefListItem( HtmlTokenId nToken
)
4459 // determine if the DD/DT exist in a DL
4460 bool bInDefList
= false, bNotInDefList
= false;
4461 auto nPos
= m_aContexts
.size();
4462 while( !bInDefList
&& !bNotInDefList
&& nPos
>m_nContextStMin
)
4464 HtmlTokenId nCntxtToken
= m_aContexts
[--nPos
]->GetToken();
4465 switch( nCntxtToken
)
4467 case HtmlTokenId::DEFLIST_ON
:
4470 case HtmlTokenId::DIRLIST_ON
:
4471 case HtmlTokenId::MENULIST_ON
:
4472 case HtmlTokenId::ORDERLIST_ON
:
4473 case HtmlTokenId::UNORDERLIST_ON
:
4474 bNotInDefList
= true;
4480 // if not, then implicitly open a new DL
4484 OSL_ENSURE( m_nOpenParaToken
== HtmlTokenId::NONE
,
4485 "Now an open paragraph element will be lost." );
4486 m_nOpenParaToken
= nToken
;
4489 NewTextFormatColl( nToken
, static_cast< sal_uInt16
>(nToken
==HtmlTokenId::DD_ON
? RES_POOLCOLL_HTML_DD
4490 : RES_POOLCOLL_HTML_DT
) );
4493 void SwHTMLParser::EndDefListItem( HtmlTokenId nToken
)
4495 // open a new paragraph
4496 if( nToken
== HtmlTokenId::NONE
&& m_pPam
->GetPoint()->GetContentIndex() )
4497 AppendTextNode( AM_SOFTNOSPACE
);
4499 // search context matching the token and fetch it from stack
4500 nToken
= getOnToken(nToken
);
4501 std::unique_ptr
<HTMLAttrContext
> xCntxt
;
4502 auto nPos
= m_aContexts
.size();
4503 while( !xCntxt
&& nPos
>m_nContextStMin
)
4505 HtmlTokenId nCntxtToken
= m_aContexts
[--nPos
]->GetToken();
4506 switch( nCntxtToken
)
4508 case HtmlTokenId::DD_ON
:
4509 case HtmlTokenId::DT_ON
:
4510 if( nToken
== HtmlTokenId::NONE
|| nToken
== nCntxtToken
)
4512 xCntxt
= std::move(m_aContexts
[nPos
]);
4513 m_aContexts
.erase( m_aContexts
.begin() + nPos
);
4516 case HtmlTokenId::DEFLIST_ON
:
4517 // don't look at DD/DT outside the current DefList
4518 case HtmlTokenId::DIRLIST_ON
:
4519 case HtmlTokenId::MENULIST_ON
:
4520 case HtmlTokenId::ORDERLIST_ON
:
4521 case HtmlTokenId::UNORDERLIST_ON
:
4522 // and also not outside another list
4523 nPos
= m_nContextStMin
;
4529 // and now end attributes
4532 EndContext(xCntxt
.get());
4533 SetAttr(); // because of JavaScript set paragraph attributes as fast as possible
4539 * @param bNoSurroundOnly The paragraph contains at least one frame
4541 * @param bSurroundOnly The paragraph contains at least one frame
4542 * with wrapping, but none without wrapping.
4544 * Otherwise the paragraph contains any frame.
4546 bool SwHTMLParser::HasCurrentParaFlys( bool bNoSurroundOnly
,
4547 bool bSurroundOnly
) const
4549 SwNode
& rNode
= m_pPam
->GetPoint()->GetNode();
4552 bool bFound
= false;
4553 for(sw::SpzFrameFormat
* pFormat
: *m_xDoc
->GetSpzFrameFormats())
4555 SwFormatAnchor
const*const pAnchor
= &pFormat
->GetAnchor();
4556 // A frame was found, when
4557 // - it is paragraph-bound, and
4558 // - is anchored in current paragraph, and
4559 // - every paragraph-bound frame counts, or
4560 // - (only frames without wrapping count and) the frame doesn't have
4562 SwNode
const*const pAnchorNode
= pAnchor
->GetAnchorNode();
4564 ((RndStdIds::FLY_AT_PARA
== pAnchor
->GetAnchorId()) ||
4565 (RndStdIds::FLY_AT_CHAR
== pAnchor
->GetAnchorId())) &&
4566 *pAnchorNode
== rNode
)
4568 if( !(bNoSurroundOnly
|| bSurroundOnly
) )
4575 // When looking for frames with wrapping, also disregard
4576 // ones with wrap-through. In this case it's (still) HIDDEN-Controls,
4577 // and you don't want to evade those when positioning.
4578 css::text::WrapTextMode eSurround
= pFormat
->GetSurround().GetSurround();
4579 if( bNoSurroundOnly
)
4581 if( css::text::WrapTextMode_NONE
==eSurround
)
4589 if( css::text::WrapTextMode_NONE
==eSurround
)
4594 else if( css::text::WrapTextMode_THROUGH
!=eSurround
)
4597 // Continue searching: It's possible that some without
4598 // wrapping will follow...
4608 // the special methods for inserting of objects
4610 const SwFormatColl
*SwHTMLParser::GetCurrFormatColl() const
4612 const SwContentNode
* pCNd
= m_pPam
->GetPointContentNode();
4613 return pCNd
? &pCNd
->GetAnyFormatColl() : nullptr;
4616 void SwHTMLParser::SetTextCollAttrs( HTMLAttrContext
*pContext
)
4618 SwTextFormatColl
*pCollToSet
= nullptr; // the style to set
4619 SfxItemSet
*pItemSet
= nullptr; // set of hard attributes
4620 sal_uInt16 nTopColl
= pContext
? pContext
->GetTextFormatColl() : 0;
4621 const OUString rTopClass
= pContext
? pContext
->GetClass() : OUString();
4622 sal_uInt16 nDfltColl
= RES_POOLCOLL_TEXT
;
4624 bool bInPRE
=false; // some context info
4626 sal_uInt16 nLeftMargin
= 0, nRightMargin
= 0; // the margins and
4627 short nFirstLineIndent
= 0; // indentations
4629 auto nDepth
= m_aContexts
.size();
4630 if (bFuzzing
&& nDepth
> 128)
4632 SAL_WARN("sw.html", "Not applying any more text collection attributes to a deeply nested node for fuzzing performance");
4636 for (auto i
= m_nContextStAttrMin
; i
< nDepth
; ++i
)
4638 const HTMLAttrContext
*pCntxt
= m_aContexts
[i
].get();
4640 sal_uInt16 nColl
= pCntxt
->GetTextFormatColl();
4643 // There is a style to set. Then at first we must decide,
4644 // if the style can be set.
4645 bool bSetThis
= true;
4648 case RES_POOLCOLL_HTML_PRE
:
4651 case RES_POOLCOLL_TEXT
:
4652 // <TD><P CLASS=xxx> must become TD.xxx
4653 if( nDfltColl
==RES_POOLCOLL_TABLE
||
4654 nDfltColl
==RES_POOLCOLL_TABLE_HDLN
)
4657 case RES_POOLCOLL_HTML_HR
:
4658 // also <HR> in <PRE> set as style, otherwise it can't
4659 // be exported anymore
4667 SwTextFormatColl
*pNewColl
=
4668 m_pCSS1Parser
->GetTextFormatColl( nColl
, pCntxt
->GetClass() );
4672 // If now a different style should be set as previously, the
4673 // previous style must be replaced by hard attribution.
4677 // insert the attributes hard, which previous style sets
4679 pItemSet
= new SfxItemSet( pCollToSet
->GetAttrSet() );
4682 const SfxItemSet
& rCollSet
= pCollToSet
->GetAttrSet();
4683 SfxItemSet
aItemSet( *rCollSet
.GetPool(),
4684 rCollSet
.GetRanges() );
4685 aItemSet
.Set( rCollSet
);
4686 pItemSet
->Put( aItemSet
);
4688 // but remove the attributes, which the current style sets,
4689 // because otherwise they will be overwritten later
4690 pItemSet
->Differentiate( pNewColl
->GetAttrSet() );
4693 pCollToSet
= pNewColl
;
4699 pItemSet
= new SfxItemSet( pNewColl
->GetAttrSet() );
4702 const SfxItemSet
& rCollSet
= pNewColl
->GetAttrSet();
4703 SfxItemSet
aItemSet( *rCollSet
.GetPool(),
4704 rCollSet
.GetRanges() );
4705 aItemSet
.Set( rCollSet
);
4706 pItemSet
->Put( aItemSet
);
4712 // Maybe a default style exists?
4713 nColl
= pCntxt
->GetDefaultTextFormatColl();
4718 // if applicable fetch new paragraph indents
4719 if( pCntxt
->IsLRSpaceChanged() )
4721 sal_uInt16 nLeft
=0, nRight
=0;
4723 pCntxt
->GetMargins( nLeft
, nRight
, nFirstLineIndent
);
4724 nLeftMargin
= nLeft
;
4725 nRightMargin
= nRight
;
4729 // If in current context a new style should be set,
4730 // its paragraph margins must be inserted in the context.
4731 if( pContext
&& nTopColl
)
4733 // <TD><P CLASS=xxx> must become TD.xxx
4734 if( nTopColl
==RES_POOLCOLL_TEXT
&&
4735 (nDfltColl
==RES_POOLCOLL_TABLE
||
4736 nDfltColl
==RES_POOLCOLL_TABLE_HDLN
) )
4737 nTopColl
= nDfltColl
;
4739 const SwTextFormatColl
*pTopColl
=
4740 m_pCSS1Parser
->GetTextFormatColl( nTopColl
, rTopClass
);
4741 const SfxItemSet
& rItemSet
= pTopColl
->GetAttrSet();
4742 if (rItemSet
.GetItemIfSet(RES_MARGIN_FIRSTLINE
)
4743 || rItemSet
.GetItemIfSet(RES_MARGIN_TEXTLEFT
)
4744 || rItemSet
.GetItemIfSet(RES_MARGIN_RIGHT
))
4746 sal_Int32 nLeft
= rItemSet
.Get(RES_MARGIN_TEXTLEFT
).GetTextLeft();
4747 sal_Int32 nRight
= rItemSet
.Get(RES_MARGIN_RIGHT
).GetRight();
4748 nFirstLineIndent
= rItemSet
.Get(RES_MARGIN_FIRSTLINE
).GetTextFirstLineOffset();
4750 // In Definition lists the margins also contain the margins from the previous levels
4751 if( RES_POOLCOLL_HTML_DD
== nTopColl
)
4753 auto const*const pColl(m_pCSS1Parser
->GetTextFormatColl(RES_POOLCOLL_HTML_DT
, OUString()));
4754 nLeft
-= pColl
->GetTextLeftMargin().GetTextLeft();
4755 nRight
-= pColl
->GetRightMargin().GetRight();
4757 else if( RES_POOLCOLL_HTML_DT
== nTopColl
)
4763 // the paragraph margins add up
4764 nLeftMargin
= nLeftMargin
+ static_cast< sal_uInt16
>(nLeft
);
4765 nRightMargin
= nRightMargin
+ static_cast< sal_uInt16
>(nRight
);
4767 pContext
->SetMargins( nLeftMargin
, nRightMargin
,
4770 if( const SvxULSpaceItem
* pULItem
= rItemSet
.GetItemIfSet(RES_UL_SPACE
) )
4772 pContext
->SetULSpace( pULItem
->GetUpper(), pULItem
->GetLower() );
4776 // If no style is set in the context use the text body.
4779 pCollToSet
= m_pCSS1Parser
->GetTextCollFromPool( nDfltColl
);
4782 nLeftMargin
= static_cast<sal_uInt16
>(pCollToSet
->GetTextLeftMargin().GetTextLeft());
4786 nRightMargin
= static_cast<sal_uInt16
>(pCollToSet
->GetRightMargin().GetRight());
4788 if( !nFirstLineIndent
)
4790 nFirstLineIndent
= pCollToSet
->GetFirstLineIndent().GetTextFirstLineOffset();
4794 // remove previous hard attribution of paragraph
4795 for( auto pParaAttr
: m_aParaAttrs
)
4796 pParaAttr
->Invalidate();
4797 m_aParaAttrs
.clear();
4800 m_xDoc
->SetTextFormatColl( *m_pPam
, pCollToSet
);
4802 // if applicable correct the paragraph indent
4803 const SvxFirstLineIndentItem
& rFirstLine
= pCollToSet
->GetFirstLineIndent();
4804 const SvxTextLeftMarginItem
& rTextLeftMargin
= pCollToSet
->GetTextLeftMargin();
4805 const SvxRightMarginItem
& rRightMargin
= pCollToSet
->GetRightMargin();
4806 bool bSetLRSpace
= nLeftMargin
!= rTextLeftMargin
.GetTextLeft() ||
4807 nFirstLineIndent
!= rFirstLine
.GetTextFirstLineOffset() ||
4808 nRightMargin
!= rRightMargin
.GetRight();
4812 SvxFirstLineIndentItem
firstLine(rFirstLine
);
4813 SvxTextLeftMarginItem
leftMargin(rTextLeftMargin
);
4814 SvxRightMarginItem
rightMargin(rRightMargin
);
4815 firstLine
.SetTextFirstLineOffset(nFirstLineIndent
);
4816 leftMargin
.SetTextLeft(nLeftMargin
);
4817 rightMargin
.SetRight(nRightMargin
);
4820 pItemSet
->Put(firstLine
);
4821 pItemSet
->Put(leftMargin
);
4822 pItemSet
->Put(rightMargin
);
4826 NewAttr(m_xAttrTab
, &m_xAttrTab
->pFirstLineIndent
, firstLine
);
4827 m_xAttrTab
->pFirstLineIndent
->SetLikePara();
4828 m_aParaAttrs
.push_back(m_xAttrTab
->pFirstLineIndent
);
4829 EndAttr(m_xAttrTab
->pFirstLineIndent
, false);
4830 NewAttr(m_xAttrTab
, &m_xAttrTab
->pTextLeftMargin
, leftMargin
);
4831 m_xAttrTab
->pTextLeftMargin
->SetLikePara();
4832 m_aParaAttrs
.push_back(m_xAttrTab
->pTextLeftMargin
);
4833 EndAttr(m_xAttrTab
->pTextLeftMargin
, false);
4834 NewAttr(m_xAttrTab
, &m_xAttrTab
->pRightMargin
, rightMargin
);
4835 m_xAttrTab
->pRightMargin
->SetLikePara();
4836 m_aParaAttrs
.push_back(m_xAttrTab
->pRightMargin
);
4837 EndAttr(m_xAttrTab
->pRightMargin
, false);
4841 // and now set the attributes
4844 InsertParaAttrs( *pItemSet
);
// Opens a character-format element for the given token: reads the
// ID/STYLE/CLASS/LANG/DIR options, creates a new HTMLAttrContext, applies
// any parsed style options, inserts the character format looked up for the
// token/class and finally pushes the context.
// NOTE(review): this listing omits some original lines (braces, 'break'
// statements, the declaration of aClass) - confirm against the full file.
4849 void SwHTMLParser::NewCharFormat( HtmlTokenId nToken
)
4851 OUString aId
, aStyle
, aLang
, aDir
;
// Walk the options from last to first (index is pre-decremented on access).
4854 const HTMLOptions
& rHTMLOptions
= GetOptions();
4855 for (size_t i
= rHTMLOptions
.size(); i
; )
4857 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
4858 switch( rOption
.GetToken() )
4860 case HtmlOptionId::ID
:
4861 aId
= rOption
.GetString();
4863 case HtmlOptionId::STYLE
:
4864 aStyle
= rOption
.GetString();
4866 case HtmlOptionId::CLASS
:
4867 aClass
= rOption
.GetString();
4869 case HtmlOptionId::LANG
:
4870 aLang
= rOption
.GetString();
4872 case HtmlOptionId::DIR:
4873 aDir
= rOption
.GetString();
4879 // create a new context
4880 std::unique_ptr
<HTMLAttrContext
> xCntxt(new HTMLAttrContext(nToken
));
4882 // set the style and save it in the context
4883 SwCharFormat
* pCFormat
= m_pCSS1Parser
->GetChrFormat( nToken
, aClass
);
4884 OSL_ENSURE( pCFormat
, "No character format found for token" );
// The class is deliberately not passed to the style-option calls below
// (empty braces / empty OUString); it was already used to pick pCFormat.
4886 // parse styles (regarding class see also NewPara)
4887 if (HasStyleOptions(aStyle
, aId
, {}, &aLang
, &aDir
))
4889 SfxItemSet
aItemSet( m_xDoc
->GetAttrPool(), m_pCSS1Parser
->GetWhichMap() );
4890 SvxCSS1PropertyInfo aPropInfo
;
4892 if (ParseStyleOptions(aStyle
, aId
, OUString(), aItemSet
, aPropInfo
, &aLang
, &aDir
))
4894 OSL_ENSURE( aClass
.isEmpty() || !m_pCSS1Parser
->GetClass( aClass
),
4895 "Class is not considered" );
4896 DoPositioning( aItemSet
, aPropInfo
, xCntxt
.get() );
4897 InsertAttrs( aItemSet
, aPropInfo
, xCntxt
.get(), true );
4901 // Character formats are stored in their own stack and can never be inserted
4902 // by styles. Therefore the attribute doesn't exist in CSS1-Which-Range.
4904 InsertAttr( &m_xAttrTab
->pCharFormats
, SwFormatCharFormat( pCFormat
), xCntxt
.get() );
// Hand the finished context over to the context stack.
4907 PushContext(xCntxt
);
// Inserts a spacer according to the TYPE/ALIGN/WIDTH/HEIGHT/SIZE options.
// Three spacer types are handled in the visible code:
//  - HTML_SPTYPE_BLOCK: an empty, content-protected fly frame of the given size
//  - HTML_SPTYPE_VERT:  extra lower paragraph spacing (SIZE converted px->twip)
//  - HTML_SPTYPE_HORI:  first-line indent in an empty paragraph, otherwise a
//                       single space carrying a kerning attribute
// NOTE(review): this listing omits some original lines (braces, 'break'
// statements, declarations such as aSize/nIndent, the switch head and the
// guards around the VERT/HORI bodies) - confirm against the full file.
4910 void SwHTMLParser::InsertSpacer()
4912 // and if applicable change it via the options
4913 sal_Int16 eVertOri
= text::VertOrientation::TOP
;
4914 sal_Int16 eHoriOri
= text::HoriOrientation::NONE
;
4916 tools::Long nSize
= 0;
4917 bool bPercentWidth
= false;
4918 bool bPercentHeight
= false;
4919 sal_uInt16 nType
= HTML_SPTYPE_HORI
;
// Walk the options from last to first (index is pre-decremented on access).
4921 const HTMLOptions
& rHTMLOptions
= GetOptions();
4922 for (size_t i
= rHTMLOptions
.size(); i
; )
4924 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
4925 switch( rOption
.GetToken() )
4927 case HtmlOptionId::TYPE
:
4928 rOption
.GetEnum( nType
, aHTMLSpacerTypeTable
);
4930 case HtmlOptionId::ALIGN
:
4932 rOption
.GetEnum( aHTMLImgVAlignTable
,
4935 rOption
.GetEnum( aHTMLImgHAlignTable
,
4938 case HtmlOptionId::WIDTH
:
4939 // First only save as pixel value!
4940 bPercentWidth
= (rOption
.GetString().indexOf('%') != -1);
4941 aSize
.setWidth( static_cast<tools::Long
>(rOption
.GetNumber()) );
4943 case HtmlOptionId::HEIGHT
:
4944 // First only save as pixel value!
4945 bPercentHeight
= (rOption
.GetString().indexOf('%') != -1);
4946 aSize
.setHeight( static_cast<tools::Long
>(rOption
.GetNumber()) );
4948 case HtmlOptionId::SIZE
:
4949 // First only save as pixel value!
4950 nSize
= rOption
.GetNumber();
// --- block spacer: realised as an empty, protected fly frame ---
4958 case HTML_SPTYPE_BLOCK
:
4960 // create an empty text frame
4962 // fetch the ItemSet
4963 SfxItemSetFixed
<RES_FRMATR_BEGIN
, RES_FRMATR_END
-1> aFrameSet( m_xDoc
->GetAttrPool() );
4965 Reader::ResetFrameFormatAttrs( aFrameSet
);
4967 // set the anchor and the adjustment
4968 SetAnchorAndAdjustment( eVertOri
, eHoriOri
, aFrameSet
);
4970 // and the size of the frame
4971 Size
aDfltSz( MINFLY
, MINFLY
);
4972 Size
aSpace( 0, 0 );
// Dummy item set / property info: the size and space helpers require them,
// but no CSS1 properties are parsed for a spacer in the visible code.
4973 SfxItemSet
aDummyItemSet( m_xDoc
->GetAttrPool(),
4974 m_pCSS1Parser
->GetWhichMap() );
4975 SvxCSS1PropertyInfo aDummyPropInfo
;
4977 SetFixSize( aSize
, aDfltSz
, bPercentWidth
, bPercentHeight
,
4978 aDummyPropInfo
, aFrameSet
);
4979 SetSpace( aSpace
, aDummyItemSet
, aDummyPropInfo
, aFrameSet
);
4981 // protect the content
4982 SvxProtectItem
aProtectItem( RES_PROTECT
) ;
4983 aProtectItem
.SetContentProtect( true );
4984 aFrameSet
.Put( aProtectItem
);
4987 RndStdIds eAnchorId
=
4988 aFrameSet
.Get(RES_ANCHOR
).GetAnchorId();
4989 SwFrameFormat
*pFlyFormat
= m_xDoc
->MakeFlySection( eAnchorId
,
4990 m_pPam
->GetPoint(), &aFrameSet
);
4991 // Possibly create frames and register auto-bound frames.
4992 RegisterFlyFrame( pFlyFormat
);
// --- vertical spacer: realised as lower paragraph spacing ---
4995 case HTML_SPTYPE_VERT
:
4998 nSize
= o3tl::convert(nSize
, o3tl::Length::px
, o3tl::Length::twip
);
5000 // set a paragraph margin
5001 SwTextNode
*pTextNode
= nullptr;
5002 if( !m_pPam
->GetPoint()->GetContentIndex() )
5004 // if possible change the bottom paragraph margin
5007 SetAttr(); // set still open paragraph attributes
5009 pTextNode
= m_xDoc
->GetNodes()[m_pPam
->GetPoint()->GetNodeIndex()-1]
5012 // If the previous paragraph isn't a text node, then now an
5013 // empty paragraph is created, which already generates a single
// Reduce the requested spacing by HTML_PARSPACE, never going below zero.
5016 nSize
= nSize
>HTML_PARSPACE
? nSize
-HTML_PARSPACE
: 0;
// Add the spacing to the lower margin of the text node found above ...
5021 SvxULSpaceItem
aULSpace( pTextNode
->SwContentNode::GetAttr( RES_UL_SPACE
) );
5022 aULSpace
.SetLower( aULSpace
.GetLower() + o3tl::narrowing
<sal_uInt16
>(nSize
) );
5023 pTextNode
->SetAttr( aULSpace
);
// ... or set it as an attribute and append a new text node.
// NOTE(review): the if/else selecting between these two paths is not
// visible in this listing.
5027 NewAttr(m_xAttrTab
, &m_xAttrTab
->pULSpace
, SvxULSpaceItem(0, o3tl::narrowing
<sal_uInt16
>(nSize
), RES_UL_SPACE
));
5028 EndAttr( m_xAttrTab
->pULSpace
, false );
5030 AppendTextNode(); // Don't change spacing!
// --- horizontal spacer: first-line indent or kerned space ---
5034 case HTML_SPTYPE_HORI
:
5037 // If the paragraph is still empty, set first line
5038 // indentation, otherwise apply letter spacing over a space.
5040 nSize
= o3tl::convert(nSize
, o3tl::Length::px
, o3tl::Length::twip
);
5042 if( !m_pPam
->GetPoint()->GetContentIndex() )
5044 sal_uInt16 nLeft
=0, nRight
=0;
5047 GetMarginsFromContextWithNumberBullet( nLeft
, nRight
, nIndent
);
5048 nIndent
= nIndent
+ static_cast<short>(nSize
);
5050 SvxFirstLineIndentItem
const firstLine(nIndent
, RES_MARGIN_FIRSTLINE
);
5051 SvxTextLeftMarginItem
const leftMargin(nLeft
, RES_MARGIN_TEXTLEFT
);
5052 SvxRightMarginItem
const rightMargin(nRight
, RES_MARGIN_RIGHT
);
5054 NewAttr(m_xAttrTab
, &m_xAttrTab
->pFirstLineIndent
, firstLine
);
5055 EndAttr(m_xAttrTab
->pFirstLineIndent
, false);
5056 NewAttr(m_xAttrTab
, &m_xAttrTab
->pTextLeftMargin
, leftMargin
);
5057 EndAttr(m_xAttrTab
->pTextLeftMargin
, false);
5058 NewAttr(m_xAttrTab
, &m_xAttrTab
->pRightMargin
, rightMargin
);
5059 EndAttr(m_xAttrTab
->pRightMargin
, false);
// Non-empty paragraph: insert one space and span the kerning attribute over it.
5063 NewAttr(m_xAttrTab
, &m_xAttrTab
->pKerning
, SvxKerningItem( static_cast<short>(nSize
), RES_CHRATR_KERNING
));
5064 m_xDoc
->getIDocumentContentOperations().InsertString( *m_pPam
, " " );
5065 EndAttr( m_xAttrTab
->pKerning
);
// Converts a pixel value to twips using o3tl::convert and caps the result
// at SAL_MAX_UINT16 (via std::min) so that it fits the sal_uInt16 return type.
5071 sal_uInt16
SwHTMLParser::ToTwips( sal_uInt16 nPixel
)
5073 return std::min(o3tl::convert(nPixel
, o3tl::Length::px
, o3tl::Length::twip
),
5074 sal_Int64(SAL_MAX_UINT16
));
// Returns the width available for layout. Queries
// SwHTMLTableLayout::GetBrowseWidth() first; the final visible return yields
// the cached m_aHTMLPageSize width, which is computed lazily from the master
// page format when it is still zero.
// NOTE(review): the lines between the query and the cache check are not
// visible in this listing; presumably nWidth is returned early when non-zero
// - confirm against the full file.
5077 SwTwips
SwHTMLParser::GetCurrentBrowseWidth()
5079 const SwTwips nWidth
= SwHTMLTableLayout::GetBrowseWidth( *m_xDoc
);
// Compute and cache the page size only once (while the cached width is zero).
5083 if( !m_aHTMLPageSize
.Width() )
5085 const SwFrameFormat
& rPgFormat
= m_pCSS1Parser
->GetMasterPageDesc()->GetMaster();
5087 const SwFormatFrameSize
& rSz
= rPgFormat
.GetFrameSize();
5088 const SvxLRSpaceItem
& rLR
= rPgFormat
.GetLRSpace();
5089 const SvxULSpaceItem
& rUL
= rPgFormat
.GetULSpace();
5090 const SwFormatCol
& rCol
= rPgFormat
.GetCol();
// Usable area = frame size minus the left/right and upper/lower spacing.
5092 m_aHTMLPageSize
.setWidth( rSz
.GetWidth() - rLR
.GetLeft() - rLR
.GetRight() );
5093 m_aHTMLPageSize
.setHeight( rSz
.GetHeight() - rUL
.GetUpper() - rUL
.GetLower() );
// With more than one column the width is divided evenly by the column count.
5095 if( 1 < rCol
.GetNumCols() )
5096 m_aHTMLPageSize
.setWidth( m_aHTMLPageSize
.Width() / ( rCol
.GetNumCols()) );
5099 return m_aHTMLPageSize
.Width();
// Looks for an ID option among the current options and, when a non-empty
// id was found, inserts a bookmark with that name.
// NOTE(review): the declaration of aId and any loop exit after a match are
// not visible in this listing - confirm against the full file.
5102 void SwHTMLParser::InsertIDOption()
// Walk the options from last to first (index is pre-decremented on access).
5105 const HTMLOptions
& rHTMLOptions
= GetOptions();
5106 for (size_t i
= rHTMLOptions
.size(); i
; )
5108 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
5109 if( HtmlOptionId::ID
==rOption
.GetToken() )
5111 aId
= rOption
.GetString();
5116 if( !aId
.isEmpty() )
5117 InsertBookmark( aId
);
// Handles a line break element: reads the CLEAR/ID/STYLE/CLASS options,
// evaluates style options (page break, bookmark id) and then inserts either
// a plain line break character (no CLEAR) or an SwFormatLineBreak item
// carrying the requested clear mode.
// The CLEAR value is matched case-insensitively against "all", "left" and
// "right" and mapped to SwLineBreakClear::ALL / LEFT / RIGHT respectively.
// NOTE(review): this listing omits some original lines (braces, 'break'
// statements, the guard preceding the 'else if' near the end) - confirm
// against the full file.
5120 void SwHTMLParser::InsertLineBreak()
5122 OUString aId
, aStyle
, aClass
; // the id of bookmark
5123 SwLineBreakClear eClear
= SwLineBreakClear::NONE
;
5125 // then we fetch the options
5126 const HTMLOptions
& rHTMLOptions
= GetOptions();
5127 for (size_t i
= rHTMLOptions
.size(); i
; )
5129 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
5130 switch( rOption
.GetToken() )
5132 case HtmlOptionId::CLEAR
:
5134 const OUString
&rClear
= rOption
.GetString();
5135 if( rClear
.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_all
) )
5137 eClear
= SwLineBreakClear::ALL
;
5139 else if( rClear
.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left
) )
5141 eClear
= SwLineBreakClear::LEFT
;
5143 else if( rClear
.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right
) )
// CLEAR=right must clear on the right side, not repeat the LEFT mapping.
5145 eClear
= SwLineBreakClear::RIGHT
;
5149 case HtmlOptionId::ID
:
5150 aId
= rOption
.GetString();
5152 case HtmlOptionId::STYLE
:
5153 aStyle
= rOption
.GetString();
5155 case HtmlOptionId::CLASS
:
5156 aClass
= rOption
.GetString();
// Default break item is "no break"; it is replaced below when the style
// options request a page break.
5163 std::shared_ptr
<SvxFormatBreakItem
> aBreakItem(std::make_shared
<SvxFormatBreakItem
>(SvxBreak::NONE
, RES_BREAK
));
5164 bool bBreakItem
= false;
5165 if( HasStyleOptions( aStyle
, aId
, aClass
) )
5167 SfxItemSet
aItemSet( m_xDoc
->GetAttrPool(), m_pCSS1Parser
->GetWhichMap() );
5168 SvxCSS1PropertyInfo aPropInfo
;
5170 if( ParseStyleOptions( aStyle
, aId
, aClass
, aItemSet
, aPropInfo
) )
5172 if( m_pCSS1Parser
->SetFormatBreak( aItemSet
, aPropInfo
) )
5174 aBreakItem
.reset(aItemSet
.Get(RES_BREAK
).Clone());
5177 if( !aPropInfo
.m_aId
.isEmpty() )
5178 InsertBookmark( aPropInfo
.m_aId
);
// A "page after" break is set as an attribute before the line break itself.
5182 if( bBreakItem
&& SvxBreak::PageAfter
== aBreakItem
->GetBreak() )
5184 NewAttr(m_xAttrTab
, &m_xAttrTab
->pBreak
, *aBreakItem
);
5185 EndAttr( m_xAttrTab
->pBreak
, false );
5190 if (eClear
== SwLineBreakClear::NONE
)
5192 // If no CLEAR could or should be executed, a line break will be inserted
5193 m_xDoc
->getIDocumentContentOperations().InsertString(*m_pPam
, "\x0A");
5197 // <BR CLEAR=xxx> is mapped to an SwFormatLineBreak.
5198 SwTextNode
* pTextNode
= m_pPam
->GetPointNode().GetTextNode();
5201 SwFormatLineBreak
aLineBreak(eClear
);
5202 sal_Int32 nPos
= m_pPam
->GetPoint()->GetContentIndex();
5203 pTextNode
->InsertItem(aLineBreak
, nPos
, nPos
);
5207 else if( m_pPam
->GetPoint()->GetContentIndex() )
5209 // If a CLEAR is executed in a non-empty paragraph, then after it
5210 // a new paragraph has to be opened.
5211 // MIB 21.02.97: Here actually we should change the bottom paragraph
5212 // margin to zero. This will fail for something like this <BR ..><P>
5213 // (>Netscape). That's why we don't do it.
5214 AppendTextNode( AM_NOSPACE
);
// A "page before" break is set as an attribute after the line break handling.
5216 if( bBreakItem
&& SvxBreak::PageBefore
== aBreakItem
->GetBreak() )
5218 NewAttr(m_xAttrTab
, &m_xAttrTab
->pBreak
, *aBreakItem
);
5219 EndAttr( m_xAttrTab
->pBreak
, false );
// Inserts a horizontal rule: reads the ID/SIZE/WIDTH/ALIGN/NOSHADE/COLOR
// options, opens a paragraph with the RES_POOLCOLL_HTML_HR collection in its
// own context, optionally gives it a bottom border line (size/colour/shade)
// and "fakes" the rule's width and alignment with left/right paragraph
// margins, then pops the context again.
// NOTE(review): this listing omits some original lines (braces, 'break'
// statements, declarations of aColor/aId, several conditions and assignments)
// - confirm against the full file.
5223 void SwHTMLParser::InsertHorzRule()
5225 sal_uInt16 nSize
= 0;
5226 sal_uInt16 nWidth
= 0;
5228 SvxAdjust eAdjust
= SvxAdjust::End
;
5230 bool bPercentWidth
= false;
5231 bool bNoShade
= false;
5232 bool bColor
= false;
5237 // let's fetch the options
5238 const HTMLOptions
& rHTMLOptions
= GetOptions();
5239 for (size_t i
= rHTMLOptions
.size(); i
; )
5241 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
5242 switch( rOption
.GetToken() )
5244 case HtmlOptionId::ID
:
5245 aId
= rOption
.GetString();
5247 case HtmlOptionId::SIZE
:
5248 nSize
= o3tl::narrowing
<sal_uInt16
>(rOption
.GetNumber());
5250 case HtmlOptionId::WIDTH
:
// A '%' anywhere in the option string marks the width as a percentage.
5251 bPercentWidth
= (rOption
.GetString().indexOf('%') != -1);
5252 nWidth
= o3tl::narrowing
<sal_uInt16
>(rOption
.GetNumber());
5253 if( bPercentWidth
&& nWidth
>=100 )
5255 // the default case are 100% lines (no attributes necessary)
5257 bPercentWidth
= false;
5260 case HtmlOptionId::ALIGN
:
5261 eAdjust
= rOption
.GetEnum( aHTMLPAlignTable
, eAdjust
);
5263 case HtmlOptionId::NOSHADE
:
5266 case HtmlOptionId::COLOR
:
5267 rOption
.GetColor( aColor
);
// Start the rule in a fresh paragraph if the current one already has content.
5274 if( m_pPam
->GetPoint()->GetContentIndex() )
5275 AppendTextNode( AM_NOSPACE
);
5276 if( m_nOpenParaToken
!= HtmlTokenId::NONE
)
5279 m_pPam
->Move( fnMoveBackward
);
5281 // ...and save in a context
5282 std::unique_ptr
<HTMLAttrContext
> xCntxt(
5283 new HTMLAttrContext(HtmlTokenId::HORZRULE
, RES_POOLCOLL_HTML_HR
, OUString()));
5285 PushContext(xCntxt
);
5287 // set the new style
5288 SetTextCollAttrs(m_aContexts
.back().get());
5290 // the hard attributes of the current paragraph will never become invalid
5291 m_aParaAttrs
.clear();
5293 if( nSize
>0 || bColor
|| bNoShade
)
5295 // set line colour and/or width
5299 SvxBorderLine
aBorderLine( &aColor
);
// Convert the pixel SIZE to twips; only the height component is used below.
5302 tools::Long nPWidth
= 0;
5303 tools::Long nPHeight
= static_cast<tools::Long
>(nSize
);
5304 SvxCSS1Parser::PixelToTwip( nPWidth
, nPHeight
);
// NOTE(review): the conditions selecting between the following line
// style/width variants are not visible in this listing.
5307 aBorderLine
.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE
);
5309 aBorderLine
.SetWidth( nPHeight
);
5313 aBorderLine
.SetWidth( SvxBorderLineWidth::Medium
);
5317 aBorderLine
.SetBorderLineStyle(SvxBorderLineStyle::DOUBLE
);
5318 aBorderLine
.SetWidth(SvxBorderLineWidth::Hairline
);
// The rule is drawn as the bottom border of the paragraph's box item.
5321 SvxBoxItem
aBoxItem(RES_BOX
);
5322 aBoxItem
.SetLine( &aBorderLine
, SvxBoxItemLine::BOTTOM
);
5323 HTMLAttr
* pTmp
= new HTMLAttr(*m_pPam
->GetPoint(), aBoxItem
, nullptr, std::shared_ptr
<HTMLAttrTable
>());
5324 m_aSetAttrTab
.push_back( pTmp
);
5328 // If we aren't in a table, then the width value will be "faked" with
5329 // paragraph indents. That makes little sense in a table. In order to
5330 // avoid that the line is considered during the width calculation, it
5331 // still gets an appropriate LRSpace-Item.
5334 // fake length and alignment of line above paragraph indents
5335 tools::Long nBrowseWidth
= GetCurrentBrowseWidth();
5336 nWidth
= bPercentWidth
? o3tl::narrowing
<sal_uInt16
>((nWidth
*nBrowseWidth
) / 100)
5337 : ToTwips( o3tl::narrowing
<sal_uInt16
>(nBrowseWidth
) );
5338 if( nWidth
< MINLAY
)
// Margins are only needed when the rule is narrower than the browse width.
5341 const SwFormatColl
*pColl
= (static_cast<tools::Long
>(nWidth
) < nBrowseWidth
) ? GetCurrFormatColl() : nullptr;
5344 tools::Long nDist
= nBrowseWidth
- nWidth
;
5345 ::std::optional
<SvxTextLeftMarginItem
> oLeft
;
5346 ::std::optional
<SvxRightMarginItem
> oRight
;
// Right-aligned: pad on the left; left-aligned: pad on the right;
// centred: pad on both sides.
// NOTE(review): any adjustment of nDist for the centred case is not
// visible in this listing.
5350 case SvxAdjust::Right
:
5351 oLeft
.emplace(o3tl::narrowing
<sal_uInt16
>(nDist
), RES_MARGIN_TEXTLEFT
);
5353 case SvxAdjust::Left
:
5354 oRight
.emplace(o3tl::narrowing
<sal_uInt16
>(nDist
), RES_MARGIN_RIGHT
);
5356 case SvxAdjust::Center
:
5359 oLeft
.emplace(o3tl::narrowing
<sal_uInt16
>(nDist
), RES_MARGIN_TEXTLEFT
);
5360 oRight
.emplace(o3tl::narrowing
<sal_uInt16
>(nDist
), RES_MARGIN_RIGHT
);
5366 HTMLAttr
* pTmp
= new HTMLAttr(*m_pPam
->GetPoint(), *oLeft
, nullptr, std::shared_ptr
<HTMLAttrTable
>());
5367 m_aSetAttrTab
.push_back( pTmp
);
5371 HTMLAttr
* pTmp
= new HTMLAttr(*m_pPam
->GetPoint(), *oRight
, nullptr, std::shared_ptr
<HTMLAttrTable
>());
5372 m_aSetAttrTab
.push_back( pTmp
);
5378 // it's not possible to insert bookmarks in links
5379 if( !aId
.isEmpty() )
5380 InsertBookmark( aId
);
5382 // pop current context of stack
5383 std::unique_ptr
<HTMLAttrContext
> xPoppedContext(PopContext(HtmlTokenId::HORZRULE
));
5384 xPoppedContext
.reset();
5386 m_pPam
->Move( fnMoveForward
);
5388 // and set the current style in the next paragraph
// Processes META options that need Writer-specific handling: reads the
// NAME/HTTP-EQUIV and CONTENT options, checks for the generator, refresh,
// content-type and content-script-type names, forwards endnote/footnote
// settings to FillEndNoteInfo/FillFootNoteInfo, and otherwise builds a text
// from the meta keywords that is inserted as a post-it field.
// NOTE(review): this listing omits some original lines (braces, 'break'
// and 'return' statements, the bHTTPEquiv assignments, parts of the string
// concatenation) - confirm against the full file.
5392 void SwHTMLParser::ParseMoreMetaOptions()
5394 OUString aName
, aContent
;
5395 bool bHTTPEquiv
= false;
5397 const HTMLOptions
& rHTMLOptions
= GetOptions();
5398 for (size_t i
= rHTMLOptions
.size(); i
; )
5400 const HTMLOption
& rOption
= rHTMLOptions
[--i
];
5401 switch( rOption
.GetToken() )
// NAME and HTTP-EQUIV both store their value in aName.
5403 case HtmlOptionId::NAME
:
5404 aName
= rOption
.GetString();
5407 case HtmlOptionId::HTTPEQUIV
:
5408 aName
= rOption
.GetString();
5411 case HtmlOptionId::CONTENT
:
5412 aContent
= rOption
.GetString();
5418 // Here things get a little tricky: We know for sure, that the Doc-Info
5419 // wasn't changed. Therefore it's enough to query for Generator and Refresh
5420 // to find a not processed Token. These are the only ones which won't change
5422 if( aName
.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_generator
) ||
5423 aName
.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_refresh
) ||
5424 aName
.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_type
) ||
5425 aName
.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_script_type
) )
// Strip carriage returns and line feeds from the content.
5428 aContent
= aContent
.replaceAll("\r", "").replaceAll("\n", "");
5430 if( aName
.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdendnote
) )
5432 FillEndNoteInfo( aContent
);
5436 if( aName
.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_sdfootnote
) )
5438 FillFootNoteInfo( aContent
);
// Build the text for the post-it field from the meta/http-equiv/name/content
// keywords (remaining pieces of the expression are not visible here).
5442 OUStringBuffer
sText(
5444 OOO_STRING_SVTOOLS_HTML_meta
5447 sText
.append(OOO_STRING_SVTOOLS_HTML_O_httpequiv
);
5449 sText
.append(OOO_STRING_SVTOOLS_HTML_O_name
);
5453 OOO_STRING_SVTOOLS_HTML_O_content
// Insert the text as a post-it field stamped with the current system time.
5458 SwPostItField
aPostItField(
5459 static_cast<SwPostItFieldType
*>(m_xDoc
->getIDocumentFieldsAccess().GetSysFieldType( SwFieldIds::Postit
)),
5460 OUString(), sText
.makeStringAndClear(), OUString(), OUString(), DateTime(DateTime::SYSTEM
));
5461 SwFormatField
aFormatField( aPostItField
);
5462 InsertAttr( aFormatField
, false );
// Constructs an attribute that starts and ends at rPos: start/end paragraph
// are the position's node, start/end content its content index. The item is
// stored as a clone of rItem; xAttrTab is moved into the member.
// NOTE(review): some initialisers (e.g. for m_bValid, m_ppHead and the
// prev/next links) and the body are not visible in this listing.
5465 HTMLAttr::HTMLAttr( const SwPosition
& rPos
, const SfxPoolItem
& rItem
,
5466 HTMLAttr
**ppHd
, std::shared_ptr
<HTMLAttrTable
> xAttrTab
) :
5467 m_nStartPara( rPos
.GetNode() ),
5468 m_nEndPara( rPos
.GetNode() ),
5469 m_nStartContent( rPos
.GetContentIndex() ),
5470 m_nEndContent(rPos
.GetContentIndex() ),
5471 m_bInsAtStart( true ),
5472 m_bLikePara( false ),
5474 m_pItem( rItem
.Clone() ),
5475 m_xAttrTab(std::move( xAttrTab
)),
// Constructs an attribute from rAttr with a new end position: the start
// paragraph/content and the flags (insert-at-start, like-para, valid) are
// taken from rAttr, the end is rEndPara/nEndCnt, and the item is a clone of
// rAttr's item; xAttrTab is moved into the member.
// NOTE(review): the remaining initialisers (e.g. for m_ppHead and the
// prev/next links) and the body are not visible in this listing.
5482 HTMLAttr::HTMLAttr( const HTMLAttr
&rAttr
, const SwNode
&rEndPara
,
5483 sal_Int32 nEndCnt
, HTMLAttr
**ppHd
, std::shared_ptr
<HTMLAttrTable
> xAttrTab
) :
5484 m_nStartPara( rAttr
.m_nStartPara
),
5485 m_nEndPara( rEndPara
),
5486 m_nStartContent( rAttr
.m_nStartContent
),
5487 m_nEndContent( nEndCnt
),
5488 m_bInsAtStart( rAttr
.m_bInsAtStart
),
5489 m_bLikePara( rAttr
.m_bLikePara
),
5490 m_bValid( rAttr
.m_bValid
),
5491 m_pItem( rAttr
.m_pItem
->Clone() ),
5492 m_xAttrTab(std::move( xAttrTab
)),
// Destructor of HTMLAttr.
// NOTE(review): the body is not visible in this listing.
5499 HTMLAttr::~HTMLAttr()
// Creates a new heap-allocated HTMLAttr that keeps this attribute's start
// position but ends at rEndPara/nEndCnt, sharing the same head pointer and
// attribute table. The new attribute takes over this attribute's m_pPrev.
// NOTE(review): the return statement is not visible in this listing;
// presumably pNew is returned and the caller takes ownership - confirm.
5503 HTMLAttr
*HTMLAttr::Clone(const SwNode
& rEndPara
, sal_Int32 nEndCnt
) const
5505 // create the attribute anew with old start position
5506 HTMLAttr
*pNew
= new HTMLAttr( *this, rEndPara
, nEndCnt
, m_ppHead
, m_xAttrTab
);
5508 // The Previous-List must be taken over, the Next-List not!
5509 pNew
->m_pPrev
= m_pPrev
;
// Re-positions the attribute: both start and end are set to
// rSttPara/nSttCnt, and the attribute table is replaced by rAttrTab.
// NOTE(review): the statements that update the head pointer and clear the
// links (announced by the comment below) are not visible in this listing.
5514 void HTMLAttr::Reset(const SwNode
& rSttPara
, sal_Int32 nSttCnt
,
5515 HTMLAttr
**ppHd
, const std::shared_ptr
<HTMLAttrTable
>& rAttrTab
)
5517 // reset the start (and the end)
5518 m_nStartPara
= rSttPara
;
5519 m_nStartContent
= nSttCnt
;
5520 m_nEndPara
= rSttPara
;
5521 m_nEndContent
= nSttCnt
;
5523 // correct the head and nullify link
5527 m_xAttrTab
= rAttrTab
;
// Appends pPrv to the end of this attribute's "previous" chain.
// pPrv is detached first (its next link and head pointer are cleared); the
// debug assertions check that it was either unlinked or linked to this
// attribute / the same head.
5530 void HTMLAttr::InsertPrev( HTMLAttr
*pPrv
)
5532 OSL_ENSURE( !pPrv
->m_pNext
|| pPrv
->m_pNext
== this,
5533 "HTMLAttr::InsertPrev: pNext wrong" );
5534 pPrv
->m_pNext
= nullptr;
5536 OSL_ENSURE( nullptr == pPrv
->m_ppHead
|| m_ppHead
== pPrv
->m_ppHead
,
5537 "HTMLAttr::InsertPrev: ppHead wrong" );
5538 pPrv
->m_ppHead
= nullptr;
// Walk to the last element of the existing previous-chain and hook pPrv there.
5540 HTMLAttr
*pAttr
= this;
5541 while( pAttr
->GetPrev() )
5542 pAttr
= pAttr
->GetPrev();
5544 pAttr
->m_pPrev
= pPrv
;
// Parses META options: always delegates to the base class implementation
// first and, when that reports false and a new document is being created,
// additionally runs ParseMoreMetaOptions().
// NOTE(review): the return statement is not visible in this listing;
// presumably 'ret' is returned - confirm against the full file.
5547 bool SwHTMLParser::ParseMetaOptions(
5548 const uno::Reference
<document::XDocumentProperties
> & i_xDocProps
,
5549 SvKeyValueIterator
*i_pHeader
)
5551 // always call base ParseMetaOptions, it sets the encoding (#i96700#)
5552 bool ret( HTMLParser::ParseMetaOptions(i_xDocProps
, i_pHeader
) );
5553 if (!ret
&& IsNewDoc())
5555 ParseMoreMetaOptions();
// Records a user-defined meta name in the first still-empty slot of
// m_InfoNames (four slots are checked); pName is nullptr when all four
// slots are already occupied.
// NOTE(review): the null check guarding the final assignment is not visible
// in this listing - confirm against the full file.
5560 // override so we can parse DOCINFO field subtypes INFO[1-4]
5561 void SwHTMLParser::AddMetaUserDefined( OUString
const & i_rMetaName
)
5563 // unless we already have 4 names, append the argument to m_InfoNames
5564 OUString
* pName
// the first empty string in m_InfoNames
5565 (m_InfoNames
[0].isEmpty() ? &m_InfoNames
[0] :
5566 (m_InfoNames
[1].isEmpty() ? &m_InfoNames
[1] :
5567 (m_InfoNames
[2].isEmpty() ? &m_InfoNames
[2] :
5568 (m_InfoNames
[3].isEmpty() ? &m_InfoNames
[3] : nullptr ))));
5571 (*pName
) = i_rMetaName
;
// Evaluates the filter options of the medium: clears the namespace left
// over from a previous read and, when the SID_FILE_FILTEROPTIONS string
// starts with "xhtmlns=", stores the remainder as the new namespace.
// NOTE(review): the null checks on m_pMedium/pItemSet/pItem and any further
// option handling are not visible in this listing - confirm.
5575 void HTMLReader::SetupFilterOptions()
5577 // Reset state from previous Read() invocation.
5578 m_aNamespace
.clear();
5583 const SfxItemSet
* pItemSet
= m_pMedium
->GetItemSet();
5587 auto pItem
= pItemSet
->GetItem
<SfxStringItem
>(SID_FILE_FILTEROPTIONS
);
5591 OUString aFilterOptions
= pItem
->GetValue();
5592 static const OUStringLiteral
aXhtmlNsKey(u
"xhtmlns=");
5593 if (aFilterOptions
.startsWith(aXhtmlNsKey
))
// Everything after the "xhtmlns=" key is the namespace.
5595 OUString aNamespace
= aFilterOptions
.copy(aXhtmlNsKey
.getLength());
5596 m_aNamespace
= aNamespace
;
// Helper class; an instance is created as a local at the start of
// TestImportHTML().
// NOTE(review): the class body is not visible in this listing; judging by the
// name it guards font-cache state for the duration of the import - confirm.
5602 class FontCacheGuard
// Test entry point: imports HTML from rStream into a freshly created
// internal SwDocShell document. The document is flagged as "in reading"
// around the Read() call; bRet records whether Read() returned ERRCODE_NONE.
// std::runtime_error and std::out_of_range thrown during the read are caught.
// NOTE(review): the declarations of aReader/bRet, the try block, the catch
// bodies and the return statement are not visible in this listing.
5612 bool TestImportHTML(SvStream
&rStream
)
5614 FontCacheGuard aFontCacheGuard
;
5616 aReader
.m_pStream
= &rStream
;
5618 SwGlobals::ensure();
5620 SfxObjectShellLock
xDocSh(new SwDocShell(SfxObjectCreateMode::INTERNAL
));
5621 xDocSh
->DoInitNew();
5622 SwDoc
*pD
= static_cast<SwDocShell
*>((&xDocSh
))->GetDoc();
// Position the cursor one node before the end-of-content node.
5624 SwPaM
aPaM(pD
->GetNodes().GetEndOfContent(), SwNodeOffset(-1));
5625 pD
->SetInReading(true);
5629 bRet
= aReader
.Read(*pD
, OUString(), aPaM
, OUString()) == ERRCODE_NONE
;
5631 catch (const std::runtime_error
&)
5634 catch (const std::out_of_range
&)
5637 pD
->SetInReading(false);
5642 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */