Get the style color and number just once
[LibreOffice.git] / sc / source / filter / html / htmlpars.cxx
blob6c225c3020c85f6827c524da5f87711408ad48d3
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <memory>
21 #include <sal/config.h>
23 #include <comphelper/lok.hxx>
24 #include <comphelper/string.hxx>
26 #include <scitems.hxx>
28 #include <editeng/colritem.hxx>
29 #include <editeng/crossedoutitem.hxx>
30 #include <editeng/brushitem.hxx>
31 #include <editeng/editeng.hxx>
32 #include <editeng/fhgtitem.hxx>
33 #include <editeng/fontitem.hxx>
34 #include <editeng/postitem.hxx>
35 #include <editeng/udlnitem.hxx>
36 #include <editeng/wghtitem.hxx>
37 #include <editeng/borderline.hxx>
38 #include <editeng/boxitem.hxx>
39 #include <editeng/justifyitem.hxx>
40 #include <sal/log.hxx>
41 #include <sfx2/objsh.hxx>
42 #include <sfx2/lokhelper.hxx>
43 #include <svl/numformat.hxx>
44 #include <svl/intitem.hxx>
45 #include <utility>
46 #include <vcl/graphicfilter.hxx>
47 #include <svtools/parhtml.hxx>
48 #include <svtools/htmlkywd.hxx>
49 #include <svtools/htmltokn.h>
51 #include <vcl/outdev.hxx>
52 #include <vcl/svapp.hxx>
53 #include <tools/hostfilter.hxx>
54 #include <tools/urlobj.hxx>
55 #include <osl/diagnose.h>
56 #include <o3tl/string_view.hxx>
58 #include <rtl/tencinfo.h>
60 #include <attrib.hxx>
61 #include <htmlpars.hxx>
62 #include <global.hxx>
63 #include <document.hxx>
64 #include <docsh.hxx>
65 #include <rangelst.hxx>
67 #include <orcus/css_parser.hpp>
68 #include <boost/property_tree/json_parser.hpp>
70 #include <com/sun/star/document/XDocumentProperties.hpp>
71 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
72 #include <com/sun/star/frame/XModel.hpp>
73 #include <numeric>
74 #include <officecfg/Office/Common.hxx>
76 using ::editeng::SvxBorderLine;
77 using namespace ::com::sun::star;
79 namespace
81 /// data-sheets-value from google sheets, value is a JSON.
82 void ParseDataSheetsValue(const OUString& rDataSheetsValue, std::optional<OUString>& rVal, std::optional<OUString>& rNum)
84 OString aEncodedOption = rDataSheetsValue.toUtf8();
85 const char* pEncodedOption = aEncodedOption.getStr();
86 std::stringstream aStream(pEncodedOption);
87 boost::property_tree::ptree aTree;
88 try
90 boost::property_tree::read_json(aStream, aTree);
92 catch (const std::exception&)
94 SAL_WARN("sc", "ParseDataSheetsValue: not well-formed json");
95 return;
97 // The "1" key describes the original data type.
98 auto it = aTree.find("1");
99 if (it != aTree.not_found())
101 int nValueType = std::stoi(it->second.get_value<std::string>());
102 switch (nValueType)
104 case 2:
106 // 2 is text.
107 // See SfxHTMLParser::GetTableDataOptionsValNum(), we leave the parse and a number
108 // language unspecified.
109 rNum = ";;@";
110 break;
112 case 3:
114 // 3 is number.
115 it = aTree.find("3");
116 if (it != aTree.not_found())
118 rVal = OUString::fromUtf8(it->second.get_value<std::string>());
120 break;
122 case 4:
124 // 4 is boolean.
125 it = aTree.find("4");
126 if (it != aTree.not_found())
128 rVal = OUString::fromUtf8(it->second.get_value<std::string>());
130 rNum = ";;BOOLEAN";
131 break;
137 /// data-sheets-numberformat from google sheets, value is a JSON.
138 void ParseDataSheetsNumberformat(const OUString& rDataSheetsValue, std::optional<OUString>& rNum)
140 OString aEncodedOption = rDataSheetsValue.toUtf8();
141 const char* pEncodedOption = aEncodedOption.getStr();
142 std::stringstream aStream(pEncodedOption);
143 boost::property_tree::ptree aTree;
144 boost::property_tree::read_json(aStream, aTree);
145 // The "1" key describes the other keys.
146 auto it = aTree.find("1");
147 if (it != aTree.not_found())
149 int nType = std::stoi(it->second.get_value<std::string>());
150 switch (nType)
152 case 2:
154 // 2 is number format.
155 it = aTree.find("2");
156 if (it != aTree.not_found())
158 // Leave the parse and a number language unspecified.
159 OUString aNum = ";;" + OUString::fromUtf8(it->second.get_value<std::string>());
160 rNum = aNum;
162 break;
168 /// data-sheets-formula from google sheets, grammar is R1C1 reference style.
169 void ParseDataSheetsFormula(const OUString& rDataSheetsFormula, std::optional<OUString>& rVal,
170 std::optional<formula::FormulaGrammar::Grammar>& rGrammar)
172 rVal = rDataSheetsFormula;
173 rGrammar = formula::FormulaGrammar::GRAM_ENGLISH_XL_R1C1;
177 ScHTMLStyles::ScHTMLStyles() : maEmpty() {}
179 void ScHTMLStyles::add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
180 const OUString& aProp, const OUString& aValue)
182 if (nElemName)
184 OUString aElem(pElemName, nElemName, RTL_TEXTENCODING_UTF8);
185 aElem = aElem.toAsciiLowerCase();
186 if (nClassName)
188 // Both element and class names given.
189 ElemsType::iterator itrElem = m_ElemProps.find(aElem);
190 if (itrElem == m_ElemProps.end())
192 // new element
193 std::pair<ElemsType::iterator, bool> r =
194 m_ElemProps.insert(std::make_pair(aElem, NamePropsType()));
195 if (!r.second)
196 // insertion failed.
197 return;
198 itrElem = r.first;
201 NamePropsType& rClsProps = itrElem->second;
202 OUString aClass(pClassName, nClassName, RTL_TEXTENCODING_UTF8);
203 aClass = aClass.toAsciiLowerCase();
204 insertProp(rClsProps, aClass, aProp, aValue);
206 else
208 // Element name only. Add it to the element global.
209 insertProp(m_ElemGlobalProps, aElem, aProp, aValue);
212 else
214 if (nClassName)
216 // Class name only. Add it to the global.
217 OUString aClass(pClassName, nClassName, RTL_TEXTENCODING_UTF8);
218 aClass = aClass.toAsciiLowerCase();
219 insertProp(m_GlobalProps, aClass, aProp, aValue);
224 const OUString& ScHTMLStyles::getPropertyValue(
225 const OUString& rElem, const OUString& rClass, const OUString& rPropName) const
227 // First, look into the element-class storage.
229 auto const itr = m_ElemProps.find(rElem);
230 if (itr != m_ElemProps.end())
232 const NamePropsType& rClasses = itr->second;
233 NamePropsType::const_iterator itr2 = rClasses.find(rClass);
234 if (itr2 != rClasses.end())
236 const PropsType& rProps = itr2->second;
237 PropsType::const_iterator itr3 = rProps.find(rPropName);
238 if (itr3 != rProps.end())
239 return itr3->second;
243 // Next, look into the class global storage.
245 auto const itr = m_GlobalProps.find(rClass);
246 if (itr != m_GlobalProps.end())
248 const PropsType& rProps = itr->second;
249 PropsType::const_iterator itr2 = rProps.find(rPropName);
250 if (itr2 != rProps.end())
251 return itr2->second;
254 // As the last resort, look into the element global storage.
256 auto const itr = m_ElemGlobalProps.find(rClass);
257 if (itr != m_ElemGlobalProps.end())
259 const PropsType& rProps = itr->second;
260 PropsType::const_iterator itr2 = rProps.find(rPropName);
261 if (itr2 != rProps.end())
262 return itr2->second;
266 return maEmpty; // nothing found.
269 void ScHTMLStyles::insertProp(
270 NamePropsType& rStore, const OUString& aName,
271 const OUString& aProp, const OUString& aValue)
273 NamePropsType::iterator itr = rStore.find(aName);
274 if (itr == rStore.end())
276 // new element
277 std::pair<NamePropsType::iterator, bool> r =
278 rStore.insert(std::make_pair(aName, PropsType()));
279 if (!r.second)
280 // insertion failed.
281 return;
283 itr = r.first;
286 PropsType& rProps = itr->second;
287 rProps.emplace(aProp, aValue);
290 // BASE class for HTML parser classes
292 ScHTMLParser::ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc ) :
293 ScEEParser( pEditEngine ),
294 mpDoc( pDoc )
296 maFontHeights[0] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_1::get() * 20;
297 maFontHeights[1] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_2::get() * 20;
298 maFontHeights[2] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_3::get() * 20;
299 maFontHeights[3] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_4::get() * 20;
300 maFontHeights[4] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_5::get() * 20;
301 maFontHeights[5] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_6::get() * 20;
302 maFontHeights[6] = officecfg::Office::Common::Filter::HTML::Import::FontSize::Size_7::get() * 20;
305 ScHTMLParser::~ScHTMLParser()
309 ScHTMLLayoutParser::ScHTMLLayoutParser(
310 EditEngine* pEditP, OUString _aBaseURL, const Size& aPageSizeP,
311 ScDocument* pDocP ) :
312 ScHTMLParser( pEditP, pDocP ),
313 aPageSize( aPageSizeP ),
314 aBaseURL(std::move( _aBaseURL )),
315 xLockedList( new ScRangeList ),
316 xLocalColOffset( new ScHTMLColOffset ),
317 nFirstTableCell(0),
318 nTableLevel(0),
319 nTable(0),
320 nMaxTable(0),
321 nColCntStart(0),
322 nMaxCol(0),
323 nTableWidth(0),
324 nColOffset(0),
325 nColOffsetStart(0),
326 nOffsetTolerance( SC_HTML_OFFSET_TOLERANCE_SMALL ),
327 bFirstRow( true ),
328 bTabInTabCell( false ),
329 bInCell( false ),
330 bInTitle( false )
332 MakeColNoRef( xLocalColOffset.get(), 0, 0, 0, 0 );
333 MakeColNoRef( &maColOffset, 0, 0, 0, 0 );
336 ScHTMLLayoutParser::~ScHTMLLayoutParser()
338 while (!aTableStack.empty())
339 aTableStack.pop();
340 xLocalColOffset.reset();
341 if ( pTables )
343 for( auto& rEntry : *pTables)
344 rEntry.second.reset();
345 pTables.reset();
349 ErrCode ScHTMLLayoutParser::Read( SvStream& rStream, const OUString& rBaseURL )
351 Link<HtmlImportInfo&,void> aOldLink = pEdit->GetHtmlImportHdl();
352 pEdit->SetHtmlImportHdl( LINK( this, ScHTMLLayoutParser, HTMLImportHdl ) );
354 ScDocShell* pObjSh = mpDoc->GetDocumentShell();
355 bool bLoading = pObjSh && pObjSh->IsLoading();
357 SvKeyValueIteratorRef xValues;
358 SvKeyValueIterator* pAttributes = nullptr;
359 if ( bLoading )
360 pAttributes = pObjSh->GetHeaderAttributes();
361 else
363 // When not loading, set up fake http headers to force the SfxHTMLParser to use UTF8
364 // (used when pasting from clipboard)
365 const char* pCharSet = rtl_getBestMimeCharsetFromTextEncoding( RTL_TEXTENCODING_UTF8 );
366 if( pCharSet )
368 OUString aContentType = "text/html; charset=" +
369 OUString::createFromAscii( pCharSet );
371 xValues = new SvKeyValueIterator;
372 xValues->Append( SvKeyValue( u"" OOO_STRING_SVTOOLS_HTML_META_content_type ""_ustr, aContentType ) );
373 pAttributes = xValues.get();
377 ErrCode nErr = pEdit->Read( rStream, rBaseURL, EETextFormat::Html, pAttributes );
379 pEdit->SetHtmlImportHdl( aOldLink );
380 // Create column width
381 Adjust();
382 OutputDevice* pDefaultDev = Application::GetDefaultDevice();
383 sal_uInt16 nCount = maColOffset.size();
384 sal_uLong nOff = maColOffset[0];
385 Size aSize;
386 for ( sal_uInt16 j = 1; j < nCount; j++ )
388 aSize.setWidth( maColOffset[j] - nOff );
389 aSize = pDefaultDev->PixelToLogic( aSize, MapMode( MapUnit::MapTwip ) );
390 maColWidths[ j-1 ] = aSize.Width();
391 nOff = maColOffset[j];
393 return nErr;
396 const ScHTMLTable* ScHTMLLayoutParser::GetGlobalTable() const
398 return nullptr;
401 void ScHTMLLayoutParser::NewActEntry( const ScEEParseEntry* pE )
403 ScEEParser::NewActEntry( pE );
404 if ( pE )
406 if ( !pE->aSel.HasRange() )
407 { // Completely empty, following text ends up in the same paragraph!
408 mxActEntry->aSel.start = pE->aSel.end;
411 mxActEntry->aSel.CollapseToStart();
414 void ScHTMLLayoutParser::EntryEnd( ScEEParseEntry* pE, const ESelection& rSel )
416 if (rSel.end.nPara >= pE->aSel.start.nPara)
418 pE->aSel.end = rSel.end;
420 else if (rSel.start.nPara == pE->aSel.start.nPara - 1 && !pE->aSel.HasRange())
421 { // Did not attach a paragraph, but empty, do nothing
423 else
425 OSL_FAIL( "EntryEnd: EditEngine ESelection End < Start" );
429 void ScHTMLLayoutParser::NextRow( const HtmlImportInfo* pInfo )
431 if ( bInCell )
432 CloseEntry( pInfo );
433 if ( nRowMax < ++nRowCnt )
434 nRowMax = nRowCnt;
435 nColCnt = nColCntStart;
436 nColOffset = nColOffsetStart;
437 bFirstRow = false;
440 bool ScHTMLLayoutParser::SeekOffset( const ScHTMLColOffset* pOffset, sal_uInt16 nOffset,
441 SCCOL* pCol, sal_uInt16 nOffsetTol )
443 assert(pOffset && "ScHTMLLayoutParser::SeekOffset - illegal call");
444 ScHTMLColOffset::const_iterator it = pOffset->find( nOffset );
445 bool bFound = it != pOffset->end();
446 size_t nPos = it - pOffset->begin();
447 if (nPos > o3tl::make_unsigned(std::numeric_limits<SCCOL>::max()))
448 return false;
449 *pCol = static_cast<SCCOL>(nPos);
450 if ( bFound )
451 return true;
452 sal_uInt16 nCount = pOffset->size();
453 if ( !nCount )
454 return false;
455 // nPos is the position of insertion, that's where the next higher one is (or isn't)
456 if ( nPos < nCount && (((*pOffset)[nPos] - nOffsetTol) <= nOffset) )
457 return true;
458 // Not smaller than everything else? Then compare with the next lower one
459 else if ( nPos && (((*pOffset)[nPos-1] + nOffsetTol) >= nOffset) )
461 (*pCol)--;
462 return true;
464 return false;
467 void ScHTMLLayoutParser::MakeCol( ScHTMLColOffset* pOffset, sal_uInt16& nOffset,
468 sal_uInt16& nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol )
470 assert(pOffset && "ScHTMLLayoutParser::MakeCol - illegal call");
471 SCCOL nPos;
472 if ( SeekOffset( pOffset, nOffset, &nPos, nOffsetTol ) )
473 nOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
474 else
475 pOffset->insert( nOffset );
476 if ( nWidth )
478 if ( SeekOffset( pOffset, nOffset + nWidth, &nPos, nWidthTol ) )
479 nWidth = static_cast<sal_uInt16>((*pOffset)[nPos]) - nOffset;
480 else
481 pOffset->insert( nOffset + nWidth );
485 void ScHTMLLayoutParser::MakeColNoRef( ScHTMLColOffset* pOffset, sal_uInt16 nOffset,
486 sal_uInt16 nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol )
488 assert(pOffset && "ScHTMLLayoutParser::MakeColNoRef - illegal call");
489 SCCOL nPos;
490 if ( SeekOffset( pOffset, nOffset, &nPos, nOffsetTol ) )
491 nOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
492 else
493 pOffset->insert( nOffset );
494 if ( nWidth )
496 if ( !SeekOffset( pOffset, nOffset + nWidth, &nPos, nWidthTol ) )
497 pOffset->insert( nOffset + nWidth );
501 void ScHTMLLayoutParser::ModifyOffset( ScHTMLColOffset* pOffset, sal_uInt16& nOldOffset,
502 sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol )
504 assert(pOffset && "ScHTMLLayoutParser::ModifyOffset - illegal call");
505 SCCOL nPos;
506 if ( !SeekOffset( pOffset, nOldOffset, &nPos, nOffsetTol ) )
508 if ( SeekOffset( pOffset, nNewOffset, &nPos, nOffsetTol ) )
509 nNewOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
510 else
511 pOffset->insert( nNewOffset );
512 return ;
514 nOldOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
515 SCCOL nPos2;
516 if ( SeekOffset( pOffset, nNewOffset, &nPos2, nOffsetTol ) )
518 nNewOffset = static_cast<sal_uInt16>((*pOffset)[nPos2]);
519 return ;
521 tools::Long nDiff = nNewOffset - nOldOffset;
522 if ( nDiff < 0 )
526 const_cast<sal_uLong&>((*pOffset)[nPos]) += nDiff;
527 } while ( nPos-- );
529 else
533 const_cast<sal_uLong&>((*pOffset)[nPos]) += nDiff;
534 } while ( ++nPos < static_cast<sal_uInt16>(pOffset->size()) );
538 void ScHTMLLayoutParser::SkipLocked( ScEEParseEntry* pE, bool bJoin )
540 if ( !mpDoc->ValidCol(pE->nCol) )
541 return;
543 // Or else this would create a wrong value at ScAddress (chance for an infinite loop)!
544 bool bBadCol = false;
545 bool bAgain;
547 SCCOL nEndCol(0);
548 SCROW nEndRow(0);
549 bool bFail = o3tl::checked_add<SCCOL>(pE->nCol, pE->nColOverlap - 1, nEndCol) ||
550 o3tl::checked_add<SCROW>(pE->nRow, pE->nRowOverlap - 1, nEndRow);
552 if (bFail || nEndRow > mpDoc->MaxRow())
554 SAL_WARN("sc", "invalid range: " << pE->nCol << " " << pE->nColOverlap <<
555 " " << pE->nRow << " " << pE->nRowOverlap);
556 return;
559 ScRange aRange(pE->nCol, pE->nRow, 0, nEndCol, nEndRow, 0);
562 bAgain = false;
563 for ( size_t i = 0, nRanges = xLockedList->size(); i < nRanges; ++i )
565 ScRange & rR = (*xLockedList)[i];
566 if ( rR.Intersects( aRange ) )
568 SCCOL nTmp(0);
569 bFail = o3tl::checked_add<SCCOL>(rR.aEnd.Col(), 1, pE->nCol) ||
570 o3tl::checked_add<SCCOL>(pE->nCol, pE->nRowOverlap - 1, nTmp);
571 if ( bFail || pE->nCol > mpDoc->MaxCol() || nTmp > mpDoc->MaxCol() )
572 bBadCol = true;
573 else
575 bAgain = true;
576 aRange.aStart.SetCol( pE->nCol );
577 aRange.aEnd.SetCol( nTmp );
579 break;
582 } while ( bAgain );
583 if ( bJoin && !bBadCol )
584 xLockedList->Join( aRange );
/// Post-processing pass over all collected entries (maList).
///
/// For every entry it fixes the row number, extends the row overlap by the
/// per-row values stored in pTables, maps the entry's offset to a column
/// index in maColOffset, skips over already occupied ranges, derives the
/// column overlap from the entry's width, records the occupied range in
/// xLockedList and updates nColMax / nRowMax.
587 void ScHTMLLayoutParser::Adjust()
    // Start with an empty lock list; it is rebuilt entry by entry below.
589 xLockedList->RemoveAll();
    // State saved when a higher table number starts, restored when it ends.
591 std::stack< std::unique_ptr<ScHTMLAdjustStackEntry> > aStack;
592 sal_uInt16 nTab = 0;
593 SCCOL nLastCol = SCCOL_MAX;
594 SCROW nNextRow = 0;
595 SCROW nCurRow = 0;
596 sal_uInt16 nPageWidth = static_cast<sal_uInt16>(aPageSize.Width());
    // Row map of the current table (looked up in pTables), if any.
597 InnerMap* pTab = nullptr;
598 for (auto& pE : maList)
    // Entry carries a lower table number than the current one: restore the
    // saved column/row state (if any) and switch to that table's row map.
600 if ( pE->nTab < nTab )
601 { // Table finished
602 if ( !aStack.empty() )
604 std::unique_ptr<ScHTMLAdjustStackEntry> pS = std::move(aStack.top());
605 aStack.pop();
607 nLastCol = pS->nLastCol;
608 nNextRow = pS->nNextRow;
609 nCurRow = pS->nCurRow;
611 nTab = pE->nTab;
612 if (pTables)
614 OuterMap::const_iterator it = pTables->find( nTab );
615 if ( it != pTables->end() )
616 pTab = it->second.get();
    // Remember the row as parsed; used below for the row map lookups.
620 SCROW nRow = pE->nRow;
    // A column that is not right of the previous entry's column starts a new row.
621 if ( pE->nCol <= nLastCol )
622 { // Next row
623 if ( pE->nRow < nNextRow )
624 pE->nRow = nCurRow = nNextRow;
625 else
626 nCurRow = nNextRow = pE->nRow;
    // Advance nNextRow by the value stored for this row, or by 1 if there is none.
627 SCROW nR = 0;
628 if ( pTab )
630 InnerMap::const_iterator it = pTab->find( nCurRow );
631 if ( it != pTab->end() )
632 nR = it->second;
634 if ( nR )
635 nNextRow += nR;
636 else
637 nNextRow++;
639 else
640 pE->nRow = nCurRow;
641 nLastCol = pE->nCol; // Read column
    // Entry carries a higher table number: save the current state and
    // recompute nNextRow from the new table's row map.
642 if ( pE->nTab > nTab )
643 { // New table
644 aStack.push( std::make_unique<ScHTMLAdjustStackEntry>(
645 nLastCol, nNextRow, nCurRow ) );
646 nTab = pE->nTab;
647 if ( pTables )
649 OuterMap::const_iterator it = pTables->find( nTab );
650 if ( it != pTables->end() )
651 pTab = it->second.get();
653 // New line spacing
654 SCROW nR = 0;
655 if ( pTab )
657 InnerMap::const_iterator it = pTab->find( nCurRow );
658 if ( it != pTab->end() )
659 nR = it->second;
661 if ( nR )
662 nNextRow = nCurRow + nR;
663 else
664 nNextRow = nCurRow + 1;
    // Entries with table number 0 get the page width.
666 if ( nTab == 0 )
667 pE->nWidth = nPageWidth;
668 else
669 { // Real table, no paragraphs on the field
670 if ( pTab )
    // nRowSpan is a snapshot: the loop bound does not grow while
    // pE->nRowOverlap is extended inside the loop.
672 SCROW nRowSpan = pE->nRowOverlap;
673 for ( SCROW j=0; j < nRowSpan; j++ )
674 { // RowSpan resulting from merged rows
675 SCROW nRows = 0;
676 InnerMap::const_iterator it = pTab->find( nRow+j );
677 if ( it != pTab->end() )
678 nRows = it->second;
679 if ( nRows > 1 )
681 pE->nRowOverlap += nRows - 1;
682 if ( j == 0 )
683 { // Merged rows move the next row
684 SCROW nTmp = nCurRow + nRows;
685 if ( nNextRow < nTmp )
686 nNextRow = nTmp;
692 // Real column
    // Map the entry's offset to a column index; the found/not-found result
    // is deliberately ignored.
693 (void)SeekOffset( &maColOffset, pE->nOffset, &pE->nCol, nOffsetTolerance );
694 SCCOL nColBeforeSkip = pE->nCol;
    // bJoin is false here: the range is joined explicitly further down.
695 SkipLocked(pE.get(), false);
    // The entry was moved: re-derive its offset from the new column. If the
    // column lies outside the known offsets, use the last offset and
    // register the column via MakeCol().
696 if ( pE->nCol != nColBeforeSkip )
698 size_t nCount = maColOffset.size();
699 if (pE->nCol < 0 || nCount <= o3tl::make_unsigned(pE->nCol))
701 pE->nOffset = static_cast<sal_uInt16>(maColOffset[nCount-1]);
702 MakeCol( &maColOffset, pE->nOffset, pE->nWidth, nOffsetTolerance, nOffsetTolerance );
704 else
706 pE->nOffset = static_cast<sal_uInt16>(maColOffset[pE->nCol]);
    // Column overlap: distance from the entry's column to the column of its
    // right edge, at least 1.
709 SCCOL nPos;
710 if ( pE->nWidth && SeekOffset( &maColOffset, pE->nOffset + pE->nWidth, &nPos, nOffsetTolerance ) )
711 pE->nColOverlap = (nPos > pE->nCol ? nPos - pE->nCol : 1);
712 else
714 //FIXME: This may not be correct, but works anyway ...
715 pE->nColOverlap = 1;
    // Saturating arithmetic is used for the exclusive end column/row.
717 SCCOL nColTmp = o3tl::saturating_add(pE->nCol, pE->nColOverlap);
718 SCROW nRowTmp = o3tl::saturating_add(pE->nRow, pE->nRowOverlap);
    // Record the occupied range so that later entries are moved past it.
719 xLockedList->Join(ScRange(pE->nCol, pE->nRow, 0,
720 o3tl::saturating_sub<SCCOL>(nColTmp, 1),
721 o3tl::saturating_sub<SCROW>(nRowTmp, 1), 0));
722 // Take over MaxDimensions
723 if ( nColMax < nColTmp )
724 nColMax = nColTmp;
725 if ( nRowMax < nRowTmp )
726 nRowMax = nRowTmp;
730 sal_uInt16 ScHTMLLayoutParser::GetWidth( const ScEEParseEntry* pE )
732 if ( pE->nWidth )
733 return pE->nWidth;
734 sal_Int32 nTmp = std::min( static_cast<sal_Int32>( pE->nCol -
735 nColCntStart + pE->nColOverlap),
736 static_cast<sal_Int32>( xLocalColOffset->size() - 1));
737 SCCOL nPos = (nTmp < 0 ? 0 : static_cast<SCCOL>(nTmp));
738 sal_uInt16 nOff2 = static_cast<sal_uInt16>((*xLocalColOffset)[nPos]);
739 if ( pE->nOffset < nOff2 )
740 return nOff2 - pE->nOffset;
741 return 0;
/// Computes the column offsets of the current table (nTable) into
/// xLocalColOffset and assigns offset and width to the table's entries
/// (entries of maList from nFirstTableCell on whose nTab equals nTable).
///
/// Two cases are handled: either the local offset list has at most two
/// entries, in which case the table width is split evenly across the
/// columns, or it has more, in which case the known cell widths are
/// collected per column and the remaining width is shared among the columns
/// without one. Afterwards the page width is enlarged if needed and the
/// offsets are registered in maColOffset.
744 void ScHTMLLayoutParser::SetWidths()
    // No table width known: fall back to the page width.
746 if ( !nTableWidth )
747 nTableWidth = static_cast<sal_uInt16>(aPageSize.Width());
    // Number of columns in this table, at least 1.
748 SCCOL nColsPerRow = nMaxCol - nColCntStart;
749 if ( nColsPerRow <= 0 )
750 nColsPerRow = 1;
751 if ( xLocalColOffset->size() <= 2 )
752 { // Only PageSize, there was no width setting
    // Equal width for every column; rebuild the offset list with
    // nColsPerRow + 1 boundaries starting at nColOffsetStart.
753 sal_uInt16 nWidth = nTableWidth / static_cast<sal_uInt16>(nColsPerRow);
754 sal_uInt16 nOff = nColOffsetStart;
755 xLocalColOffset->clear();
756 for (int nCol = 0; nCol <= nColsPerRow; ++nCol, nOff = nOff + nWidth)
758 MakeColNoRef( xLocalColOffset.get(), nOff, 0, 0, 0 );
    // The integer division above may have dropped a remainder, so the table
    // width is re-derived from the offsets actually stored.
760 nTableWidth = static_cast<sal_uInt16>(xLocalColOffset->back() - xLocalColOffset->front());
761 const auto nColsAvailable = xLocalColOffset->size();
762 for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
764 auto& pE = maList[ i ];
765 if ( pE->nTab == nTable )
    // nColRequested is unsigned; a column left of nColCntStart wraps to a
    // large value and is caught by the range check below.
767 const size_t nColRequested = pE->nCol - nColCntStart;
768 if (nColRequested < nColsAvailable)
769 pE->nOffset = static_cast<sal_uInt16>((*xLocalColOffset)[nColRequested]);
770 else
771 SAL_WARN("sc", "missing information for column: " << nColRequested);
772 pE->nWidth = 0; // to be recalculated later
776 else
777 { // Some without width
778 // Why actually no pE?
779 if ( nFirstTableCell < maList.size() )
    // pOffsets holds nColsPerRow + 1 column boundaries, pWidths one width
    // per column; both start zeroed.
781 std::unique_ptr<sal_uInt16[]> pOffsets(new sal_uInt16[ nColsPerRow+1 ]);
782 memset( pOffsets.get(), 0, (nColsPerRow+1) * sizeof(sal_uInt16) );
783 std::unique_ptr<sal_uInt16[]> pWidths(new sal_uInt16[ nColsPerRow ]);
784 memset( pWidths.get(), 0, nColsPerRow * sizeof(sal_uInt16) );
785 pOffsets[0] = nColOffsetStart;
    // Pass 1: collect widths from the cells that have one.
786 for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
788 auto& pE = maList[ i ];
789 if ( pE->nTab == nTable && pE->nWidth )
791 SCCOL nCol = pE->nCol - nColCntStart;
792 if (nCol >= 0 && nCol < nColsPerRow)
    // Single-column cell: keep the largest width seen for that column.
794 if ( pE->nColOverlap == 1 )
796 if ( pWidths[nCol] < pE->nWidth )
797 pWidths[nCol] = pE->nWidth;
799 else
800 { // try to find a single undefined width
    // Spanning cell: if exactly one spanned column has no width yet, give it
    // what is left of the cell width after the known columns. A second
    // unknown column aborts the attempt.
801 sal_uInt16 nTotal = 0;
802 bool bFound = false;
803 SCCOL nHere = 0;
804 SCCOL nStop = std::min( static_cast<SCCOL>(nCol + pE->nColOverlap), nColsPerRow );
805 for ( ; nCol < nStop; nCol++ )
807 if ( pWidths[nCol] )
808 nTotal = nTotal + pWidths[nCol];
809 else
811 if ( bFound )
813 bFound = false;
814 break; // for
816 bFound = true;
817 nHere = nCol;
820 if ( bFound && pE->nWidth > nTotal )
821 pWidths[nHere] = pE->nWidth - nTotal;
    // Pass 2: sum the known widths and count the columns still without one.
826 sal_uInt16 nWidths = 0;
827 sal_uInt16 nUnknown = 0;
828 for (SCCOL nCol = 0; nCol < nColsPerRow; nCol++)
830 if ( pWidths[nCol] )
831 nWidths = nWidths + pWidths[nCol];
832 else
833 nUnknown++;
    // Share the remaining table width among the unknown columns; if the
    // known widths already reach the table width, share the whole table
    // width instead.
835 if ( nUnknown )
837 sal_uInt16 nW = ((nWidths < nTableWidth) ?
838 ((nTableWidth - nWidths) / nUnknown) :
839 (nTableWidth / nUnknown));
840 for (SCCOL nCol = 0; nCol < nColsPerRow; nCol++)
842 if ( !pWidths[nCol] )
843 pWidths[nCol] = nW;
    // Turn widths into running offsets and rebuild the local offset list.
846 for (int nCol = 1; nCol <= nColsPerRow; nCol++)
848 pOffsets[nCol] = pOffsets[nCol-1] + pWidths[nCol-1];
850 xLocalColOffset->clear();
851 for (int nCol = 0; nCol <= nColsPerRow; nCol++)
853 MakeColNoRef( xLocalColOffset.get(), pOffsets[nCol], 0, 0, 0 );
855 nTableWidth = pOffsets[nColsPerRow] - pOffsets[0];
    // Pass 3: assign offset and width to each entry of this table; entries
    // whose column is out of range are skipped.
857 for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
859 auto& pE = maList[ i ];
860 if (pE->nTab != nTable)
861 continue;
862 SCCOL nCol = pE->nCol - nColCntStart;
863 OSL_ENSURE( nCol < nColsPerRow, "ScHTMLLayoutParser::SetWidths: column overflow" );
864 if (nCol >= nColsPerRow)
865 continue;
866 if (nCol < 0)
868 SAL_WARN("sc", "negative offset: " << nCol);
869 continue;
871 pE->nOffset = pOffsets[nCol];
    // nCol now becomes the column right after the span, clamped to the table.
872 nCol = nCol + pE->nColOverlap;
873 if ( nCol > nColsPerRow )
874 nCol = nColsPerRow;
875 if (nCol < 0)
877 SAL_WARN("sc", "negative offset: " << nCol);
878 continue;
880 pE->nWidth = pOffsets[nCol] - pE->nOffset;
    // Enlarge the page width if the table reaches further right.
884 if ( !xLocalColOffset->empty() )
886 sal_uInt16 nMax = static_cast<sal_uInt16>(xLocalColOffset->back());
887 if ( aPageSize.Width() < nMax )
888 aPageSize.setWidth( nMax );
889 if (nTableLevel == 0)
891 // Local table is very outer table, create missing offsets.
892 for (auto it = xLocalColOffset->begin(); it != xLocalColOffset->end(); ++it)
894 // Only exact offsets, do not use MakeColNoRef().
895 maColOffset.insert(*it);
    // Final pass: fill in missing widths via GetWidth() and register every
    // entry of this table in maColOffset.
899 for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
901 auto& pE = maList[ i ];
902 if ( pE->nTab == nTable )
904 if ( !pE->nWidth )
906 pE->nWidth = GetWidth(pE.get());
907 OSL_ENSURE( pE->nWidth, "SetWidths: pE->nWidth == 0" );
909 MakeCol( &maColOffset, pE->nOffset, pE->nWidth, nOffsetTolerance, nOffsetTolerance );
914 void ScHTMLLayoutParser::Colonize( ScEEParseEntry* pE )
916 if ( pE->nCol == SCCOL_MAX )
917 pE->nCol = nColCnt;
918 if ( pE->nRow == SCROW_MAX )
919 pE->nRow = nRowCnt;
920 SCCOL nCol = pE->nCol;
921 SkipLocked( pE ); // Change of columns to the right
923 if ( nCol < pE->nCol )
924 { // Replaced
925 nCol = pE->nCol - nColCntStart;
926 SCCOL nCount = static_cast<SCCOL>(xLocalColOffset->size());
927 if (nCol >= 0 && nCol < nCount)
928 nColOffset = static_cast<sal_uInt16>((*xLocalColOffset)[nCol]);
929 else
930 nColOffset = static_cast<sal_uInt16>((*xLocalColOffset)[nCount - 1]);
932 pE->nOffset = nColOffset;
933 sal_uInt16 nWidth = GetWidth( pE );
934 MakeCol( xLocalColOffset.get(), pE->nOffset, nWidth, nOffsetTolerance, nOffsetTolerance );
935 if ( pE->nWidth )
936 pE->nWidth = nWidth;
937 nColOffset = pE->nOffset + nWidth;
938 if ( nTableWidth < nColOffset - nColOffsetStart )
939 nTableWidth = nColOffset - nColOffsetStart;
942 void ScHTMLLayoutParser::CloseEntry( const HtmlImportInfo* pInfo )
944 bInCell = false;
945 if ( bTabInTabCell )
946 { // From the stack in TableOff
947 bTabInTabCell = false;
948 SAL_WARN_IF(maList.empty(), "sc", "unexpected close entry without open");
949 NewActEntry(maList.empty() ? nullptr : maList.back().get()); // New free flying mxActEntry
950 return ;
952 if (mxActEntry->nTab == 0)
953 mxActEntry->nWidth = static_cast<sal_uInt16>(aPageSize.Width());
954 Colonize(mxActEntry.get());
955 nColCnt = mxActEntry->nCol + mxActEntry->nColOverlap;
956 if ( nMaxCol < nColCnt )
957 nMaxCol = nColCnt; // TableStack MaxCol
958 if ( nColMax < nColCnt )
959 nColMax = nColCnt; // Global MaxCol for ScEEParser GetDimensions!
960 EntryEnd(mxActEntry.get(), pInfo->aSelection);
961 ESelection& rSel = mxActEntry->aSel;
962 while ( rSel.start.nPara < rSel.end.nPara
963 && pEdit->GetTextLen( rSel.start.nPara ) == 0 )
964 { // Strip preceding empty paragraphs
965 rSel.start.nPara++;
967 while ( rSel.end.nIndex == 0 && rSel.end.nPara > rSel.start.nPara )
968 { // Strip successive empty paragraphs
969 rSel.end.nPara--;
970 rSel.end.nIndex = pEdit->GetTextLen( rSel.end.nPara );
972 if ( rSel.start.nPara > rSel.end.nPara )
973 { // Gives GPF in CreateTextObject
974 OSL_FAIL( "CloseEntry: EditEngine ESelection Start > End" );
975 rSel.end.nPara = rSel.start.nPara;
977 if ( rSel.HasRange() )
978 mxActEntry->aItemSet.Put( ScLineBreakCell(true) );
979 maList.push_back(mxActEntry);
980 NewActEntry(mxActEntry.get()); // New free flying mxActEntry
983 IMPL_LINK( ScHTMLLayoutParser, HTMLImportHdl, HtmlImportInfo&, rInfo, void )
985 switch ( rInfo.eState )
987 case HtmlImportState::NextToken:
988 ProcToken( &rInfo );
989 break;
990 case HtmlImportState::Start:
991 break;
992 case HtmlImportState::End:
993 if (rInfo.aSelection.end.nIndex)
995 // If text remains: create paragraph, without calling CloseEntry().
996 if( bInCell ) // ...but only in opened table cells.
998 bInCell = false;
999 NextRow( &rInfo );
1000 bInCell = true;
1002 CloseEntry( &rInfo );
1004 while ( nTableLevel > 0 )
1005 TableOff( &rInfo ); // close tables, if </TABLE> missing
1006 break;
1007 case HtmlImportState::SetAttr:
1008 break;
1009 case HtmlImportState::InsertText:
1010 break;
1011 case HtmlImportState::InsertPara:
1012 if ( nTableLevel < 1 )
1014 CloseEntry( &rInfo );
1015 NextRow( &rInfo );
1017 break;
1018 case HtmlImportState::InsertField:
1019 break;
1020 default:
1021 OSL_FAIL("HTMLImportHdl: unknown ImportInfo.eState");
1025 void ScHTMLLayoutParser::HandleDataSheetsAttributes(const HTMLOptions& rOptions)
1027 for (const auto& rOption : rOptions)
1029 switch (rOption.GetToken())
1031 case HtmlOptionId::DSVAL:
1033 ParseDataSheetsValue(rOption.GetString(), mxActEntry->pValStr, mxActEntry->pNumStr);
1034 break;
1036 case HtmlOptionId::DSNUM:
1038 ParseDataSheetsNumberformat(rOption.GetString(), mxActEntry->pNumStr);
1039 break;
1041 case HtmlOptionId::DSFORMULA:
1043 ParseDataSheetsFormula(rOption.GetString(), mxActEntry->moFormulaStr,
1044 mxActEntry->moFormulaGrammar);
1045 break;
1047 default:
1048 break;
1053 void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
1055 if ( bInCell )
1056 CloseEntry( pInfo );
1057 if ( !nTableLevel )
1059 OSL_FAIL( "dumbo doc! <TH> or <TD> without previous <TABLE>" );
1060 TableOn( pInfo );
1062 bInCell = true;
1063 bool bHorJustifyCenterTH = (pInfo->nToken == HtmlTokenId::TABLEHEADER_ON);
1064 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1065 for (const auto & rOption : rOptions)
1067 switch( rOption.GetToken() )
1069 case HtmlOptionId::COLSPAN:
1071 sal_Int32 nColOverlap = rOption.GetString().toInt32();
1072 if (nColOverlap >= 0 && nColOverlap <= mpDoc->MaxCol())
1073 mxActEntry->nColOverlap = static_cast<SCCOL>(nColOverlap);
1074 else
1075 SAL_WARN("sc", "ScHTMLLayoutParser::TableDataOn ignoring colspan: " << nColOverlap);
1077 break;
1078 case HtmlOptionId::ROWSPAN:
1080 sal_Int32 nRowOverlap = rOption.GetString().toInt32();
1081 if (nRowOverlap >= 0 && nRowOverlap <= mpDoc->MaxRow())
1082 mxActEntry->nRowOverlap = static_cast<SCROW>(nRowOverlap);
1083 else
1084 SAL_WARN("sc", "ScHTMLLayoutParser::TableDataOn ignoring rowspan: " << nRowOverlap);
1085 if (comphelper::IsFuzzing())
1086 mxActEntry->nRowOverlap = std::min(mxActEntry->nRowOverlap, sal_Int32(1024));
1088 break;
1089 case HtmlOptionId::ALIGN:
1091 bHorJustifyCenterTH = false;
1092 SvxCellHorJustify eVal;
1093 const OUString& rOptVal = rOption.GetString();
1094 if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
1095 eVal = SvxCellHorJustify::Right;
1096 else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_center ) )
1097 eVal = SvxCellHorJustify::Center;
1098 else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
1099 eVal = SvxCellHorJustify::Left;
1100 else
1101 eVal = SvxCellHorJustify::Standard;
1102 if ( eVal != SvxCellHorJustify::Standard )
1103 mxActEntry->aItemSet.Put(SvxHorJustifyItem(eVal, ATTR_HOR_JUSTIFY));
1105 break;
1106 case HtmlOptionId::VALIGN:
1108 SvxCellVerJustify eVal;
1109 const OUString& rOptVal = rOption.GetString();
1110 if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_top ) )
1111 eVal = SvxCellVerJustify::Top;
1112 else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_middle ) )
1113 eVal = SvxCellVerJustify::Center;
1114 else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_bottom ) )
1115 eVal = SvxCellVerJustify::Bottom;
1116 else
1117 eVal = SvxCellVerJustify::Standard;
1118 mxActEntry->aItemSet.Put(SvxVerJustifyItem(eVal, ATTR_VER_JUSTIFY));
1120 break;
1121 case HtmlOptionId::WIDTH:
1123 mxActEntry->nWidth = GetWidthPixel(rOption);
1125 break;
1126 case HtmlOptionId::BGCOLOR:
1128 Color aColor;
1129 rOption.GetColor( aColor );
1130 mxActEntry->aItemSet.Put(SvxBrushItem(aColor, ATTR_BACKGROUND));
1132 break;
1133 case HtmlOptionId::SDVAL:
1135 mxActEntry->pValStr = rOption.GetString();
1137 break;
1138 case HtmlOptionId::SDNUM:
1140 mxActEntry->pNumStr = rOption.GetString();
1142 break;
1143 default: break;
1147 HandleDataSheetsAttributes(rOptions);
1149 mxActEntry->nCol = nColCnt;
1150 mxActEntry->nRow = nRowCnt;
1151 mxActEntry->nTab = nTable;
1153 if ( bHorJustifyCenterTH )
1154 mxActEntry->aItemSet.Put(
1155 SvxHorJustifyItem( SvxCellHorJustify::Center, ATTR_HOR_JUSTIFY) );
1158 void ScHTMLLayoutParser::SpanOn(HtmlImportInfo* pInfo)
1160 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1161 HandleDataSheetsAttributes(rOptions);
1164 void ScHTMLLayoutParser::TableRowOn( const HtmlImportInfo* pInfo )
1166 if ( nColCnt > nColCntStart )
1167 NextRow( pInfo ); // The optional TableRowOff wasn't there
1168 nColOffset = nColOffsetStart;
1171 void ScHTMLLayoutParser::TableRowOff( const HtmlImportInfo* pInfo )
1173 NextRow( pInfo );
1176 void ScHTMLLayoutParser::TableDataOff( const HtmlImportInfo* pInfo )
1178 if ( bInCell )
1179 CloseEntry( pInfo ); // Only if it really was one
// Opens a table. Increments nTableLevel and distinguishes two cases:
//  - nested table (level > 1): the current parser state is pushed onto
//    aTableStack, the inner table width is derived from the enclosing entry
//    (optionally overridden by a WIDTH option) and a new free entry plus a
//    new locked-range list are created;
//  - document-level table: pending free text is closed, the state is pushed
//    onto aTableStack and the table width is read from a WIDTH option.
// In both cases a new table number is assigned afterwards and a fresh local
// column-offset container is started at nColOffsetStart.
// pInfo->nToken may also be a TD/TH token when the cell appeared without a
// preceding TABLE tag; options are only evaluated for TABLE_ON.
1182 void ScHTMLLayoutParser::TableOn( HtmlImportInfo* pInfo )
1184 if ( ++nTableLevel > 1 )
1185 { // Table in Table
1186 sal_uInt16 nTmpColOffset = nColOffset; // Will be changed in Colonize()
1187 Colonize(mxActEntry.get());
// Save the state of the enclosing table; TableOff() pops and restores it.
1188 aTableStack.push( std::make_unique<ScHTMLTableStackEntry>(
1189 mxActEntry, xLockedList, xLocalColOffset, nFirstTableCell,
1190 nRowCnt, nColCntStart, nMaxCol, nTable,
1191 nTableWidth, nColOffset, nColOffsetStart,
1192 bFirstRow ) );
1193 sal_uInt16 nLastWidth = nTableWidth;
1194 nTableWidth = GetWidth(mxActEntry.get());
1195 if ( nTableWidth == nLastWidth && nMaxCol - nColCntStart > 1 )
1196 { // There must be more than one, so this one cannot be enough
1197 nTableWidth = nLastWidth / static_cast<sal_uInt16>((nMaxCol - nColCntStart));
1199 nLastWidth = nTableWidth;
1200 if ( pInfo->nToken == HtmlTokenId::TABLE_ON )
1201 { // It can still be TD or TH, if we didn't have a TABLE earlier
1202 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1203 for (const auto & rOption : rOptions)
1205 switch( rOption.GetToken() )
1207 case HtmlOptionId::WIDTH:
1208 { // Percent: of document width or outer cell
1209 nTableWidth = GetWidthPixel( rOption );
1211 break;
1212 case HtmlOptionId::BORDER:
1213 // Border is: ((pOption->GetString().Len() == 0) || (pOption->GetNumber() != 0));
1214 break;
1215 default: break;
1219 bInCell = false;
// nLastWidth is the width derived from the enclosing cell, nTableWidth the
// (possibly WIDTH-overridden) width of the new table.
1220 if ( bTabInTabCell && (nTableWidth >= nLastWidth) )
1221 { // Multiple tables in one cell, underneath each other
1222 bTabInTabCell = false;
1223 NextRow( pInfo );
1225 else
1226 { // It starts in this cell or next to each other
1227 bTabInTabCell = false;
1228 nColCntStart = nColCnt;
1229 nColOffset = nTmpColOffset;
1230 nColOffsetStart = nColOffset;
1233 NewActEntry(!maList.empty() ? maList.back().get() : nullptr); // New free flying mxActEntry
1234 xLockedList = new ScRangeList;
1236 else
1237 { // Simple table at the document level
1238 EntryEnd(mxActEntry.get(), pInfo->aSelection);
1239 if (mxActEntry->aSel.HasRange())
1240 { // Flying text left
1241 CloseEntry( pInfo );
1242 NextRow( pInfo );
// Save the document-level state; the simple-table branch of TableOff() pops it.
1244 aTableStack.push( std::make_unique<ScHTMLTableStackEntry>(
1245 mxActEntry, xLockedList, xLocalColOffset, nFirstTableCell,
1246 nRowCnt, nColCntStart, nMaxCol, nTable,
1247 nTableWidth, nColOffset, nColOffsetStart,
1248 bFirstRow ) );
1249 // As soon as we have multiple tables we need to be tolerant with the offsets.
1250 if (nMaxTable > 0)
1251 nOffsetTolerance = SC_HTML_OFFSET_TOLERANCE_LARGE;
1252 nTableWidth = 0;
1253 if ( pInfo->nToken == HtmlTokenId::TABLE_ON )
1255 // It can still be TD or TH, if we didn't have a TABLE earlier
1256 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1257 for (const auto & rOption : rOptions)
1259 switch( rOption.GetToken() )
1261 case HtmlOptionId::WIDTH:
1262 { // Percent: of document width or outer cell
1263 nTableWidth = GetWidthPixel( rOption );
1265 break;
1266 case HtmlOptionId::BORDER:
1267 //BorderOn is: ((pOption->GetString().Len() == 0) || (pOption->GetNumber() != 0));
1268 break;
1269 default: break;
// Common to both cases: allocate the next table number and reset the
// per-table bookkeeping.
1274 nTable = ++nMaxTable;
1275 bFirstRow = true;
1276 nFirstTableCell = maList.size();
1278 xLocalColOffset.reset(new ScHTMLColOffset);
1279 MakeColNoRef( xLocalColOffset.get(), nColOffsetStart, 0, 0, 0 );
// Closes a table. An open cell and an unterminated row are finished first.
// A closing tag without a matching opening tag (nTableLevel == 0) is reported
// and ignored.
//  - Nested table (level still > 0 after decrement): pops the state saved by
//    TableOn(), records row-height mappings between the outer and the inner
//    table in pTables, widens the enclosing cell if the inner table is wider,
//    and restores the outer table's state. The enclosing cell entry becomes
//    the active entry again.
//  - Document-level table: applies the widths, resets nMaxCol/nTable and
//    takes the local column offsets back from the stack entry.
1282 void ScHTMLLayoutParser::TableOff( const HtmlImportInfo* pInfo )
1284 if ( bInCell )
1285 CloseEntry( pInfo );
1286 if ( nColCnt > nColCntStart )
1287 TableRowOff( pInfo ); // The optional TableRowOff wasn't
1288 if ( !nTableLevel )
1290 OSL_FAIL( "dumbo doc! </TABLE> without opening <TABLE>" );
1291 return ;
1293 if ( --nTableLevel > 0 )
1294 { // Table in Table done
1295 if ( !aTableStack.empty() )
1297 std::unique_ptr<ScHTMLTableStackEntry> pS = std::move(aTableStack.top());
1298 aTableStack.pop();
// pE is the cell entry of the outer table that contains the inner table.
1300 auto& pE = pS->xCellEntry;
// Rows consumed by the inner table: current row count minus the row count
// saved when the inner table was opened.
1301 SCROW nRows = nRowCnt - pS->nRowCnt;
1302 if ( nRows > 1 )
1303 { // Insert size of table at this position
1304 SCROW nRow = pS->nRowCnt;
1305 sal_uInt16 nTab = pS->nTable;
1306 if ( !pTables )
1307 pTables.reset( new OuterMap );
1308 // Height of outer table
1309 OuterMap::const_iterator it = pTables->find( nTab );
1310 InnerMap* pTab1;
1311 if ( it == pTables->end() )
1313 pTab1 = new InnerMap;
1314 (*pTables)[ nTab ].reset(pTab1);
1316 else
1317 pTab1 = it->second.get();
1318 SCROW nRowSpan = pE->nRowOverlap;
1319 using SCUROW = std::make_unsigned_t<SCROW>;
1320 SCUROW nRowKGV;
1321 SCROW nRowsPerRow1; // Outer table
1322 SCROW nRowsPerRow2; // Inner table
1323 if ( nRowSpan > 1 )
1324 { // LCM to which we can map the inner and outer rows
1325 nRowKGV = std::lcm<SCUROW>(nRowSpan, nRows);
1326 nRowsPerRow1 = nRowKGV / nRowSpan;
1327 nRowsPerRow2 = nRowKGV / nRows;
1329 else
1331 nRowKGV = nRowsPerRow1 = nRows;
1332 nRowsPerRow2 = 1;
1334 InnerMap* pTab2 = nullptr;
1335 if ( nRowsPerRow2 > 1 )
1336 { // Height of the inner table
1337 pTab2 = new InnerMap;
1338 (*pTables)[ nTable ].reset(pTab2);
1340 // Abuse void* Data entry of the Table class for height mapping
// NOTE(review): the comment above looks historical - the mapping is stored in
// the InnerMap objects (row key -> rows per row) held by pTables; confirm.
1341 if ( nRowKGV > 1 )
1343 if ( nRowsPerRow1 > 1 )
1344 { // Outer
1345 for ( SCROW j=0; j < nRowSpan; j++ )
1347 sal_uLong nRowKey = nRow + j;
1348 SCROW nR = (*pTab1)[ nRowKey ];
1349 if ( !nR )
1350 (*pTab1)[ nRowKey ] = nRowsPerRow1;
1351 else if ( nRowsPerRow1 > nR )
1352 (*pTab1)[ nRowKey ] = nRowsPerRow1;
1353 //TODO: How can we improve on this?
1354 else if ( nRowsPerRow1 < nR && nRowSpan == 1
1355 && nTable == nMaxTable )
1356 { // Still some space left, merge in a better way (if possible)
1357 SCROW nAdd = nRowsPerRow1 - (nR % nRowsPerRow1);
1358 nR += nAdd;
1359 if ( (nR % nRows) == 0 )
1360 { // Only if representable
1361 SCROW nR2 = (*pTab1)[ nRowKey+1 ];
1362 if ( nR2 > nAdd )
1363 { // Only if we really have enough space
1364 (*pTab1)[ nRowKey ] = nR;
1365 (*pTab1)[ nRowKey+1 ] = nR2 - nAdd;
1366 nRowsPerRow2 = nR / nRows;
1372 if ( nRowsPerRow2 > 1 )
1373 { // Inner
1374 if ( !pTab2 )
1375 { // nRowsPerRow2 could have been incremented
1376 pTab2 = new InnerMap;
1377 (*pTables)[ nTable ].reset(pTab2);
1379 for ( SCROW j=0; j < nRows; j++ )
1381 sal_uLong nRowKey = nRow + j;
1382 (*pTab2)[ nRowKey ] = nRowsPerRow2;
1388 SetWidths();
// Make the enclosing cell at least as wide as the inner table; when it has
// to grow, the saved outer column offsets are shifted by the difference.
1390 if ( !pE->nWidth )
1391 pE->nWidth = nTableWidth;
1392 else if ( pE->nWidth < nTableWidth )
1394 sal_uInt16 nOldOffset = pE->nOffset + pE->nWidth;
1395 sal_uInt16 nNewOffset = pE->nOffset + nTableWidth;
1396 ModifyOffset( pS->xLocalColOffset.get(), nOldOffset, nNewOffset, nOffsetTolerance );
1397 sal_uInt16 nTmp = nNewOffset - pE->nOffset - pE->nWidth;
1398 pE->nWidth = nNewOffset - pE->nOffset;
1399 pS->nTableWidth = pS->nTableWidth + nTmp;
1400 if ( pS->nColOffset >= nOldOffset )
1401 pS->nColOffset = pS->nColOffset + nTmp;
// Restore the outer table's parser state from the stack entry.
1404 nColCnt = pE->nCol + pE->nColOverlap;
1405 nRowCnt = pS->nRowCnt;
1406 nColCntStart = pS->nColCntStart;
1407 nMaxCol = pS->nMaxCol;
1408 nTable = pS->nTable;
1409 nTableWidth = pS->nTableWidth;
1410 nFirstTableCell = pS->nFirstTableCell;
1411 nColOffset = pS->nColOffset;
1412 nColOffsetStart = pS->nColOffsetStart;
1413 bFirstRow = pS->bFirstRow;
1414 xLockedList = pS->xLockedList;
1415 xLocalColOffset = pS->xLocalColOffset;
1416 // mxActEntry is kept around if a table is started in the same row
1417 // (anything's possible in HTML); will be deleted by CloseEntry
1418 mxActEntry = pE;
// Tells a following TableOn() that another table may start in the same cell.
1420 bTabInTabCell = true;
1421 bInCell = true;
1423 else
1424 { // Simple table finished
1425 SetWidths();
1426 nMaxCol = 0;
1427 nTable = 0;
1428 if ( !aTableStack.empty() )
1430 ScHTMLTableStackEntry* pS = aTableStack.top().get();
1431 xLocalColOffset = std::move(pS->xLocalColOffset);
1432 aTableStack.pop();
1437 void ScHTMLLayoutParser::Image( HtmlImportInfo* pInfo )
1439 mxActEntry->maImageList.push_back(std::make_unique<ScHTMLImage>());
1440 ScHTMLImage* pImage = mxActEntry->maImageList.back().get();
1441 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1442 for (const auto & rOption : rOptions)
1444 switch( rOption.GetToken() )
1446 case HtmlOptionId::SRC:
1448 pImage->aURL = INetURLObject::GetAbsURL( aBaseURL, rOption.GetString() );
1450 break;
1451 case HtmlOptionId::ALT:
1453 if (!mxActEntry->bHasGraphic)
1454 { // ALT text only if not any image loaded
1455 if (!mxActEntry->aAltText.isEmpty())
1456 mxActEntry->aAltText += "; ";
1458 mxActEntry->aAltText += rOption.GetString();
1461 break;
1462 case HtmlOptionId::WIDTH:
1464 pImage->aSize.setWidth( static_cast<tools::Long>(rOption.GetNumber()) );
1466 break;
1467 case HtmlOptionId::HEIGHT:
1469 pImage->aSize.setHeight( static_cast<tools::Long>(rOption.GetNumber()) );
1471 break;
1472 case HtmlOptionId::HSPACE:
1474 pImage->aSpace.setX( static_cast<tools::Long>(rOption.GetNumber()) );
1476 break;
1477 case HtmlOptionId::VSPACE:
1479 pImage->aSpace.setY( static_cast<tools::Long>(rOption.GetNumber()) );
1481 break;
1482 default: break;
1485 if (pImage->aURL.isEmpty())
1487 OSL_FAIL( "Image: graphic without URL ?!?" );
1488 return ;
1491 if (comphelper::LibreOfficeKit::isActive())
1493 INetURLObject aURL(pImage->aURL);
1494 if (HostFilter::isForbidden(aURL.GetHost()))
1495 SfxLokHelper::sendNetworkAccessError("paste");
1498 sal_uInt16 nFormat;
1499 std::optional<Graphic> oGraphic(std::in_place);
1500 GraphicFilter& rFilter = GraphicFilter::GetGraphicFilter();
1501 if ( ERRCODE_NONE != GraphicFilter::LoadGraphic( pImage->aURL, pImage->aFilterName,
1502 *oGraphic, &rFilter, &nFormat ) )
1504 return ; // Bad luck
1506 if (!mxActEntry->bHasGraphic)
1507 { // discard any ALT text in this cell if we have any image
1508 mxActEntry->bHasGraphic = true;
1509 mxActEntry->aAltText.clear();
1511 pImage->aFilterName = rFilter.GetImportFormatName( nFormat );
1512 pImage->oGraphic = std::move( oGraphic );
1513 if ( !(pImage->aSize.Width() && pImage->aSize.Height()) )
1515 OutputDevice* pDefaultDev = Application::GetDefaultDevice();
1516 pImage->aSize = pDefaultDev->LogicToPixel( pImage->oGraphic->GetPrefSize(),
1517 pImage->oGraphic->GetPrefMapMode() );
1519 if (mxActEntry->maImageList.empty())
1520 return;
1522 tools::Long nWidth = 0;
1523 for (const std::unique_ptr<ScHTMLImage> & pI : mxActEntry->maImageList)
1525 if ( pI->nDir & nHorizontal )
1526 nWidth += pI->aSize.Width() + 2 * pI->aSpace.X();
1527 else
1528 nWidth = 0;
1530 if ( mxActEntry->nWidth
1531 && (nWidth + pImage->aSize.Width() + 2 * pImage->aSpace.X()
1532 >= mxActEntry->nWidth) )
1533 mxActEntry->maImageList.back()->nDir = nVertical;
1536 void ScHTMLLayoutParser::ColOn( HtmlImportInfo* pInfo )
1538 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1539 for (const auto & rOption : rOptions)
1541 if( rOption.GetToken() == HtmlOptionId::WIDTH )
1543 sal_uInt16 nVal = GetWidthPixel( rOption );
1544 MakeCol( xLocalColOffset.get(), nColOffset, nVal, 0, 0 );
1545 nColOffset = nColOffset + nVal;
1550 sal_uInt16 ScHTMLLayoutParser::GetWidthPixel( const HTMLOption& rOption )
1552 const OUString& rOptVal = rOption.GetString();
1553 if ( rOptVal.indexOf('%') != -1 )
1554 { // Percent
1555 sal_uInt16 nW = (nTableWidth ? nTableWidth : static_cast<sal_uInt16>(aPageSize.Width()));
1556 return static_cast<sal_uInt16>((rOption.GetNumber() * nW) / 100);
1558 else
1560 if ( rOptVal.indexOf('*') != -1 )
1561 { // Relative to what?
1562 // TODO: Collect all relative values in ColArray and then MakeCol
1563 return 0;
1565 else
1566 return static_cast<sal_uInt16>(rOption.GetNumber()); // Pixel
1570 void ScHTMLLayoutParser::AnchorOn( HtmlImportInfo* pInfo )
1572 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1573 for (const auto & rOption : rOptions)
1575 if( rOption.GetToken() == HtmlOptionId::NAME )
1576 mxActEntry->pName = rOption.GetString();
1580 bool ScHTMLLayoutParser::IsAtBeginningOfText( const HtmlImportInfo* pInfo )
1582 ESelection& rSel = mxActEntry->aSel;
1583 return rSel.start.nPara == rSel.end.nPara &&
1584 rSel.start.nPara <= pInfo->aSelection.end.nPara &&
1585 pEdit->GetTextLen( rSel.start.nPara ) == 0;
1588 void ScHTMLLayoutParser::FontOn( HtmlImportInfo* pInfo )
1590 if ( !IsAtBeginningOfText( pInfo ) )
1591 return;
1593 // Only at the start of the text; applies to whole line
1594 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1595 for (const auto & rOption : rOptions)
1597 switch( rOption.GetToken() )
1599 case HtmlOptionId::FACE :
1601 const OUString& rFace = rOption.GetString();
1602 OUStringBuffer aFontName;
1603 sal_Int32 nPos = 0;
1604 while( nPos != -1 )
1606 // Font list, VCL uses the semicolon as separator
1607 // HTML uses the comma
1608 std::u16string_view aFName = o3tl::getToken(rFace, 0, ',', nPos );
1609 aFName = comphelper::string::strip(aFName, ' ');
1610 if( !aFontName.isEmpty() )
1611 aFontName.append(";");
1612 aFontName.append(aFName);
1614 if ( !aFontName.isEmpty() )
1615 mxActEntry->aItemSet.Put( SvxFontItem( FAMILY_DONTKNOW,
1616 aFontName.makeStringAndClear(), OUString(), PITCH_DONTKNOW,
1617 RTL_TEXTENCODING_DONTKNOW, ATTR_FONT ) );
1619 break;
1620 case HtmlOptionId::SIZE :
1622 sal_uInt16 nSize = static_cast<sal_uInt16>(rOption.GetNumber());
1623 if ( nSize == 0 )
1624 nSize = 1;
1625 else if ( nSize > SC_HTML_FONTSIZES )
1626 nSize = SC_HTML_FONTSIZES;
1627 mxActEntry->aItemSet.Put( SvxFontHeightItem(
1628 maFontHeights[nSize-1], 100, ATTR_FONT_HEIGHT ) );
1630 break;
1631 case HtmlOptionId::COLOR :
1633 Color aColor;
1634 rOption.GetColor( aColor );
1635 mxActEntry->aItemSet.Put( SvxColorItem( aColor, ATTR_FONT_COLOR ) );
1637 break;
1638 default: break;
1643 void ScHTMLLayoutParser::ProcToken( HtmlImportInfo* pInfo )
1645 switch ( pInfo->nToken )
1647 case HtmlTokenId::META:
1648 if (ScDocShell* pDocSh = mpDoc->GetDocumentShell())
1650 HTMLParser* pParser = static_cast<HTMLParser*>(pInfo->pParser);
1651 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1652 static_cast<cppu::OWeakObject*>(pDocSh->GetModel()), uno::UNO_QUERY_THROW);
1653 pParser->ParseMetaOptions(
1654 xDPS->getDocumentProperties(),
1655 pDocSh->GetHeaderAttributes() );
1657 break;
1658 case HtmlTokenId::TITLE_ON:
1660 bInTitle = true;
1661 aString.clear();
1663 break;
1664 case HtmlTokenId::TITLE_OFF:
1666 ScDocShell* pDocSh = mpDoc->GetDocumentShell();
1667 if ( bInTitle && !aString.isEmpty() && pDocSh )
1669 // Remove blanks from line breaks
1670 aString = aString.trim();
1671 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1672 static_cast<cppu::OWeakObject*>(pDocSh->GetModel()),
1673 uno::UNO_QUERY_THROW);
1674 xDPS->getDocumentProperties()->setTitle(aString);
1676 bInTitle = false;
1678 break;
1679 case HtmlTokenId::TABLE_ON:
1681 TableOn( pInfo );
1683 break;
1684 case HtmlTokenId::COL_ON:
1686 ColOn( pInfo );
1688 break;
1689 case HtmlTokenId::TABLEHEADER_ON: // Opens row
1691 if ( bInCell )
1692 CloseEntry( pInfo );
1693 // Do not set bInCell to true, TableDataOn does that
1694 mxActEntry->aItemSet.Put(
1695 SvxWeightItem( WEIGHT_BOLD, ATTR_FONT_WEIGHT) );
1696 [[fallthrough]];
1698 case HtmlTokenId::TABLEDATA_ON: // Opens cell
1700 TableDataOn( pInfo );
1702 break;
1703 case HtmlTokenId::SPAN_ON:
1705 SpanOn(pInfo);
1707 break;
1708 case HtmlTokenId::TABLEHEADER_OFF:
1709 case HtmlTokenId::TABLEDATA_OFF: // Closes cell
1711 TableDataOff( pInfo );
1713 break;
1714 case HtmlTokenId::TABLEROW_ON: // Before first cell in row
1716 TableRowOn( pInfo );
1718 break;
1719 case HtmlTokenId::TABLEROW_OFF: // After last cell in row
1721 TableRowOff( pInfo );
1723 break;
1724 case HtmlTokenId::TABLE_OFF:
1726 TableOff( pInfo );
1728 break;
1729 case HtmlTokenId::IMAGE:
1731 Image( pInfo );
1733 break;
1734 case HtmlTokenId::PARABREAK_OFF:
1735 { // We continue vertically after an image
1736 if (!mxActEntry->maImageList.empty())
1737 mxActEntry->maImageList.back()->nDir = nVertical;
1739 break;
1740 case HtmlTokenId::ANCHOR_ON:
1742 AnchorOn( pInfo );
1744 break;
1745 case HtmlTokenId::FONT_ON :
1747 FontOn( pInfo );
1749 break;
1750 case HtmlTokenId::BIGPRINT_ON :
1752 // TODO: Remember current font size and increase by 1
1753 if ( IsAtBeginningOfText( pInfo ) )
1754 mxActEntry->aItemSet.Put( SvxFontHeightItem(
1755 maFontHeights[3], 100, ATTR_FONT_HEIGHT ) );
1757 break;
1758 case HtmlTokenId::SMALLPRINT_ON :
1760 // TODO: Remember current font size and decrease by 1
1761 if ( IsAtBeginningOfText( pInfo ) )
1762 mxActEntry->aItemSet.Put( SvxFontHeightItem(
1763 maFontHeights[0], 100, ATTR_FONT_HEIGHT ) );
1765 break;
1766 case HtmlTokenId::BOLD_ON :
1767 case HtmlTokenId::STRONG_ON :
1769 if ( IsAtBeginningOfText( pInfo ) )
1770 mxActEntry->aItemSet.Put( SvxWeightItem( WEIGHT_BOLD,
1771 ATTR_FONT_WEIGHT ) );
1773 break;
1774 case HtmlTokenId::ITALIC_ON :
1775 case HtmlTokenId::EMPHASIS_ON :
1776 case HtmlTokenId::ADDRESS_ON :
1777 case HtmlTokenId::BLOCKQUOTE_ON :
1778 case HtmlTokenId::BLOCKQUOTE30_ON :
1779 case HtmlTokenId::CITATION_ON :
1780 case HtmlTokenId::VARIABLE_ON :
1782 if ( IsAtBeginningOfText( pInfo ) )
1783 mxActEntry->aItemSet.Put( SvxPostureItem( ITALIC_NORMAL,
1784 ATTR_FONT_POSTURE ) );
1786 break;
1787 case HtmlTokenId::DEFINSTANCE_ON :
1789 if ( IsAtBeginningOfText( pInfo ) )
1791 mxActEntry->aItemSet.Put( SvxWeightItem( WEIGHT_BOLD,
1792 ATTR_FONT_WEIGHT ) );
1793 mxActEntry->aItemSet.Put( SvxPostureItem( ITALIC_NORMAL,
1794 ATTR_FONT_POSTURE ) );
1797 break;
1798 case HtmlTokenId::STRIKE_ON:
1799 case HtmlTokenId::STRIKETHROUGH_ON:
1800 case HtmlTokenId::DELETEDTEXT_ON:
1802 if (IsAtBeginningOfText(pInfo))
1803 mxActEntry->aItemSet.Put(SvxCrossedOutItem(STRIKEOUT_SINGLE, ATTR_FONT_CROSSEDOUT));
1805 break;
1806 case HtmlTokenId::UNDERLINE_ON :
1808 if ( IsAtBeginningOfText( pInfo ) )
1809 mxActEntry->aItemSet.Put( SvxUnderlineItem( LINESTYLE_SINGLE,
1810 ATTR_FONT_UNDERLINE ) );
1812 break;
1813 case HtmlTokenId::TEXTTOKEN:
1815 if ( bInTitle )
1816 aString += pInfo->aText;
1818 break;
1819 default: ;
1823 // HTML DATA QUERY PARSER
/** Returns rValue limited to the closed range [rMin, rMax]. */
template< typename Type >
static Type getLimitedValue( const Type& rValue, const Type& rMin, const Type& rMax )
{
    const Type& rLimited = std::clamp( rValue, rMin, rMax );
    return rLimited;
}
1829 ScHTMLEntry::ScHTMLEntry( const SfxItemSet& rItemSet, ScHTMLTableId nTableId ) :
1830 ScEEParseEntry( rItemSet ),
1831 mbImportAlways( false )
1833 nTab = nTableId;
1834 bEntirePara = false;
1837 bool ScHTMLEntry::HasContents() const
1839 return mbImportAlways || aSel.HasRange() || !aAltText.isEmpty() || IsTable();
1842 void ScHTMLEntry::AdjustStart( const HtmlImportInfo& rInfo )
1844 // set start position
1845 aSel.start = rInfo.aSelection.start;
1846 // adjust end position
1847 if (!aSel.IsAdjusted())
1849 aSel.CollapseToStart();
1853 void ScHTMLEntry::AdjustEnd( const HtmlImportInfo& rInfo )
1855 OSL_ENSURE( !(rInfo.aSelection.end < aSel.end),
1856 "ScHTMLQueryParser::AdjustEntryEnd - invalid end position" );
1857 // set end position
1858 aSel.end = rInfo.aSelection.end;
1861 void ScHTMLEntry::Strip( const EditEngine& rEditEngine )
1863 // strip leading empty paragraphs
1864 while( (aSel.start.nPara < aSel.end.nPara) && (rEditEngine.GetTextLen( aSel.start.nPara ) <= aSel.start.nIndex) )
1866 ++aSel.start.nPara;
1867 aSel.start.nIndex = 0;
1869 // strip trailing empty paragraphs
1870 while( (aSel.start.nPara < aSel.end.nPara) && (aSel.end.nIndex == 0) )
1872 --aSel.end.nPara;
1873 aSel.end.nIndex = rEditEngine.GetTextLen( aSel.end.nPara );
1877 /** A map of ScHTMLTable objects.
1879 Organizes the tables with a unique table key. Stores nested tables inside
1880 the parent table and forms in this way a tree structure of tables. An
1881 instance of this class owns the contained table objects and deletes them
1882 on destruction.
1884 class ScHTMLTableMap final
1886 private:
1887 typedef std::shared_ptr< ScHTMLTable > ScHTMLTablePtr;
1888 typedef std::map< ScHTMLTableId, ScHTMLTablePtr > ScHTMLTableStdMap;
1890 public:
1891 typedef ScHTMLTableStdMap::iterator iterator;
1892 typedef ScHTMLTableStdMap::const_iterator const_iterator;
1894 private:
1895 ScHTMLTable& mrParentTable; /// Reference to parent table.
1896 ScHTMLTableStdMap maTables; /// Container for all table objects.
1897 mutable ScHTMLTable* mpCurrTable; /// Current table, used for fast search.
1899 public:
1900 explicit ScHTMLTableMap( ScHTMLTable& rParentTable );
1902 const_iterator begin() const { return maTables.begin(); }
1903 const_iterator end() const { return maTables.end(); }
1905 /** Returns the specified table.
1906 @param nTableId Unique identifier of the table.
1907 @param bDeep true = searches deep in all nested table; false = only in this container. */
1908 ScHTMLTable* FindTable( ScHTMLTableId nTableId, bool bDeep = true ) const;
1910 /** Inserts a new table into the container. This container owns the created table.
1911 @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
1912 ScHTMLTable* CreateTable( const HtmlImportInfo& rInfo, bool bPreFormText, const ScDocument& rDoc );
1914 private:
1915 /** Sets a working table with its index for search optimization. */
1916 void SetCurrTable( ScHTMLTable* pTable ) const
1917 { if( pTable ) mpCurrTable = pTable; }
1920 ScHTMLTableMap::ScHTMLTableMap( ScHTMLTable& rParentTable ) :
1921 mrParentTable(rParentTable),
1922 mpCurrTable(nullptr)
1926 ScHTMLTable* ScHTMLTableMap::FindTable( ScHTMLTableId nTableId, bool bDeep ) const
1928 ScHTMLTable* pResult = nullptr;
1929 if( mpCurrTable && (nTableId == mpCurrTable->GetTableId()) )
1930 pResult = mpCurrTable; // cached table
1931 else
1933 const_iterator aFind = maTables.find( nTableId );
1934 if( aFind != maTables.end() )
1935 pResult = aFind->second.get(); // table from this container
1938 // not found -> search deep in nested tables
1939 if( !pResult && bDeep )
1940 for( const_iterator aIter = begin(), aEnd = end(); !pResult && (aIter != aEnd); ++aIter )
1941 pResult = aIter->second->FindNestedTable( nTableId );
1943 SetCurrTable( pResult );
1944 return pResult;
1947 ScHTMLTable* ScHTMLTableMap::CreateTable( const HtmlImportInfo& rInfo, bool bPreFormText, const ScDocument& rDoc )
1949 ScHTMLTable* pTable = new ScHTMLTable( mrParentTable, rInfo, bPreFormText, rDoc );
1950 maTables[ pTable->GetTableId() ].reset( pTable );
1951 SetCurrTable( pTable );
1952 return pTable;
1955 namespace {
1957 /** Simplified forward iterator for convenience.
1959 Before the iterator can be dereferenced, it must be tested with the is()
1960 method. The iterator may be invalid directly after construction (e.g. empty
1961 container).
1963 class ScHTMLTableIterator
1965 public:
1966 /** Constructs the iterator for the passed table map.
1967 @param pTableMap Pointer to the table map (is allowed to be NULL). */
1968 explicit ScHTMLTableIterator( const ScHTMLTableMap* pTableMap );
1970 bool is() const { return mpTableMap && maIter != maEnd; }
1971 ScHTMLTable* operator->() { return maIter->second.get(); }
1972 ScHTMLTableIterator& operator++() { ++maIter; return *this; }
1974 private:
1975 ScHTMLTableMap::const_iterator maIter;
1976 ScHTMLTableMap::const_iterator maEnd;
1977 const ScHTMLTableMap* mpTableMap;
1982 ScHTMLTableIterator::ScHTMLTableIterator( const ScHTMLTableMap* pTableMap ) :
1983 mpTableMap(pTableMap)
1985 if( pTableMap )
1987 maIter = pTableMap->begin();
1988 maEnd = pTableMap->end();
1992 ScHTMLTableAutoId::ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId ) :
1993 mnTableId( rnUnusedId ),
1994 mrnUnusedId( rnUnusedId )
1996 ++mrnUnusedId;
1999 ScHTMLTable::ScHTMLTable( ScHTMLTable& rParentTable, const HtmlImportInfo& rInfo, bool bPreFormText, const ScDocument& rDoc ) :
2000 mpParentTable( &rParentTable ),
2001 maTableId( rParentTable.maTableId.mrnUnusedId ),
2002 maTableItemSet( rParentTable.GetCurrItemSet() ),
2003 mrEditEngine( rParentTable.mrEditEngine ),
2004 mrEEParseList( rParentTable.mrEEParseList ),
2005 mpCurrEntryVector( nullptr ),
2006 maSize( 1, 1 ),
2007 mpParser(rParentTable.mpParser),
2008 mrDoc(rDoc),
2009 mbBorderOn( false ),
2010 mbPreFormText( bPreFormText ),
2011 mbRowOn( false ),
2012 mbDataOn( false ),
2013 mbPushEmptyLine( false ),
2014 mbCaptionOn ( false )
2016 if( mbPreFormText )
2018 ImplRowOn();
2019 ImplDataOn( ScHTMLSize( 1, 1 ) );
2021 else
2023 ProcessFormatOptions( maTableItemSet, rInfo );
2024 const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2025 for (const auto& rOption : rOptions)
2027 switch( rOption.GetToken() )
2029 case HtmlOptionId::BORDER:
2030 mbBorderOn = rOption.GetString().isEmpty() || (rOption.GetNumber() != 0);
2031 break;
2032 case HtmlOptionId::ID:
2033 maTableName = rOption.GetString();
2034 break;
2035 default: break;
2040 CreateNewEntry( rInfo );
2043 ScHTMLTable::ScHTMLTable(
2044 SfxItemPool& rPool,
2045 EditEngine& rEditEngine,
2046 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
2047 ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser, const ScDocument& rDoc
2049 mpParentTable( nullptr ),
2050 maTableId( rnUnusedId ),
2051 maTableItemSet( rPool ),
2052 mrEditEngine( rEditEngine ),
2053 mrEEParseList( rEEParseList ),
2054 mpCurrEntryVector( nullptr ),
2055 maSize( 1, 1 ),
2056 mpParser(pParser),
2057 mrDoc(rDoc),
2058 mbBorderOn( false ),
2059 mbPreFormText( false ),
2060 mbRowOn( false ),
2061 mbDataOn( false ),
2062 mbPushEmptyLine( false ),
2063 mbCaptionOn ( false )
2065 // open the first "cell" of the document
2066 ImplRowOn();
2067 ImplDataOn( ScHTMLSize( 1, 1 ) );
2068 mxCurrEntry = CreateEntry();
2071 ScHTMLTable::~ScHTMLTable()
2075 const SfxItemSet& ScHTMLTable::GetCurrItemSet() const
2077 // first try cell item set, then row item set, then table item set
2078 return moDataItemSet ? *moDataItemSet : (moRowItemSet ? *moRowItemSet : maTableItemSet);
2081 ScHTMLSize ScHTMLTable::GetSpan( const ScHTMLPos& rCellPos ) const
2083 ScHTMLSize aSpan( 1, 1 );
2084 const ScRange* pRange = maVMergedCells.Find( rCellPos.MakeAddr() );
2085 if (!pRange)
2086 pRange = maHMergedCells.Find( rCellPos.MakeAddr() );
2087 if (pRange)
2088 aSpan.Set( pRange->aEnd.Col() - pRange->aStart.Col() + 1, pRange->aEnd.Row() - pRange->aStart.Row() + 1 );
2089 return aSpan;
2092 ScHTMLTable* ScHTMLTable::FindNestedTable( ScHTMLTableId nTableId ) const
2094 return mxNestedTables ? mxNestedTables->FindTable( nTableId ) : nullptr;
2097 void ScHTMLTable::PutItem( const SfxPoolItem& rItem )
2099 OSL_ENSURE( mxCurrEntry, "ScHTMLTable::PutItem - no current entry" );
2100 if( mxCurrEntry && mxCurrEntry->IsEmpty() )
2101 mxCurrEntry->GetItemSet().Put( rItem );
2104 void ScHTMLTable::PutText( const HtmlImportInfo& rInfo )
2106 OSL_ENSURE( mxCurrEntry, "ScHTMLTable::PutText - no current entry" );
2107 if( mxCurrEntry )
2109 if( !mxCurrEntry->HasContents() && IsSpaceCharInfo( rInfo ) )
2110 mxCurrEntry->AdjustStart( rInfo );
2111 else
2112 mxCurrEntry->AdjustEnd( rInfo );
2113 if (mbCaptionOn)
2114 maCaptionBuffer.append(rInfo.aText);
2119 void ScHTMLTable::InsertPara( const HtmlImportInfo& rInfo )
2121 if( mxCurrEntry && mbDataOn && !IsEmptyCell() )
2122 mxCurrEntry->SetImportAlways();
2123 PushEntry( rInfo );
2124 CreateNewEntry( rInfo );
2125 InsertLeadingEmptyLine();
2128 void ScHTMLTable::BreakOn()
2130 // empty line, if <br> is at start of cell
2131 mbPushEmptyLine = !mbPreFormText && mbDataOn && IsEmptyCell();
2134 void ScHTMLTable::HeadingOn()
2136 // call directly, InsertPara() has not been called before
2137 InsertLeadingEmptyLine();
2140 void ScHTMLTable::InsertLeadingEmptyLine()
2142 // empty line, if <p>, </p>, <h?>, or </h*> are not at start of cell
2143 mbPushEmptyLine = !mbPreFormText && mbDataOn && !IsEmptyCell();
2146 void ScHTMLTable::AnchorOn()
2148 OSL_ENSURE( mxCurrEntry, "ScHTMLTable::AnchorOn - no current entry" );
2149 // don't skip entries with single hyperlinks
2150 if( mxCurrEntry )
2151 mxCurrEntry->SetImportAlways();
2154 ScHTMLTable* ScHTMLTable::TableOn( const HtmlImportInfo& rInfo )
2156 PushEntry( rInfo );
2157 return InsertNestedTable( rInfo, false );
2160 ScHTMLTable* ScHTMLTable::TableOff( const HtmlImportInfo& rInfo )
2162 return mbPreFormText ? this : CloseTable( rInfo );
2165 void ScHTMLTable::CaptionOn()
2167 mbCaptionOn = true;
2168 maCaptionBuffer.setLength(0);
2171 void ScHTMLTable::CaptionOff()
2173 if (!mbCaptionOn)
2174 return;
2175 maCaption = maCaptionBuffer.makeStringAndClear().trim();
2176 mbCaptionOn = false;
2179 ScHTMLTable* ScHTMLTable::PreOn( const HtmlImportInfo& rInfo )
2181 PushEntry( rInfo );
2182 return InsertNestedTable( rInfo, true );
2185 ScHTMLTable* ScHTMLTable::PreOff( const HtmlImportInfo& rInfo )
2187 return mbPreFormText ? CloseTable( rInfo ) : this;
2190 void ScHTMLTable::RowOn( const HtmlImportInfo& rInfo )
2192 PushEntry( rInfo, true );
2193 if( mpParentTable && !mbPreFormText ) // no rows allowed in global and preformatted tables
2195 ImplRowOn();
2196 ProcessFormatOptions( *moRowItemSet, rInfo );
2198 CreateNewEntry( rInfo );
2201 void ScHTMLTable::RowOff( const HtmlImportInfo& rInfo )
2203 PushEntry( rInfo, true );
2204 if( mpParentTable && !mbPreFormText ) // no rows allowed in global and preformatted tables
2205 ImplRowOff();
2206 CreateNewEntry( rInfo );
2209 namespace {
2212 * Decode a number format string stored in Excel-generated HTML's CSS
2213 * region.
2215 OUString decodeNumberFormat(const OUString& rFmt)
2217 OUStringBuffer aBuf;
2218 const sal_Unicode* p = rFmt.getStr();
2219 sal_Int32 n = rFmt.getLength();
2220 for (sal_Int32 i = 0; i < n; ++i, ++p)
2222 if (*p == '\\')
2224 // Skip '\'.
2225 ++i;
2226 ++p;
2228 // Parse all subsequent digits until first non-digit is found.
2229 sal_Int32 nDigitCount = 0;
2230 const sal_Unicode* p1 = p;
2231 for (; i < n; ++i, ++p, ++nDigitCount)
2233 if (*p < '0' || '9' < *p)
2235 --i;
2236 --p;
2237 break;
2241 if (nDigitCount)
2243 // Hex-encoded character found. Decode it back into its
2244 // original character. An example of number format with
2245 // hex-encoded chars: "\0022$\0022\#\,\#\#0\.00"
2246 sal_uInt32 nVal = OUString(p1, nDigitCount).toUInt32(16);
2247 aBuf.append(static_cast<sal_Unicode>(nVal));
2250 else
2251 aBuf.append(*p);
2253 return aBuf.makeStringAndClear();
2258 void ScHTMLTable::DataOn( const HtmlImportInfo& rInfo )
2260 PushEntry( rInfo, true );
2261 if( mpParentTable && !mbPreFormText ) // no cells allowed in global and preformatted tables
2263 // read needed options from the <td> tag
2264 ScHTMLSize aSpanSize( 1, 1 );
2265 std::optional<OUString> pValStr, pNumStr;
2266 const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2267 sal_uInt32 nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
2268 for (const auto& rOption : rOptions)
2270 switch (rOption.GetToken())
2272 case HtmlOptionId::COLSPAN:
2273 aSpanSize.mnCols = static_cast<SCCOL>( getLimitedValue<sal_Int32>( rOption.GetString().toInt32(), 1, 256 ) );
2274 break;
2275 case HtmlOptionId::ROWSPAN:
2276 aSpanSize.mnRows = static_cast<SCROW>( getLimitedValue<sal_Int32>( rOption.GetString().toInt32(), 1, 256 ) );
2277 break;
2278 case HtmlOptionId::SDVAL:
2279 pValStr = rOption.GetString();
2280 break;
2281 case HtmlOptionId::SDNUM:
2282 pNumStr = rOption.GetString();
2283 break;
2284 case HtmlOptionId::CLASS:
2286 // Pick up the number format associated with this class (if
2287 // any).
2288 const OUString& aClass = rOption.GetString();
2289 const ScHTMLStyles& rStyles = mpParser->GetStyles();
2290 const OUString& rVal = rStyles.getPropertyValue(u"td"_ustr, aClass, u"mso-number-format"_ustr);
2291 if (!rVal.isEmpty())
2293 OUString aNumFmt = decodeNumberFormat(rVal);
2295 nNumberFormat = GetFormatTable()->GetEntryKey(aNumFmt);
2296 if (nNumberFormat == NUMBERFORMAT_ENTRY_NOT_FOUND)
2298 sal_Int32 nErrPos = 0;
2299 SvNumFormatType nDummy;
2300 bool bValidFmt = GetFormatTable()->PutEntry(aNumFmt, nErrPos, nDummy, nNumberFormat);
2301 if (!bValidFmt)
2302 nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
2306 break;
2307 case HtmlOptionId::DSVAL:
2309 ParseDataSheetsValue(rOption.GetString(), pValStr, pNumStr);
2311 break;
2312 default: break;
2316 ImplDataOn( aSpanSize );
2318 if (nNumberFormat != NUMBERFORMAT_ENTRY_NOT_FOUND)
2319 moDataItemSet->Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nNumberFormat) );
2321 ProcessFormatOptions( *moDataItemSet, rInfo );
2322 CreateNewEntry( rInfo );
2323 mxCurrEntry->pValStr = std::move(pValStr);
2324 mxCurrEntry->pNumStr = std::move(pNumStr);
2326 else
2327 CreateNewEntry( rInfo );
2330 void ScHTMLTable::DataOff( const HtmlImportInfo& rInfo )
2332 PushEntry( rInfo, true );
2333 if( mpParentTable && !mbPreFormText ) // no cells allowed in global and preformatted tables
2334 ImplDataOff();
2335 CreateNewEntry( rInfo );
2338 void ScHTMLTable::BodyOn( const HtmlImportInfo& rInfo )
2340 bool bPushed = PushEntry( rInfo );
2341 if( !mpParentTable )
2343 // do not start new row, if nothing (no title) precedes the body.
2344 if( bPushed || !mbRowOn )
2345 ImplRowOn();
2346 if( bPushed || !mbDataOn )
2347 ImplDataOn( ScHTMLSize( 1, 1 ) );
2348 ProcessFormatOptions( *moDataItemSet, rInfo );
2350 CreateNewEntry( rInfo );
2353 void ScHTMLTable::BodyOff( const HtmlImportInfo& rInfo )
2355 PushEntry( rInfo );
2356 if( !mpParentTable )
2358 ImplDataOff();
2359 ImplRowOff();
2361 CreateNewEntry( rInfo );
2364 ScHTMLTable* ScHTMLTable::CloseTable( const HtmlImportInfo& rInfo )
2366 if( mpParentTable ) // not allowed to close global table
2368 PushEntry( rInfo, mbDataOn );
2369 ImplDataOff();
2370 ImplRowOff();
2371 mpParentTable->PushTableEntry( GetTableId() );
2372 mpParentTable->CreateNewEntry( rInfo );
2373 if( mbPreFormText ) // enclose preformatted table with empty lines in parent table
2374 mpParentTable->InsertLeadingEmptyLine();
2375 return mpParentTable;
2377 return this;
2380 SCCOLROW ScHTMLTable::GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const
2382 const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2383 size_t nIndex = static_cast< size_t >( nCellPos );
2384 if( nIndex >= rSizes.size() ) return 0;
2385 return (nIndex == 0) ? rSizes.front() : (rSizes[ nIndex ] - rSizes[ nIndex - 1 ]);
2388 SCCOLROW ScHTMLTable::GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const
2390 const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2391 size_t nBeginIdx = static_cast< size_t >( std::max< SCCOLROW >( nCellBegin, 0 ) );
2392 size_t nEndIdx = static_cast< size_t >( std::min< SCCOLROW >( nCellEnd, static_cast< SCCOLROW >( rSizes.size() ) ) );
2393 if (nBeginIdx >= nEndIdx ) return 0;
2394 return rSizes[ nEndIdx - 1 ] - ((nBeginIdx == 0) ? 0 : rSizes[ nBeginIdx - 1 ]);
2397 SCCOLROW ScHTMLTable::GetDocSize( ScHTMLOrient eOrient ) const
2399 const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2400 return rSizes.empty() ? 0 : rSizes.back();
2403 ScHTMLSize ScHTMLTable::GetDocSize( const ScHTMLPos& rCellPos ) const
2405 ScHTMLSize aCellSpan = GetSpan( rCellPos );
2406 return ScHTMLSize(
2407 static_cast< SCCOL >( GetDocSize( tdCol, rCellPos.mnCol, rCellPos.mnCol + aCellSpan.mnCols ) ),
2408 static_cast< SCROW >( GetDocSize( tdRow, rCellPos.mnRow, rCellPos.mnRow + aCellSpan.mnRows ) ) );
2411 SCCOLROW ScHTMLTable::GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const
2413 return maDocBasePos.Get( eOrient ) + GetDocSize( eOrient, 0, nCellPos );
2416 ScHTMLPos ScHTMLTable::GetDocPos( const ScHTMLPos& rCellPos ) const
2418 return ScHTMLPos(
2419 static_cast< SCCOL >( GetDocPos( tdCol, rCellPos.mnCol ) ),
2420 static_cast< SCROW >( GetDocPos( tdRow, rCellPos.mnRow ) ) );
2423 void ScHTMLTable::GetDocRange( ScRange& rRange ) const
2425 rRange.aStart = rRange.aEnd = maDocBasePos.MakeAddr();
2426 ScAddress aErrorPos( ScAddress::UNINITIALIZED );
2427 if (!rRange.aEnd.Move( static_cast< SCCOL >( GetDocSize( tdCol ) ) - 1,
2428 static_cast< SCROW >( GetDocSize( tdRow ) ) - 1, 0, aErrorPos, mrDoc ))
2430 assert(!"can't move");
2434 void ScHTMLTable::ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const
2436 OSL_ENSURE( pDoc, "ScHTMLTable::ApplyCellBorders - no document" );
2437 if( pDoc && mbBorderOn )
2439 const SCCOL nLastCol = maSize.mnCols - 1;
2440 const SCROW nLastRow = maSize.mnRows - 1;
2441 const tools::Long nOuterLine = SvxBorderLineWidth::Medium;
2442 const tools::Long nInnerLine = SvxBorderLineWidth::Hairline;
2443 SvxBorderLine aOuterLine(nullptr, nOuterLine, SvxBorderLineStyle::SOLID);
2444 SvxBorderLine aInnerLine(nullptr, nInnerLine, SvxBorderLineStyle::SOLID);
2445 SvxBoxItem aBorderItem( ATTR_BORDER );
2447 for( SCCOL nCol = 0; nCol <= nLastCol; ++nCol )
2449 SvxBorderLine* pLeftLine = (nCol == 0) ? &aOuterLine : &aInnerLine;
2450 SvxBorderLine* pRightLine = (nCol == nLastCol) ? &aOuterLine : &aInnerLine;
2451 SCCOL nCellCol1 = static_cast< SCCOL >( GetDocPos( tdCol, nCol ) ) + rFirstPos.Col();
2452 SCCOL nCellCol2 = nCellCol1 + static_cast< SCCOL >( GetDocSize( tdCol, nCol ) ) - 1;
2453 for( SCROW nRow = 0; nRow <= nLastRow; ++nRow )
2455 SvxBorderLine* pTopLine = (nRow == 0) ? &aOuterLine : &aInnerLine;
2456 SvxBorderLine* pBottomLine = (nRow == nLastRow) ? &aOuterLine : &aInnerLine;
2457 SCROW nCellRow1 = GetDocPos( tdRow, nRow ) + rFirstPos.Row();
2458 SCROW nCellRow2 = nCellRow1 + GetDocSize( tdRow, nRow ) - 1;
2459 for( SCCOL nCellCol = nCellCol1; nCellCol <= nCellCol2; ++nCellCol )
2461 aBorderItem.SetLine( (nCellCol == nCellCol1) ? pLeftLine : nullptr, SvxBoxItemLine::LEFT );
2462 aBorderItem.SetLine( (nCellCol == nCellCol2) ? pRightLine : nullptr, SvxBoxItemLine::RIGHT );
2463 for( SCROW nCellRow = nCellRow1; nCellRow <= nCellRow2; ++nCellRow )
2465 aBorderItem.SetLine( (nCellRow == nCellRow1) ? pTopLine : nullptr, SvxBoxItemLine::TOP );
2466 aBorderItem.SetLine( (nCellRow == nCellRow2) ? pBottomLine : nullptr, SvxBoxItemLine::BOTTOM );
2467 pDoc->ApplyAttr( nCellCol, nCellRow, rFirstPos.Tab(), aBorderItem );
2474 for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2475 aIter->ApplyCellBorders( pDoc, rFirstPos );
2478 SvNumberFormatter* ScHTMLTable::GetFormatTable()
2480 return mpParser->GetDoc().GetFormatTable();
2483 bool ScHTMLTable::IsEmptyCell() const
2485 return mpCurrEntryVector && mpCurrEntryVector->empty();
2488 bool ScHTMLTable::IsSpaceCharInfo( const HtmlImportInfo& rInfo )
2490 return (rInfo.nToken == HtmlTokenId::TEXTTOKEN) && (rInfo.aText.getLength() == 1) && (rInfo.aText[ 0 ] == ' ');
2493 ScHTMLTable::ScHTMLEntryPtr ScHTMLTable::CreateEntry() const
2495 return std::make_unique<ScHTMLEntry>( GetCurrItemSet() );
2498 void ScHTMLTable::CreateNewEntry( const HtmlImportInfo& rInfo )
2500 OSL_ENSURE( !mxCurrEntry, "ScHTMLTable::CreateNewEntry - old entry still present" );
2501 mxCurrEntry = CreateEntry();
2502 mxCurrEntry->aSel = rInfo.aSelection;
2505 void ScHTMLTable::ImplPushEntryToVector( ScHTMLEntryVector& rEntryVector, ScHTMLEntryPtr& rxEntry )
2507 // HTML entry list does not own the entries
2508 rEntryVector.push_back( rxEntry.get() );
2509 // mrEEParseList (reference to member of ScEEParser) owns the entries
2510 mrEEParseList.push_back(std::shared_ptr<ScEEParseEntry>(rxEntry.release()));
2513 bool ScHTMLTable::PushEntry( ScHTMLEntryPtr& rxEntry )
2515 bool bPushed = false;
2516 if( rxEntry && rxEntry->HasContents() )
2518 if( mpCurrEntryVector )
2520 if( mbPushEmptyLine )
2522 ScHTMLEntryPtr xEmptyEntry = CreateEntry();
2523 ImplPushEntryToVector( *mpCurrEntryVector, xEmptyEntry );
2524 mbPushEmptyLine = false;
2526 ImplPushEntryToVector( *mpCurrEntryVector, rxEntry );
2527 bPushed = true;
2529 else if( mpParentTable )
2531 bPushed = mpParentTable->PushEntry( rxEntry );
2533 else
2535 OSL_FAIL( "ScHTMLTable::PushEntry - cannot push entry, no parent found" );
2538 return bPushed;
2541 bool ScHTMLTable::PushEntry( const HtmlImportInfo& rInfo, bool bLastInCell )
2543 OSL_ENSURE( mxCurrEntry, "ScHTMLTable::PushEntry - no current entry" );
2544 bool bPushed = false;
2545 if( mxCurrEntry )
2547 mxCurrEntry->AdjustEnd( rInfo );
2548 mxCurrEntry->Strip( mrEditEngine );
2550 // import entry always, if it is the last in cell, and cell is still empty
2551 if( bLastInCell && IsEmptyCell() )
2553 mxCurrEntry->SetImportAlways();
2554 // don't insert empty lines before single empty entries
2555 if( mxCurrEntry->IsEmpty() )
2556 mbPushEmptyLine = false;
2559 bPushed = PushEntry( mxCurrEntry );
2560 mxCurrEntry.reset();
2562 return bPushed;
2565 void ScHTMLTable::PushTableEntry( ScHTMLTableId nTableId )
2567 OSL_ENSURE( nTableId != SC_HTML_GLOBAL_TABLE, "ScHTMLTable::PushTableEntry - cannot push global table" );
2568 if( nTableId != SC_HTML_GLOBAL_TABLE )
2570 ScHTMLEntryPtr xEntry( new ScHTMLEntry( maTableItemSet, nTableId ) );
2571 PushEntry( xEntry );
2575 ScHTMLTable* ScHTMLTable::GetExistingTable( ScHTMLTableId nTableId ) const
2577 ScHTMLTable* pTable = ((nTableId != SC_HTML_GLOBAL_TABLE) && mxNestedTables) ?
2578 mxNestedTables->FindTable( nTableId, false ) : nullptr;
2579 OSL_ENSURE( pTable || (nTableId == SC_HTML_GLOBAL_TABLE), "ScHTMLTable::GetExistingTable - table not found" );
2580 return pTable;
2583 ScHTMLTable* ScHTMLTable::InsertNestedTable( const HtmlImportInfo& rInfo, bool bPreFormText )
2585 if( !mxNestedTables )
2586 mxNestedTables.reset( new ScHTMLTableMap( *this ) );
2587 if( bPreFormText ) // enclose new preformatted table with empty lines
2588 InsertLeadingEmptyLine();
2589 return mxNestedTables->CreateTable( rInfo, bPreFormText, mrDoc );
2592 void ScHTMLTable::InsertNewCell( const ScHTMLSize& rSpanSize )
2594 ScRange* pRange;
2596 /* Find an unused cell by skipping all merged ranges that cover the
2597 current cell position stored in maCurrCell. */
2598 for (;;)
2600 pRange = maVMergedCells.Find( maCurrCell.MakeAddr() );
2601 if (!pRange)
2602 pRange = maHMergedCells.Find( maCurrCell.MakeAddr() );
2603 if (!pRange)
2604 break;
2605 maCurrCell.mnCol = pRange->aEnd.Col() + 1;
2607 mpCurrEntryVector = &maEntryMap[ maCurrCell ];
2609 /* If the new cell is merged horizontally, try to find collisions with
2610 other vertically merged ranges. In this case, shrink existing
2611 vertically merged ranges (do not shrink the new cell). */
2612 SCCOL nColEnd = maCurrCell.mnCol + rSpanSize.mnCols;
2613 for( ScAddress aAddr( maCurrCell.MakeAddr() ); aAddr.Col() < nColEnd; aAddr.IncCol() )
2614 if( (pRange = maVMergedCells.Find( aAddr )) != nullptr )
2615 pRange->aEnd.SetRow( maCurrCell.mnRow - 1 );
2617 // insert the new range into the cell lists
2618 ScRange aNewRange( maCurrCell.MakeAddr() );
2619 ScAddress aErrorPos( ScAddress::UNINITIALIZED );
2620 if (!aNewRange.aEnd.Move( rSpanSize.mnCols - 1, rSpanSize.mnRows - 1, 0, aErrorPos, mrDoc ))
2622 assert(!"can't move");
2624 if( rSpanSize.mnRows > 1 )
2626 maVMergedCells.push_back( aNewRange );
2627 /* Do not insert vertically merged ranges into maUsedCells yet,
2628 because they may be shrunken (see above). The final vertically
2629 merged ranges are inserted in FillEmptyCells(). */
2631 else
2633 if( rSpanSize.mnCols > 1 )
2634 maHMergedCells.push_back( aNewRange );
2635 /* Insert horizontally merged ranges and single cells into
2636 maUsedCells, they will not be changed anymore. */
2637 maUsedCells.Join( aNewRange );
2640 // adjust table size
2641 maSize.mnCols = std::max< SCCOL >( maSize.mnCols, aNewRange.aEnd.Col() + 1 );
2642 maSize.mnRows = std::max< SCROW >( maSize.mnRows, aNewRange.aEnd.Row() + 1 );
2645 void ScHTMLTable::ImplRowOn()
2647 if( mbRowOn )
2648 ImplRowOff();
2649 moRowItemSet.emplace( maTableItemSet );
2650 maCurrCell.mnCol = 0;
2651 mbRowOn = true;
2652 mbDataOn = false;
2655 void ScHTMLTable::ImplRowOff()
2657 if( mbDataOn )
2658 ImplDataOff();
2659 if( mbRowOn )
2661 moRowItemSet.reset();
2662 ++maCurrCell.mnRow;
2663 mbRowOn = mbDataOn = false;
2667 void ScHTMLTable::ImplDataOn( const ScHTMLSize& rSpanSize )
2669 if( mbDataOn )
2670 ImplDataOff();
2671 if( !mbRowOn )
2672 ImplRowOn();
2673 moDataItemSet.emplace( *moRowItemSet );
2674 InsertNewCell( rSpanSize );
2675 mbDataOn = true;
2676 mbPushEmptyLine = false;
2679 void ScHTMLTable::ImplDataOff()
2681 if( mbDataOn )
2683 moDataItemSet.reset();
2684 ++maCurrCell.mnCol;
2685 mpCurrEntryVector = nullptr;
2686 mbDataOn = false;
2690 void ScHTMLTable::ProcessFormatOptions( SfxItemSet& rItemSet, const HtmlImportInfo& rInfo )
2692 // special handling for table header cells
2693 if( rInfo.nToken == HtmlTokenId::TABLEHEADER_ON )
2695 rItemSet.Put( SvxWeightItem( WEIGHT_BOLD, ATTR_FONT_WEIGHT ) );
2696 rItemSet.Put( SvxHorJustifyItem( SvxCellHorJustify::Center, ATTR_HOR_JUSTIFY ) );
2699 const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2700 for (const auto& rOption : rOptions)
2702 switch( rOption.GetToken() )
2704 case HtmlOptionId::ALIGN:
2706 SvxCellHorJustify eVal = SvxCellHorJustify::Standard;
2707 const OUString& rOptVal = rOption.GetString();
2708 if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
2709 eVal = SvxCellHorJustify::Right;
2710 else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_center ) )
2711 eVal = SvxCellHorJustify::Center;
2712 else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
2713 eVal = SvxCellHorJustify::Left;
2714 if( eVal != SvxCellHorJustify::Standard )
2715 rItemSet.Put( SvxHorJustifyItem( eVal, ATTR_HOR_JUSTIFY ) );
2717 break;
2719 case HtmlOptionId::VALIGN:
2721 SvxCellVerJustify eVal = SvxCellVerJustify::Standard;
2722 const OUString& rOptVal = rOption.GetString();
2723 if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_top ) )
2724 eVal = SvxCellVerJustify::Top;
2725 else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_middle ) )
2726 eVal = SvxCellVerJustify::Center;
2727 else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_bottom ) )
2728 eVal = SvxCellVerJustify::Bottom;
2729 if( eVal != SvxCellVerJustify::Standard )
2730 rItemSet.Put( SvxVerJustifyItem( eVal, ATTR_VER_JUSTIFY ) );
2732 break;
2734 case HtmlOptionId::BGCOLOR:
2736 Color aColor;
2737 rOption.GetColor( aColor );
2738 rItemSet.Put( SvxBrushItem( aColor, ATTR_BACKGROUND ) );
2740 break;
2741 default: break;
2746 void ScHTMLTable::SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize )
2748 OSL_ENSURE( nCellPos >= 0, "ScHTMLTable::SetDocSize - unexpected negative position" );
2749 ScSizeVec& rSizes = maCumSizes[ eOrient ];
2750 size_t nIndex = static_cast< size_t >( nCellPos );
2751 // expand with height/width == 1
2752 while( nIndex >= rSizes.size() )
2753 rSizes.push_back( rSizes.empty() ? 1 : (rSizes.back() + 1) );
2754 // update size of passed position and all following
2755 // #i109987# only grow, don't shrink - use the largest needed size
2756 SCCOLROW nDiff = nSize - ((nIndex == 0) ? rSizes.front() : (rSizes[ nIndex ] - rSizes[ nIndex - 1 ]));
2757 if( nDiff > 0 )
2758 std::for_each(rSizes.begin() + nIndex, rSizes.end(), [&nDiff](SCCOLROW& rSize) { rSize += nDiff; });
2761 void ScHTMLTable::CalcNeededDocSize(
2762 ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nCellSpan, SCCOLROW nRealDocSize )
2764 SCCOLROW nDiffSize = 0;
2765 // in merged columns/rows: reduce needed size by size of leading columns
2766 while( nCellSpan > 1 )
2768 nDiffSize += GetDocSize( eOrient, nCellPos );
2769 --nCellSpan;
2770 ++nCellPos;
2772 // set remaining needed size to last column/row
2773 nRealDocSize -= std::min< SCCOLROW >( nRealDocSize - 1, nDiffSize );
2774 SetDocSize( eOrient, nCellPos, nRealDocSize );
2777 void ScHTMLTable::FillEmptyCells()
2779 for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2780 aIter->FillEmptyCells();
2782 // insert the final vertically merged ranges into maUsedCells
2783 for ( size_t i = 0, nRanges = maVMergedCells.size(); i < nRanges; ++i )
2785 ScRange & rRange = maVMergedCells[ i ];
2786 maUsedCells.Join( rRange );
2789 for( ScAddress aAddr; aAddr.Row() < maSize.mnRows; aAddr.IncRow() )
2791 for( aAddr.SetCol( 0 ); aAddr.Col() < maSize.mnCols; aAddr.IncCol() )
2793 if( !maUsedCells.Find( aAddr ) )
2795 // create a range for the lock list (used to calc. cell span)
2796 ScRange aRange( aAddr );
2799 aRange.aEnd.IncCol();
2801 while( (aRange.aEnd.Col() < maSize.mnCols) && !maUsedCells.Find( aRange.aEnd ) );
2802 aRange.aEnd.IncCol( -1 );
2803 maUsedCells.Join( aRange );
2805 // insert a dummy entry
2806 ScHTMLEntryPtr xEntry = CreateEntry();
2807 ImplPushEntryToVector( maEntryMap[ ScHTMLPos( aAddr ) ], xEntry );
2813 void ScHTMLTable::RecalcDocSize()
2815 // recalc table sizes recursively from inner to outer
2816 for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2817 aIter->RecalcDocSize();
2819 /* Two passes: first calculates the sizes of single columns/rows, then
2820 the sizes of spanned columns/rows. This allows to fill nested tables
2821 into merged cells optimally. */
2822 static const sal_uInt16 PASS_SINGLE = 0;
2823 static const sal_uInt16 PASS_SPANNED = 1;
2824 for( sal_uInt16 nPass = PASS_SINGLE; nPass <= PASS_SPANNED; ++nPass )
2826 // iterate through every table cell
2827 for( const auto& [rCellPos, rEntryVector] : maEntryMap )
2829 ScHTMLSize aCellSpan = GetSpan( rCellPos );
2831 // process the dimension of the current cell in this pass?
2832 // (pass is single and span is 1) or (pass is not single and span is not 1)
2833 bool bProcessColWidth = ((nPass == PASS_SINGLE) == (aCellSpan.mnCols == 1));
2834 bool bProcessRowHeight = ((nPass == PASS_SINGLE) == (aCellSpan.mnRows == 1));
2835 if( bProcessColWidth || bProcessRowHeight )
2837 ScHTMLSize aDocSize( 1, 0 ); // resulting size of the cell in document
2839 // expand the cell size for each cell parse entry
2840 for( const auto& rpEntry : rEntryVector )
2842 ScHTMLTable* pTable = GetExistingTable( rpEntry->GetTableId() );
2843 // find entry with maximum width
2844 if( bProcessColWidth && pTable )
2845 aDocSize.mnCols = std::max( aDocSize.mnCols, static_cast< SCCOL >( pTable->GetDocSize( tdCol ) ) );
2846 // add up height of each entry
2847 if( bProcessRowHeight )
2848 aDocSize.mnRows += pTable ? pTable->GetDocSize( tdRow ) : 1;
2850 if( !aDocSize.mnRows )
2851 aDocSize.mnRows = 1;
2853 if( bProcessColWidth )
2854 CalcNeededDocSize( tdCol, rCellPos.mnCol, aCellSpan.mnCols, aDocSize.mnCols );
2855 if( bProcessRowHeight )
2856 CalcNeededDocSize( tdRow, rCellPos.mnRow, aCellSpan.mnRows, aDocSize.mnRows );
2862 void ScHTMLTable::RecalcDocPos( const ScHTMLPos& rBasePos )
2864 maDocBasePos = rBasePos;
2865 // after the previous assignment it is allowed to call GetDocPos() methods
2867 // iterate through every table cell
2868 for( auto& [rCellPos, rEntryVector] : maEntryMap )
2870 // fixed doc position of the entire cell (first entry)
2871 const ScHTMLPos aCellDocPos( GetDocPos( rCellPos ) );
2872 // fixed doc size of the entire cell
2873 const ScHTMLSize aCellDocSize( GetDocSize( rCellPos ) );
2875 // running doc position for single entries
2876 ScHTMLPos aEntryDocPos( aCellDocPos );
2878 ScHTMLEntry* pEntry = nullptr;
2879 for( const auto& rpEntry : rEntryVector )
2881 pEntry = rpEntry;
2882 if( ScHTMLTable* pTable = GetExistingTable( pEntry->GetTableId() ) )
2884 pTable->RecalcDocPos( aEntryDocPos ); // recalc nested table
2885 pEntry->nCol = SCCOL_MAX;
2886 pEntry->nRow = SCROW_MAX;
2887 SCROW nTableRows = static_cast< SCROW >( pTable->GetDocSize( tdRow ) );
2889 // use this entry to pad empty space right of table
2890 if( mpParentTable ) // ... but not in global table
2892 SCCOL nStartCol = aEntryDocPos.mnCol + static_cast< SCCOL >( pTable->GetDocSize( tdCol ) );
2893 SCCOL nNextCol = aEntryDocPos.mnCol + aCellDocSize.mnCols;
2894 if( nStartCol < nNextCol )
2896 pEntry->nCol = nStartCol;
2897 pEntry->nRow = aEntryDocPos.mnRow;
2898 pEntry->nColOverlap = nNextCol - nStartCol;
2899 pEntry->nRowOverlap = nTableRows;
2902 aEntryDocPos.mnRow += nTableRows;
2904 else
2906 pEntry->nCol = aEntryDocPos.mnCol;
2907 pEntry->nRow = aEntryDocPos.mnRow;
2908 if( mpParentTable ) // do not merge in global table
2909 pEntry->nColOverlap = aCellDocSize.mnCols;
2910 ++aEntryDocPos.mnRow;
2914 // pEntry points now to last entry.
2915 if( pEntry )
2917 if( (pEntry == rEntryVector.front()) && (pEntry->GetTableId() == SC_HTML_NO_TABLE) )
2919 // pEntry is the only entry in this cell - merge rows of cell with single non-table entry.
2920 pEntry->nRowOverlap = aCellDocSize.mnRows;
2922 else
2924 // fill up incomplete entry lists
2925 SCROW nFirstUnusedRow = aCellDocPos.mnRow + aCellDocSize.mnRows;
2926 while( aEntryDocPos.mnRow < nFirstUnusedRow )
2928 ScHTMLEntryPtr xDummyEntry( new ScHTMLEntry( pEntry->GetItemSet() ) );
2929 xDummyEntry->nCol = aEntryDocPos.mnCol;
2930 xDummyEntry->nRow = aEntryDocPos.mnRow;
2931 xDummyEntry->nColOverlap = aCellDocSize.mnCols;
2932 ImplPushEntryToVector( rEntryVector, xDummyEntry );
2933 ++aEntryDocPos.mnRow;
2940 ScHTMLGlobalTable::ScHTMLGlobalTable(
2941 SfxItemPool& rPool,
2942 EditEngine& rEditEngine,
2943 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseVector,
2944 ScHTMLTableId& rnUnusedId,
2945 ScHTMLParser* pParser,
2946 const ScDocument& rDoc
2948 ScHTMLTable( rPool, rEditEngine, rEEParseVector, rnUnusedId, pParser, rDoc )
2952 ScHTMLGlobalTable::~ScHTMLGlobalTable()
2956 void ScHTMLGlobalTable::Recalc()
2958 // Fills up empty cells with a dummy entry. */
2959 FillEmptyCells();
2960 // recalc table sizes of all nested tables and this table
2961 RecalcDocSize();
2962 // recalc document positions of all entries in this table and in nested tables
2963 RecalcDocPos( GetDocPos() );
2966 ScHTMLQueryParser::ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc ) :
2967 ScHTMLParser( pEditEngine, pDoc ),
2968 mnUnusedId( SC_HTML_GLOBAL_TABLE ),
2969 mbTitleOn( false )
2971 mxGlobTable.reset(
2972 new ScHTMLGlobalTable(*pPool, *pEdit, maList, mnUnusedId, this, *pDoc));
2973 mpCurrTable = mxGlobTable.get();
2976 ScHTMLQueryParser::~ScHTMLQueryParser()
2980 ErrCode ScHTMLQueryParser::Read( SvStream& rStrm, const OUString& rBaseURL )
2982 SvKeyValueIteratorRef xValues;
2983 SvKeyValueIterator* pAttributes = nullptr;
2985 ScDocShell* pObjSh = mpDoc->GetDocumentShell();
2986 if( pObjSh && pObjSh->IsLoading() )
2988 pAttributes = pObjSh->GetHeaderAttributes();
2990 else
2992 /* When not loading, set up fake HTTP headers to force the SfxHTMLParser
2993 to use UTF8 (used when pasting from clipboard) */
2994 const char* pCharSet = rtl_getBestMimeCharsetFromTextEncoding( RTL_TEXTENCODING_UTF8 );
2995 if( pCharSet )
2997 OUString aContentType = "text/html; charset=" +
2998 OUString::createFromAscii( pCharSet );
3000 xValues = new SvKeyValueIterator;
3001 xValues->Append( SvKeyValue( u"" OOO_STRING_SVTOOLS_HTML_META_content_type ""_ustr, aContentType ) );
3002 pAttributes = xValues.get();
3006 Link<HtmlImportInfo&,void> aOldLink = pEdit->GetHtmlImportHdl();
3007 pEdit->SetHtmlImportHdl( LINK( this, ScHTMLQueryParser, HTMLImportHdl ) );
3008 ErrCode nErr = pEdit->Read( rStrm, rBaseURL, EETextFormat::Html, pAttributes );
3009 pEdit->SetHtmlImportHdl( aOldLink );
3011 mxGlobTable->Recalc();
3012 nColMax = static_cast< SCCOL >( mxGlobTable->GetDocSize( tdCol ) - 1 );
3013 nRowMax = static_cast< SCROW >( mxGlobTable->GetDocSize( tdRow ) - 1 );
3015 return nErr;
3018 const ScHTMLTable* ScHTMLQueryParser::GetGlobalTable() const
3020 return mxGlobTable.get();
3023 void ScHTMLQueryParser::ProcessToken( const HtmlImportInfo& rInfo )
3025 switch( rInfo.nToken )
3027 // --- meta data ---
3028 case HtmlTokenId::META: MetaOn( rInfo ); break; // <meta>
3030 // --- title handling ---
3031 case HtmlTokenId::TITLE_ON: TitleOn(); break; // <title>
3032 case HtmlTokenId::TITLE_OFF: TitleOff( rInfo ); break; // </title>
3034 case HtmlTokenId::STYLE_ON: break;
3035 case HtmlTokenId::STYLE_OFF: ParseStyle(rInfo.aText); break;
3037 // --- body handling ---
3038 case HtmlTokenId::BODY_ON: mpCurrTable->BodyOn( rInfo ); break; // <body>
3039 case HtmlTokenId::BODY_OFF: mpCurrTable->BodyOff( rInfo ); break; // </body>
3041 // --- insert text ---
3042 case HtmlTokenId::TEXTTOKEN: InsertText( rInfo ); break; // any text
3043 case HtmlTokenId::LINEBREAK: mpCurrTable->BreakOn(); break; // <br>
3044 case HtmlTokenId::HEAD1_ON: // <h1>
3045 case HtmlTokenId::HEAD2_ON: // <h2>
3046 case HtmlTokenId::HEAD3_ON: // <h3>
3047 case HtmlTokenId::HEAD4_ON: // <h4>
3048 case HtmlTokenId::HEAD5_ON: // <h5>
3049 case HtmlTokenId::HEAD6_ON: // <h6>
3050 case HtmlTokenId::PARABREAK_ON: mpCurrTable->HeadingOn(); break; // <p>
3052 // --- misc. contents ---
3053 case HtmlTokenId::ANCHOR_ON: mpCurrTable->AnchorOn(); break; // <a>
3055 // --- table handling ---
3056 case HtmlTokenId::TABLE_ON: TableOn( rInfo ); break; // <table>
3057 case HtmlTokenId::TABLE_OFF: TableOff( rInfo ); break; // </table>
3058 case HtmlTokenId::CAPTION_ON: mpCurrTable->CaptionOn(); break; // <caption>
3059 case HtmlTokenId::CAPTION_OFF: mpCurrTable->CaptionOff(); break; // </caption>
3060 case HtmlTokenId::TABLEROW_ON: mpCurrTable->RowOn( rInfo ); break; // <tr>
3061 case HtmlTokenId::TABLEROW_OFF: mpCurrTable->RowOff( rInfo ); break; // </tr>
3062 case HtmlTokenId::TABLEHEADER_ON: // <th>
3063 case HtmlTokenId::TABLEDATA_ON: mpCurrTable->DataOn( rInfo ); break; // <td>
3064 case HtmlTokenId::TABLEHEADER_OFF: // </th>
3065 case HtmlTokenId::TABLEDATA_OFF: mpCurrTable->DataOff( rInfo ); break; // </td>
3066 case HtmlTokenId::PREFORMTXT_ON: PreOn( rInfo ); break; // <pre>
3067 case HtmlTokenId::PREFORMTXT_OFF: PreOff( rInfo ); break; // </pre>
3069 // --- formatting ---
3070 case HtmlTokenId::FONT_ON: FontOn( rInfo ); break; // <font>
3072 case HtmlTokenId::BIGPRINT_ON: // <big>
3073 //! TODO: store current font size, use following size
3074 mpCurrTable->PutItem( SvxFontHeightItem( maFontHeights[ 3 ], 100, ATTR_FONT_HEIGHT ) );
3075 break;
3076 case HtmlTokenId::SMALLPRINT_ON: // <small>
3077 //! TODO: store current font size, use preceding size
3078 mpCurrTable->PutItem( SvxFontHeightItem( maFontHeights[ 0 ], 100, ATTR_FONT_HEIGHT ) );
3079 break;
3081 case HtmlTokenId::BOLD_ON: // <b>
3082 case HtmlTokenId::STRONG_ON: // <strong>
3083 mpCurrTable->PutItem( SvxWeightItem( WEIGHT_BOLD, ATTR_FONT_WEIGHT ) );
3084 break;
3086 case HtmlTokenId::ITALIC_ON: // <i>
3087 case HtmlTokenId::EMPHASIS_ON: // <em>
3088 case HtmlTokenId::ADDRESS_ON: // <address>
3089 case HtmlTokenId::BLOCKQUOTE_ON: // <blockquote>
3090 case HtmlTokenId::BLOCKQUOTE30_ON: // <bq>
3091 case HtmlTokenId::CITATION_ON: // <cite>
3092 case HtmlTokenId::VARIABLE_ON: // <var>
3093 mpCurrTable->PutItem( SvxPostureItem( ITALIC_NORMAL, ATTR_FONT_POSTURE ) );
3094 break;
3096 case HtmlTokenId::DEFINSTANCE_ON: // <dfn>
3097 mpCurrTable->PutItem( SvxWeightItem( WEIGHT_BOLD, ATTR_FONT_WEIGHT ) );
3098 mpCurrTable->PutItem( SvxPostureItem( ITALIC_NORMAL, ATTR_FONT_POSTURE ) );
3099 break;
3101 case HtmlTokenId::UNDERLINE_ON: // <u>
3102 mpCurrTable->PutItem( SvxUnderlineItem( LINESTYLE_SINGLE, ATTR_FONT_UNDERLINE ) );
3103 break;
3104 default: break;
3108 void ScHTMLQueryParser::InsertText( const HtmlImportInfo& rInfo )
3110 mpCurrTable->PutText( rInfo );
3111 if( mbTitleOn )
3112 maTitle.append(rInfo.aText);
3115 void ScHTMLQueryParser::FontOn( const HtmlImportInfo& rInfo )
3117 const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
3118 for (const auto& rOption : rOptions)
3120 switch( rOption.GetToken() )
3122 case HtmlOptionId::FACE :
3124 const OUString& rFace = rOption.GetString();
3125 OUString aFontName;
3126 sal_Int32 nPos = 0;
3127 while( nPos != -1 )
3129 // font list separator: VCL = ';' HTML = ','
3130 std::u16string_view aFName = comphelper::string::strip(o3tl::getToken(rFace, 0, ',', nPos), ' ');
3131 aFontName = ScGlobal::addToken(aFontName, aFName, ';');
3133 if ( !aFontName.isEmpty() )
3134 mpCurrTable->PutItem( SvxFontItem( FAMILY_DONTKNOW,
3135 aFontName, OUString(), PITCH_DONTKNOW,
3136 RTL_TEXTENCODING_DONTKNOW, ATTR_FONT ) );
3138 break;
3139 case HtmlOptionId::SIZE :
3141 sal_uInt32 nSize = getLimitedValue< sal_uInt32 >( rOption.GetNumber(), 1, SC_HTML_FONTSIZES );
3142 mpCurrTable->PutItem( SvxFontHeightItem( maFontHeights[ nSize - 1 ], 100, ATTR_FONT_HEIGHT ) );
3144 break;
3145 case HtmlOptionId::COLOR :
3147 Color aColor;
3148 rOption.GetColor( aColor );
3149 mpCurrTable->PutItem( SvxColorItem( aColor, ATTR_FONT_COLOR ) );
3151 break;
3152 default: break;
3157 void ScHTMLQueryParser::MetaOn( const HtmlImportInfo& rInfo )
3159 if( mpDoc->GetDocumentShell() )
3161 HTMLParser* pParser = static_cast< HTMLParser* >( rInfo.pParser );
3163 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
3164 static_cast<cppu::OWeakObject*>(mpDoc->GetDocumentShell()->GetModel()), uno::UNO_QUERY_THROW);
3165 pParser->ParseMetaOptions(
3166 xDPS->getDocumentProperties(),
3167 mpDoc->GetDocumentShell()->GetHeaderAttributes() );
3171 void ScHTMLQueryParser::TitleOn()
3173 mbTitleOn = true;
3174 maTitle.setLength(0);
3177 void ScHTMLQueryParser::TitleOff( const HtmlImportInfo& rInfo )
3179 if( !mbTitleOn )
3180 return;
3182 OUString aTitle = maTitle.makeStringAndClear().trim();
3183 if (!aTitle.isEmpty() && mpDoc->GetDocumentShell())
3185 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
3186 static_cast<cppu::OWeakObject*>(mpDoc->GetDocumentShell()->GetModel()), uno::UNO_QUERY_THROW);
3188 xDPS->getDocumentProperties()->setTitle(aTitle);
3190 InsertText( rInfo );
3191 mbTitleOn = false;
3194 void ScHTMLQueryParser::TableOn( const HtmlImportInfo& rInfo )
3196 mpCurrTable = mpCurrTable->TableOn( rInfo );
3199 void ScHTMLQueryParser::TableOff( const HtmlImportInfo& rInfo )
3201 mpCurrTable = mpCurrTable->TableOff( rInfo );
3204 void ScHTMLQueryParser::PreOn( const HtmlImportInfo& rInfo )
3206 mpCurrTable = mpCurrTable->PreOn( rInfo );
3209 void ScHTMLQueryParser::PreOff( const HtmlImportInfo& rInfo )
3211 mpCurrTable = mpCurrTable->PreOff( rInfo );
3214 void ScHTMLQueryParser::CloseTable( const HtmlImportInfo& rInfo )
3216 mpCurrTable = mpCurrTable->CloseTable( rInfo );
3219 namespace {
3222 * Handler class for the CSS parser.
3224 class CSSHandler: public orcus::css_handler
3226 typedef std::pair<std::string_view, std::string_view> SelectorName; // element : class
3227 typedef std::vector<SelectorName> SelectorNames;
3229 SelectorNames maSelectorNames; // current selector names
3230 std::string_view maPropName; // current property name.
3231 std::string_view maPropValue; // current property value.
3232 ScHTMLStyles& mrStyles;
3234 public:
3235 explicit CSSHandler(ScHTMLStyles& rStyles):
3236 mrStyles(rStyles)
3239 // selector name not starting with "." or "#" (i.e. element selectors)
3240 void simple_selector_type(std::string_view aElem)
3242 std::string_view aClass{}; // class name not given - to be added in the "element global" storage
3243 SelectorName aName(aElem, aClass);
3245 maSelectorNames.push_back(aName);
3248 // selector names starting with a "." (i.e. class selector)
3249 void simple_selector_class(std::string_view aClass)
3251 std::string_view aElem{}; // no element given - should be added in the "global" storage
3252 SelectorName aName(aElem, aClass);
3254 maSelectorNames.push_back(aName);
3257 // TODO: Add other selectors
3259 void property_name(std::string_view aPropName)
3261 maPropName = aPropName;
3264 void value(std::string_view aValue)
3266 maPropValue = aValue;
3269 void end_block()
3271 maSelectorNames.clear();
3274 void end_property()
3276 for (const auto& rSelName : maSelectorNames)
3278 // Add this property to the collection for each selector.
3279 std::string_view aElem = rSelName.first;
3280 std::string_view aClass = rSelName.second;
3281 OUString aName(maPropName.data(), maPropName.size(), RTL_TEXTENCODING_UTF8);
3282 OUString aValue(maPropValue.data(), maPropValue.size(), RTL_TEXTENCODING_UTF8);
3283 mrStyles.add(aElem.data(), aElem.size(), aClass.data(), aClass.size(), aName, aValue);
3285 maPropName = std::string_view{};
3286 maPropValue = std::string_view{};
3293 void ScHTMLQueryParser::ParseStyle(std::u16string_view rStrm)
3295 OString aStr = OUStringToOString(rStrm, RTL_TEXTENCODING_UTF8);
3296 CSSHandler aHdl(GetStyles());
3297 orcus::css_parser<CSSHandler> aParser(aStr, aHdl);
3300 aParser.parse();
3302 catch (const orcus::parse_error& rOrcusParseError)
3304 SAL_WARN("sc", "ScHTMLQueryParser::ParseStyle: " << rOrcusParseError.what());
3305 // TODO: Parsing of CSS failed. Do nothing for now.
3309 IMPL_LINK( ScHTMLQueryParser, HTMLImportHdl, HtmlImportInfo&, rInfo, void )
3311 switch( rInfo.eState )
3313 case HtmlImportState::Start:
3314 break;
3316 case HtmlImportState::NextToken:
3317 ProcessToken( rInfo );
3318 break;
3320 case HtmlImportState::InsertPara:
3321 mpCurrTable->InsertPara( rInfo );
3322 break;
3324 case HtmlImportState::SetAttr:
3325 case HtmlImportState::InsertText:
3326 case HtmlImportState::InsertField:
3327 break;
3329 case HtmlImportState::End:
3330 while( mpCurrTable->GetTableId() != SC_HTML_GLOBAL_TABLE )
3331 CloseTable( rInfo );
3332 break;
3334 default:
3335 OSL_FAIL( "ScHTMLQueryParser::HTMLImportHdl - unknown ImportInfo::eState" );
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */