Version 6.4.0.3, tag libreoffice-6.4.0.3
[LibreOffice.git] / sc / source / filter / html / htmlpars.cxx
blobae46949e4b21d2c1de8bff21e089fd3864718ab9
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <memory>
21 #include <sal/config.h>
23 #include <comphelper/string.hxx>
25 #include <scitems.hxx>
27 #include <svtools/htmlcfg.hxx>
28 #include <editeng/colritem.hxx>
29 #include <editeng/brushitem.hxx>
30 #include <editeng/editeng.hxx>
31 #include <editeng/fhgtitem.hxx>
32 #include <editeng/fontitem.hxx>
33 #include <editeng/postitem.hxx>
34 #include <editeng/udlnitem.hxx>
35 #include <editeng/wghtitem.hxx>
36 #include <editeng/borderline.hxx>
37 #include <editeng/boxitem.hxx>
38 #include <editeng/justifyitem.hxx>
39 #include <sfx2/objsh.hxx>
40 #include <svl/eitem.hxx>
41 #include <svl/intitem.hxx>
42 #include <vcl/graphicfilter.hxx>
43 #include <svtools/parhtml.hxx>
44 #include <svtools/htmlkywd.hxx>
45 #include <svtools/htmltokn.h>
47 #include <vcl/outdev.hxx>
48 #include <vcl/svapp.hxx>
49 #include <tools/urlobj.hxx>
50 #include <osl/diagnose.h>
52 #include <rtl/tencinfo.h>
54 #include <htmlpars.hxx>
55 #include <global.hxx>
56 #include <document.hxx>
57 #include <rangelst.hxx>
59 #include <orcus/css_parser.hpp>
61 #include <com/sun/star/document/XDocumentProperties.hpp>
62 #include <com/sun/star/document/XDocumentPropertiesSupplier.hpp>
63 #include <utility>
65 using ::editeng::SvxBorderLine;
66 using namespace ::com::sun::star;
68 ScHTMLStyles::ScHTMLStyles() : maEmpty() {}
70 void ScHTMLStyles::add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
71 const OUString& aProp, const OUString& aValue)
73 if (pElemName)
75 OUString aElem(pElemName, nElemName, RTL_TEXTENCODING_UTF8);
76 aElem = aElem.toAsciiLowerCase();
77 if (pClassName)
79 // Both element and class names given.
80 ElemsType::iterator itrElem = m_ElemProps.find(aElem);
81 if (itrElem == m_ElemProps.end())
83 // new element
84 std::pair<ElemsType::iterator, bool> r =
85 m_ElemProps.insert(std::make_pair(aElem, std::make_unique<NamePropsType>()));
86 if (!r.second)
87 // insertion failed.
88 return;
89 itrElem = r.first;
92 NamePropsType *const pClsProps = itrElem->second.get();
93 OUString aClass(pClassName, nClassName, RTL_TEXTENCODING_UTF8);
94 aClass = aClass.toAsciiLowerCase();
95 insertProp(*pClsProps, aClass, aProp, aValue);
97 else
99 // Element name only. Add it to the element global.
100 insertProp(m_ElemGlobalProps, aElem, aProp, aValue);
103 else
105 if (pClassName)
107 // Class name only. Add it to the global.
108 OUString aClass(pClassName, nClassName, RTL_TEXTENCODING_UTF8);
109 aClass = aClass.toAsciiLowerCase();
110 insertProp(m_GlobalProps, aClass, aProp, aValue);
115 const OUString& ScHTMLStyles::getPropertyValue(
116 const OUString& rElem, const OUString& rClass, const OUString& rPropName) const
118 // First, look into the element-class storage.
120 auto const itr = m_ElemProps.find(rElem);
121 if (itr != m_ElemProps.end())
123 const NamePropsType *const pClasses = itr->second.get();
124 NamePropsType::const_iterator itr2 = pClasses->find(rClass);
125 if (itr2 != pClasses->end())
127 const PropsType *const pProps = itr2->second.get();
128 PropsType::const_iterator itr3 = pProps->find(rPropName);
129 if (itr3 != pProps->end())
130 return itr3->second;
134 // Next, look into the class global storage.
136 auto const itr = m_GlobalProps.find(rClass);
137 if (itr != m_GlobalProps.end())
139 const PropsType *const pProps = itr->second.get();
140 PropsType::const_iterator itr2 = pProps->find(rPropName);
141 if (itr2 != pProps->end())
142 return itr2->second;
145 // As the last resort, look into the element global storage.
147 auto const itr = m_ElemGlobalProps.find(rClass);
148 if (itr != m_ElemGlobalProps.end())
150 const PropsType *const pProps = itr->second.get();
151 PropsType::const_iterator itr2 = pProps->find(rPropName);
152 if (itr2 != pProps->end())
153 return itr2->second;
157 return maEmpty; // nothing found.
160 void ScHTMLStyles::insertProp(
161 NamePropsType& rStore, const OUString& aName,
162 const OUString& aProp, const OUString& aValue)
164 NamePropsType::iterator itr = rStore.find(aName);
165 if (itr == rStore.end())
167 // new element
168 std::pair<NamePropsType::iterator, bool> r =
169 rStore.insert(std::make_pair(aName, std::make_unique<PropsType>()));
170 if (!r.second)
171 // insertion failed.
172 return;
174 itr = r.first;
177 PropsType *const pProps = itr->second.get();
178 pProps->emplace(aProp, aValue);
181 // BASE class for HTML parser classes
183 ScHTMLParser::ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc ) :
184 ScEEParser( pEditEngine ),
185 mpDoc( pDoc )
187 SvxHtmlOptions& rHtmlOptions = SvxHtmlOptions::Get();
188 for( sal_uInt16 nIndex = 0; nIndex < SC_HTML_FONTSIZES; ++nIndex )
189 maFontHeights[ nIndex ] = rHtmlOptions.GetFontSize( nIndex ) * 20;
// Destructor of the HTML parser base class; no statements of its own are visible in this view.
192 ScHTMLParser::~ScHTMLParser()
196 ScHTMLLayoutParser::ScHTMLLayoutParser(
197 EditEngine* pEditP, const OUString& rBaseURL, const Size& aPageSizeP,
198 ScDocument* pDocP ) :
199 ScHTMLParser( pEditP, pDocP ),
200 aPageSize( aPageSizeP ),
201 aBaseURL( rBaseURL ),
202 xLockedList( new ScRangeList ),
203 pLocalColOffset( new ScHTMLColOffset ),
204 nFirstTableCell(0),
205 nTableLevel(0),
206 nTable(0),
207 nMaxTable(0),
208 nColCntStart(0),
209 nMaxCol(0),
210 nTableWidth(0),
211 nColOffset(0),
212 nColOffsetStart(0),
213 nOffsetTolerance( SC_HTML_OFFSET_TOLERANCE_SMALL ),
214 bFirstRow( true ),
215 bTabInTabCell( false ),
216 bInCell( false ),
217 bInTitle( false )
219 MakeColNoRef( pLocalColOffset, 0, 0, 0, 0 );
220 MakeColNoRef( &maColOffset, 0, 0, 0, 0 );
223 ScHTMLLayoutParser::~ScHTMLLayoutParser()
225 while ( !aTableStack.empty() )
227 ScHTMLTableStackEntry * pS = aTableStack.top().get();
228 if ( pS->pLocalColOffset != pLocalColOffset )
229 delete pS->pLocalColOffset;
230 aTableStack.pop();
232 delete pLocalColOffset;
233 if ( pTables )
235 for( const auto& rEntry : *pTables)
236 delete rEntry.second;
237 pTables.reset();
241 ErrCode ScHTMLLayoutParser::Read( SvStream& rStream, const OUString& rBaseURL )
243 Link<HtmlImportInfo&,void> aOldLink = pEdit->GetHtmlImportHdl();
244 pEdit->SetHtmlImportHdl( LINK( this, ScHTMLLayoutParser, HTMLImportHdl ) );
246 SfxObjectShell* pObjSh = mpDoc->GetDocumentShell();
247 bool bLoading = pObjSh && pObjSh->IsLoading();
249 SvKeyValueIteratorRef xValues;
250 SvKeyValueIterator* pAttributes = nullptr;
251 if ( bLoading )
252 pAttributes = pObjSh->GetHeaderAttributes();
253 else
255 // When not loading, set up fake http headers to force the SfxHTMLParser to use UTF8
256 // (used when pasting from clipboard)
257 const sal_Char* pCharSet = rtl_getBestMimeCharsetFromTextEncoding( RTL_TEXTENCODING_UTF8 );
258 if( pCharSet )
260 OUString aContentType = "text/html; charset=" +
261 OUString::createFromAscii( pCharSet );
263 xValues = new SvKeyValueIterator;
264 xValues->Append( SvKeyValue( OOO_STRING_SVTOOLS_HTML_META_content_type, aContentType ) );
265 pAttributes = xValues.get();
269 ErrCode nErr = pEdit->Read( rStream, rBaseURL, EETextFormat::Html, pAttributes );
271 pEdit->SetHtmlImportHdl( aOldLink );
272 // Create column width
273 Adjust();
274 OutputDevice* pDefaultDev = Application::GetDefaultDevice();
275 sal_uInt16 nCount = maColOffset.size();
276 sal_uLong nOff = maColOffset[0];
277 Size aSize;
278 for ( sal_uInt16 j = 1; j < nCount; j++ )
280 aSize.setWidth( maColOffset[j] - nOff );
281 aSize = pDefaultDev->PixelToLogic( aSize, MapMode( MapUnit::MapTwip ) );
282 maColWidths[ j-1 ] = aSize.Width();
283 nOff = maColOffset[j];
285 return nErr;
288 const ScHTMLTable* ScHTMLLayoutParser::GetGlobalTable() const
290 return nullptr;
293 void ScHTMLLayoutParser::NewActEntry( const ScEEParseEntry* pE )
295 ScEEParser::NewActEntry( pE );
296 if ( pE )
298 if ( !pE->aSel.HasRange() )
299 { // Completely empty, following text ends up in the same paragraph!
300 mxActEntry->aSel.nStartPara = pE->aSel.nEndPara;
301 mxActEntry->aSel.nStartPos = pE->aSel.nEndPos;
304 mxActEntry->aSel.nEndPara = mxActEntry->aSel.nStartPara;
305 mxActEntry->aSel.nEndPos = mxActEntry->aSel.nStartPos;
308 void ScHTMLLayoutParser::EntryEnd( ScEEParseEntry* pE, const ESelection& rSel )
310 if ( rSel.nEndPara >= pE->aSel.nStartPara )
312 pE->aSel.nEndPara = rSel.nEndPara;
313 pE->aSel.nEndPos = rSel.nEndPos;
315 else if ( rSel.nStartPara == pE->aSel.nStartPara - 1 && !pE->aSel.HasRange() )
316 { // Did not attach a paragraph, but empty, do nothing
318 else
320 OSL_FAIL( "EntryEnd: EditEngine ESelection End < Start" );
324 void ScHTMLLayoutParser::NextRow( const HtmlImportInfo* pInfo )
326 if ( bInCell )
327 CloseEntry( pInfo );
328 if ( nRowMax < ++nRowCnt )
329 nRowMax = nRowCnt;
330 nColCnt = nColCntStart;
331 nColOffset = nColOffsetStart;
332 bFirstRow = false;
// Looks up nOffset in *pOffset and reports an index through *pCol.
//
// pOffset     list of column offsets to search (must not be null)
// nOffset     offset to look for
// pCol        out: index of the matching entry (always written, also on failure)
// nOffsetTol  allowed distance between nOffset and a neighbouring entry
//
// Returns true on an exact hit, or when the entry at the computed position
// or the one just below it lies within nOffsetTol of nOffset (in the latter
// case *pCol is decremented to that entry). Returns false for an empty list
// or when no entry is close enough.
// NOTE(review): the tolerance checks treat nPos as an insertion position; whether
// find() yields that for a missing value depends on ScHTMLColOffset, which is
// not visible here - confirm.
335 bool ScHTMLLayoutParser::SeekOffset( const ScHTMLColOffset* pOffset, sal_uInt16 nOffset,
336 SCCOL* pCol, sal_uInt16 nOffsetTol )
338 OSL_ENSURE( pOffset, "ScHTMLLayoutParser::SeekOffset - illegal call" );
339 ScHTMLColOffset::const_iterator it = pOffset->find( nOffset );
340 bool bFound = it != pOffset->end();
341 sal_uInt16 nPos = it - pOffset->begin();
342 *pCol = static_cast<SCCOL>(nPos);
343 if ( bFound )
344 return true;
345 sal_uInt16 nCount = pOffset->size();
346 if ( !nCount )
347 return false;
348 // nPos is the position of insertion, that's where the next higher one is (or isn't)
349 if ( nPos < nCount && (((*pOffset)[nPos] - nOffsetTol) <= nOffset) )
350 return true;
351 // Not smaller than everything else? Then compare with the next lower one
352 else if ( nPos && (((*pOffset)[nPos-1] + nOffsetTol) >= nOffset) )
354 (*pCol)--;
355 return true;
357 return false;
360 void ScHTMLLayoutParser::MakeCol( ScHTMLColOffset* pOffset, sal_uInt16& nOffset,
361 sal_uInt16& nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol )
363 OSL_ENSURE( pOffset, "ScHTMLLayoutParser::MakeCol - illegal call" );
364 SCCOL nPos;
365 if ( SeekOffset( pOffset, nOffset, &nPos, nOffsetTol ) )
366 nOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
367 else
368 pOffset->insert( nOffset );
369 if ( nWidth )
371 if ( SeekOffset( pOffset, nOffset + nWidth, &nPos, nWidthTol ) )
372 nWidth = static_cast<sal_uInt16>((*pOffset)[nPos]) - nOffset;
373 else
374 pOffset->insert( nOffset + nWidth );
378 void ScHTMLLayoutParser::MakeColNoRef( ScHTMLColOffset* pOffset, sal_uInt16 nOffset,
379 sal_uInt16 nWidth, sal_uInt16 nOffsetTol, sal_uInt16 nWidthTol )
381 OSL_ENSURE( pOffset, "ScHTMLLayoutParser::MakeColNoRef - illegal call" );
382 SCCOL nPos;
383 if ( SeekOffset( pOffset, nOffset, &nPos, nOffsetTol ) )
384 nOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
385 else
386 pOffset->insert( nOffset );
387 if ( nWidth )
389 if ( !SeekOffset( pOffset, nOffset + nWidth, &nPos, nWidthTol ) )
390 pOffset->insert( nOffset + nWidth );
// Moves an existing column offset from nOldOffset to nNewOffset inside *pOffset.
//
// pOffset     offset list to modify (must not be null)
// nOldOffset  in/out: offset to move; snapped to the stored value when found
// nNewOffset  in/out: target offset; snapped to an existing stored value when
//             one lies within nOffsetTol
// nOffsetTol  tolerance used for both lookups
//
// If the old offset is not found, the new offset is merely ensured to exist
// (snapped or inserted) and the function returns. If the new offset already
// exists, nothing is shifted. Otherwise the signed difference is added to
// the stored entries in place: for a negative difference to the found entry
// and every entry below it, otherwise to the found entry and every entry
// above it.
394 void ScHTMLLayoutParser::ModifyOffset( ScHTMLColOffset* pOffset, sal_uInt16& nOldOffset,
395 sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol )
397 OSL_ENSURE( pOffset, "ScHTMLLayoutParser::ModifyOffset - illegal call" );
398 SCCOL nPos;
// Old offset unknown: nothing to move, just make sure the new one exists.
399 if ( !SeekOffset( pOffset, nOldOffset, &nPos, nOffsetTol ) )
401 if ( SeekOffset( pOffset, nNewOffset, &nPos, nOffsetTol ) )
402 nNewOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
403 else
404 pOffset->insert( nNewOffset );
405 return ;
407 nOldOffset = static_cast<sal_uInt16>((*pOffset)[nPos]);
408 SCCOL nPos2;
// New offset already present (within tolerance): report it and leave the list untouched.
409 if ( SeekOffset( pOffset, nNewOffset, &nPos2, nOffsetTol ) )
411 nNewOffset = static_cast<sal_uInt16>((*pOffset)[nPos2]);
412 return ;
// Shift entries in place; the const_cast writes through the container's read-only element access.
414 long nDiff = nNewOffset - nOldOffset;
415 if ( nDiff < 0 )
419 const_cast<sal_uLong&>((*pOffset)[nPos]) += nDiff;
420 } while ( nPos-- );
422 else
426 const_cast<sal_uLong&>((*pOffset)[nPos]) += nDiff;
427 } while ( ++nPos < static_cast<sal_uInt16>(pOffset->size()) );
// Moves entry pE to the right until its cell range no longer intersects any
// range in xLockedList.
//
// pE     entry whose nCol may be increased
// bJoin  when true, and the entry still fits, the final range is added to xLockedList
//
// Does nothing unless ValidCol(pE->nCol). The range is built from the
// entry's column/row and its column/row overlap, with 0 as the third
// component of both corners. After each move the locked list is rescanned
// from the start. If the moved column, or the last column of the span,
// would exceed mpDoc->MaxCol(), the search stops and the range is not
// joined; pE->nCol keeps the out-of-range value in that case.
431 void ScHTMLLayoutParser::SkipLocked( ScEEParseEntry* pE, bool bJoin )
433 if ( ValidCol(pE->nCol) )
434 { // Or else this would create a wrong value at ScAddress (chance for an infinite loop)!
435 bool bBadCol = false;
436 bool bAgain;
437 ScRange aRange( pE->nCol, pE->nRow, 0,
438 pE->nCol + pE->nColOverlap - 1, pE->nRow + pE->nRowOverlap - 1, 0 );
441 bAgain = false;
442 for ( size_t i = 0, nRanges = xLockedList->size(); i < nRanges; ++i )
444 ScRange & rR = (*xLockedList)[i];
445 if ( rR.Intersects( aRange ) )
// Jump to the first column right of the locked range.
447 pE->nCol = rR.aEnd.Col() + 1;
448 SCCOL nTmp = pE->nCol + pE->nColOverlap - 1;
449 if ( pE->nCol > mpDoc->MaxCol() || nTmp > mpDoc->MaxCol() )
450 bBadCol = true;
451 else
453 bAgain = true;
454 aRange.aStart.SetCol( pE->nCol );
455 aRange.aEnd.SetCol( nTmp );
// Leave the scan; it restarts from the first locked range if bAgain was set.
457 break;
460 } while ( bAgain );
461 if ( bJoin && !bBadCol )
462 xLockedList->Join( aRange );
// Pass over all collected entries (maList) that assigns their final rows,
// columns and spans.
//
// - xLockedList is emptied first and rebuilt from the final cell ranges.
// - nLastCol / nNextRow / nCurRow are kept per nesting level on a local
//   stack: pushed when an entry's nTab is greater than the current nTab,
//   popped when it is smaller.
// - An entry whose column is not right of the previous one starts a new
//   row; the row step comes from the per-table row map in pTables when one
//   exists for the current table, otherwise it is 1.
// - Entries with nTab == 0 get the page width. All others have their
//   nRowOverlap extended by the per-row values from pTables, their column
//   resolved against maColOffset, locked ranges skipped and their
//   nColOverlap recomputed.
// - nColMax / nRowMax are raised to cover every entry.
466 void ScHTMLLayoutParser::Adjust()
468 xLockedList->RemoveAll();
470 std::stack< std::unique_ptr<ScHTMLAdjustStackEntry> > aStack;
471 sal_uInt16 nTab = 0;
472 SCCOL nLastCol = SCCOL_MAX;
473 SCROW nNextRow = 0;
474 SCROW nCurRow = 0;
475 sal_uInt16 nPageWidth = static_cast<sal_uInt16>(aPageSize.Width());
476 InnerMap* pTab = nullptr;
477 for (auto& pE : maList)
479 if ( pE->nTab < nTab )
480 { // Table finished
// Restore the row/column state saved when the finished table was entered.
481 if ( !aStack.empty() )
483 std::unique_ptr<ScHTMLAdjustStackEntry> pS = std::move(aStack.top());
484 aStack.pop();
486 nLastCol = pS->nLastCol;
487 nNextRow = pS->nNextRow;
488 nCurRow = pS->nCurRow;
490 nTab = pE->nTab;
// pTab keeps its previous value when no row map exists for this table.
491 if (pTables)
493 OuterMap::const_iterator it = pTables->find( nTab );
494 if ( it != pTables->end() )
495 pTab = it->second;
// Remember the entry's original row; it is the key for the row map lookups further down.
499 SCROW nRow = pE->nRow;
500 if ( pE->nCol <= nLastCol )
501 { // Next row
502 if ( pE->nRow < nNextRow )
503 pE->nRow = nCurRow = nNextRow;
504 else
505 nCurRow = nNextRow = pE->nRow;
506 SCROW nR = 0;
507 if ( pTab )
509 InnerMap::const_iterator it = pTab->find( nCurRow );
510 if ( it != pTab->end() )
511 nR = it->second;
513 if ( nR )
514 nNextRow += nR;
515 else
516 nNextRow++;
518 else
519 pE->nRow = nCurRow;
520 nLastCol = pE->nCol; // Read column
521 if ( pE->nTab > nTab )
522 { // New table
// Save the current state so it can be restored once this table is finished.
523 aStack.push( std::make_unique<ScHTMLAdjustStackEntry>(
524 nLastCol, nNextRow, nCurRow ) );
525 nTab = pE->nTab;
526 if ( pTables )
528 OuterMap::const_iterator it = pTables->find( nTab );
529 if ( it != pTables->end() )
530 pTab = it->second;
532 // New line spacing
533 SCROW nR = 0;
534 if ( pTab )
536 InnerMap::const_iterator it = pTab->find( nCurRow );
537 if ( it != pTab->end() )
538 nR = it->second;
540 if ( nR )
541 nNextRow = nCurRow + nR;
542 else
543 nNextRow = nCurRow + 1;
545 if ( nTab == 0 )
546 pE->nWidth = nPageWidth;
547 else
548 { // Real table, no paragraphs on the field
549 if ( pTab )
// The loop bound is the span before extension; nRowOverlap itself grows inside the loop.
551 SCROW nRowSpan = pE->nRowOverlap;
552 for ( SCROW j=0; j < nRowSpan; j++ )
553 { // RowSpan resulting from merged rows
554 SCROW nRows = 0;
555 InnerMap::const_iterator it = pTab->find( nRow+j );
556 if ( it != pTab->end() )
557 nRows = it->second;
558 if ( nRows > 1 )
560 pE->nRowOverlap += nRows - 1;
561 if ( j == 0 )
562 { // Merged rows move the next row
563 SCROW nTmp = nCurRow + nRows;
564 if ( nNextRow < nTmp )
565 nNextRow = nTmp;
571 // Real column
572 (void)SeekOffset( &maColOffset, pE->nOffset, &pE->nCol, nOffsetTolerance );
573 SCCOL nColBeforeSkip = pE->nCol;
574 SkipLocked(pE.get(), false);
// The entry was pushed to the right by a locked range: bring its offset in line with the new column.
575 if ( pE->nCol != nColBeforeSkip )
577 SCCOL nCount = static_cast<SCCOL>(maColOffset.size());
578 if ( nCount <= pE->nCol )
580 pE->nOffset = static_cast<sal_uInt16>(maColOffset[nCount-1]);
581 MakeCol( &maColOffset, pE->nOffset, pE->nWidth, nOffsetTolerance, nOffsetTolerance );
583 else
585 pE->nOffset = static_cast<sal_uInt16>(maColOffset[pE->nCol]);
588 SCCOL nPos;
589 if ( pE->nWidth && SeekOffset( &maColOffset, pE->nOffset + pE->nWidth, &nPos, nOffsetTolerance ) )
590 pE->nColOverlap = (nPos > pE->nCol ? nPos - pE->nCol : 1);
591 else
593 //FIXME: This may not be correct, but works anyway ...
594 pE->nColOverlap = 1;
// Lock the final cell range so later entries are moved past it.
596 xLockedList->Join( ScRange( pE->nCol, pE->nRow, 0,
597 pE->nCol + pE->nColOverlap - 1, pE->nRow + pE->nRowOverlap - 1, 0 ) );
598 // Take over MaxDimensions
599 SCCOL nColTmp = pE->nCol + pE->nColOverlap;
600 if ( nColMax < nColTmp )
601 nColMax = nColTmp;
602 SCROW nRowTmp = pE->nRow + pE->nRowOverlap;
603 if ( nRowMax < nRowTmp )
604 nRowMax = nRowTmp;
608 sal_uInt16 ScHTMLLayoutParser::GetWidth( const ScEEParseEntry* pE )
610 if ( pE->nWidth )
611 return pE->nWidth;
612 sal_Int32 nTmp = std::min( static_cast<sal_Int32>( pE->nCol -
613 nColCntStart + pE->nColOverlap),
614 static_cast<sal_Int32>( pLocalColOffset->size() - 1));
615 SCCOL nPos = (nTmp < 0 ? 0 : static_cast<SCCOL>(nTmp));
616 sal_uInt16 nOff2 = static_cast<sal_uInt16>((*pLocalColOffset)[nPos]);
617 if ( pE->nOffset < nOff2 )
618 return nOff2 - pE->nOffset;
619 return 0;
// Computes the column offsets of the current table (nTable) into
// pLocalColOffset and assigns offset and width to that table's entries,
// i.e. those in maList from index nFirstTableCell on whose nTab == nTable.
//
// A table width of 0 falls back to the page width.
// Case 1 - the local offset list has at most two entries: the table width
//          is divided evenly over the columns and every entry gets the
//          offset of its column; widths are reset to 0 and filled in at the
//          end.
// Case 2 - otherwise: per-column widths are collected from entries that
//          carry a width (a multi-column entry may supply a single missing
//          column width), columns still unknown share the remaining table
//          width, and offsets are accumulated from those widths.
// Finally the page width is enlarged to the last local offset if needed,
// zero widths are filled in through GetWidth(), and every entry is
// registered in the document-wide maColOffset via MakeCol().
622 void ScHTMLLayoutParser::SetWidths()
624 SCCOL nCol;
625 if ( !nTableWidth )
626 nTableWidth = static_cast<sal_uInt16>(aPageSize.Width());
627 SCCOL nColsPerRow = nMaxCol - nColCntStart;
// Guarantee at least one column so the divisions and array sizes below are valid.
628 if ( nColsPerRow <= 0 )
629 nColsPerRow = 1;
630 if ( pLocalColOffset->size() <= 2 )
631 { // Only PageSize, there was no width setting
632 sal_uInt16 nWidth = nTableWidth / static_cast<sal_uInt16>(nColsPerRow);
633 sal_uInt16 nOff = nColOffsetStart;
634 pLocalColOffset->clear();
635 for ( nCol = 0; nCol <= nColsPerRow; ++nCol, nOff = nOff + nWidth )
637 MakeColNoRef( pLocalColOffset, nOff, 0, 0, 0 );
639 nTableWidth = static_cast<sal_uInt16>(pLocalColOffset->back() - pLocalColOffset->front());
640 for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
642 auto& pE = maList[ i ];
643 if ( pE->nTab == nTable )
645 pE->nOffset = static_cast<sal_uInt16>((*pLocalColOffset)[pE->nCol - nColCntStart]);
646 pE->nWidth = 0; // to be recalculated later
650 else
651 { // Some without width
652 // Why actually no pE?
653 if ( nFirstTableCell < maList.size() )
// pOffsets has one slot more than pWidths: it holds the left edge of every column plus the right edge of the last.
655 std::unique_ptr<sal_uInt16[]> pOffsets(new sal_uInt16[ nColsPerRow+1 ]);
656 memset( pOffsets.get(), 0, (nColsPerRow+1) * sizeof(sal_uInt16) );
657 std::unique_ptr<sal_uInt16[]> pWidths(new sal_uInt16[ nColsPerRow ]);
658 memset( pWidths.get(), 0, nColsPerRow * sizeof(sal_uInt16) );
659 pOffsets[0] = nColOffsetStart;
660 for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
662 auto& pE = maList[ i ];
663 if ( pE->nTab == nTable && pE->nWidth )
665 nCol = pE->nCol - nColCntStart;
666 if ( nCol < nColsPerRow )
668 if ( pE->nColOverlap == 1 )
// Single-column entry: keep the largest width seen for this column.
670 if ( pWidths[nCol] < pE->nWidth )
671 pWidths[nCol] = pE->nWidth;
673 else
674 { // try to find a single undefined width
675 sal_uInt16 nTotal = 0;
676 bool bFound = false;
677 SCCOL nHere = 0;
678 SCCOL nStop = std::min( static_cast<SCCOL>(nCol + pE->nColOverlap), nColsPerRow );
679 for ( ; nCol < nStop; nCol++ )
681 if ( pWidths[nCol] )
682 nTotal = nTotal + pWidths[nCol];
683 else
// A second unknown column makes the split ambiguous: give up for this entry.
685 if ( bFound )
687 bFound = false;
688 break; // for
690 bFound = true;
691 nHere = nCol;
694 if ( bFound && pE->nWidth > nTotal )
695 pWidths[nHere] = pE->nWidth - nTotal;
700 sal_uInt16 nWidths = 0;
701 sal_uInt16 nUnknown = 0;
702 for ( nCol = 0; nCol < nColsPerRow; nCol++ )
704 if ( pWidths[nCol] )
705 nWidths = nWidths + pWidths[nCol];
706 else
707 nUnknown++;
709 if ( nUnknown )
// Unknown columns share what is left of the table width, or the whole table width if the known widths already use it up.
711 sal_uInt16 nW = ((nWidths < nTableWidth) ?
712 ((nTableWidth - nWidths) / nUnknown) :
713 (nTableWidth / nUnknown));
714 for ( nCol = 0; nCol < nColsPerRow; nCol++ )
716 if ( !pWidths[nCol] )
717 pWidths[nCol] = nW;
720 for ( nCol = 1; nCol <= nColsPerRow; nCol++ )
722 pOffsets[nCol] = pOffsets[nCol-1] + pWidths[nCol-1];
724 pLocalColOffset->clear();
725 for ( nCol = 0; nCol <= nColsPerRow; nCol++ )
727 MakeColNoRef( pLocalColOffset, pOffsets[nCol], 0, 0, 0 );
729 nTableWidth = pOffsets[nColsPerRow] - pOffsets[0];
731 for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
733 auto& pE = maList[ i ];
734 if ( pE->nTab == nTable )
736 nCol = pE->nCol - nColCntStart;
737 OSL_ENSURE( nCol < nColsPerRow, "ScHTMLLayoutParser::SetWidths: column overflow" );
738 if ( nCol < nColsPerRow )
740 pE->nOffset = pOffsets[nCol];
741 nCol = nCol + pE->nColOverlap;
// Clamp the right edge of the span to the last computed offset.
742 if ( nCol > nColsPerRow )
743 nCol = nColsPerRow;
744 pE->nWidth = pOffsets[nCol] - pE->nOffset;
750 if ( !pLocalColOffset->empty() )
752 sal_uInt16 nMax = static_cast<sal_uInt16>(pLocalColOffset->back());
753 if ( aPageSize.Width() < nMax )
754 aPageSize.setWidth( nMax );
756 for ( size_t i = nFirstTableCell, nListSize = maList.size(); i < nListSize; ++i )
758 auto& pE = maList[ i ];
759 if ( pE->nTab == nTable )
761 if ( !pE->nWidth )
763 pE->nWidth = GetWidth(pE.get());
764 OSL_ENSURE( pE->nWidth, "SetWidths: pE->nWidth == 0" );
766 MakeCol( &maColOffset, pE->nOffset, pE->nWidth, nOffsetTolerance, nOffsetTolerance );
771 void ScHTMLLayoutParser::Colonize( ScEEParseEntry* pE )
773 if ( pE->nCol == SCCOL_MAX )
774 pE->nCol = nColCnt;
775 if ( pE->nRow == SCROW_MAX )
776 pE->nRow = nRowCnt;
777 SCCOL nCol = pE->nCol;
778 SkipLocked( pE ); // Change of columns to the right
780 if ( nCol < pE->nCol )
781 { // Replaced
782 nCol = pE->nCol - nColCntStart;
783 SCCOL nCount = static_cast<SCCOL>(pLocalColOffset->size());
784 if ( nCol < nCount )
785 nColOffset = static_cast<sal_uInt16>((*pLocalColOffset)[nCol]);
786 else
787 nColOffset = static_cast<sal_uInt16>((*pLocalColOffset)[nCount - 1]);
789 pE->nOffset = nColOffset;
790 sal_uInt16 nWidth = GetWidth( pE );
791 MakeCol( pLocalColOffset, pE->nOffset, nWidth, nOffsetTolerance, nOffsetTolerance );
792 if ( pE->nWidth )
793 pE->nWidth = nWidth;
794 nColOffset = pE->nOffset + nWidth;
795 if ( nTableWidth < nColOffset - nColOffsetStart )
796 nTableWidth = nColOffset - nColOffsetStart;
799 void ScHTMLLayoutParser::CloseEntry( const HtmlImportInfo* pInfo )
801 bInCell = false;
802 if ( bTabInTabCell )
803 { // From the stack in TableOff
804 bTabInTabCell = false;
805 NewActEntry(maList.back().get()); // New free flying mxActEntry
806 return ;
808 if (mxActEntry->nTab == 0)
809 mxActEntry->nWidth = static_cast<sal_uInt16>(aPageSize.Width());
810 Colonize(mxActEntry.get());
811 nColCnt = mxActEntry->nCol + mxActEntry->nColOverlap;
812 if ( nMaxCol < nColCnt )
813 nMaxCol = nColCnt; // TableStack MaxCol
814 if ( nColMax < nColCnt )
815 nColMax = nColCnt; // Global MaxCol for ScEEParser GetDimensions!
816 EntryEnd(mxActEntry.get(), pInfo->aSelection);
817 ESelection& rSel = mxActEntry->aSel;
818 while ( rSel.nStartPara < rSel.nEndPara
819 && pEdit->GetTextLen( rSel.nStartPara ) == 0 )
820 { // Strip preceding empty paragraphs
821 rSel.nStartPara++;
823 while ( rSel.nEndPos == 0 && rSel.nEndPara > rSel.nStartPara )
824 { // Strip successive empty paragraphs
825 rSel.nEndPara--;
826 rSel.nEndPos = pEdit->GetTextLen( rSel.nEndPara );
828 if ( rSel.nStartPara > rSel.nEndPara )
829 { // Gives GPF in CreateTextObject
830 OSL_FAIL( "CloseEntry: EditEngine ESelection Start > End" );
831 rSel.nEndPara = rSel.nStartPara;
833 if ( rSel.HasRange() )
834 mxActEntry->aItemSet.Put( SfxBoolItem( ATTR_LINEBREAK, true ) );
835 maList.push_back(mxActEntry);
836 NewActEntry(mxActEntry.get()); // New free flying mxActEntry
839 IMPL_LINK( ScHTMLLayoutParser, HTMLImportHdl, HtmlImportInfo&, rInfo, void )
841 switch ( rInfo.eState )
843 case HtmlImportState::NextToken:
844 ProcToken( &rInfo );
845 break;
846 case HtmlImportState::Start:
847 break;
848 case HtmlImportState::End:
849 if ( rInfo.aSelection.nEndPos )
851 // If text remains: create paragraph, without calling CloseEntry().
852 if( bInCell ) // ...but only in opened table cells.
854 bInCell = false;
855 NextRow( &rInfo );
856 bInCell = true;
858 CloseEntry( &rInfo );
860 while ( nTableLevel > 0 )
861 TableOff( &rInfo ); // close tables, if </TABLE> missing
862 break;
863 case HtmlImportState::SetAttr:
864 break;
865 case HtmlImportState::InsertText:
866 break;
867 case HtmlImportState::InsertPara:
868 if ( nTableLevel < 1 )
870 CloseEntry( &rInfo );
871 NextRow( &rInfo );
873 break;
874 case HtmlImportState::InsertField:
875 break;
876 default:
877 OSL_FAIL("HTMLImportHdl: unknown ImportInfo.eState");
881 // Greatest common divisor (Euclid)
882 // Special case: 0 and something gives 1
883 static SCROW lcl_GGT( SCROW a, SCROW b )
885 if ( !a || !b )
886 return 1;
889 if ( a > b )
890 a -= SCROW(a / b) * b;
891 else
892 b -= SCROW(b / a) * a;
893 } while ( a && b );
894 return ((a != 0) ? a : b);
897 // Lowest common multiple: a * b / GCD(a,b)
898 static SCROW lcl_KGV( SCROW a, SCROW b )
900 if ( a > b ) // Make overflow even less likely
901 return (a / lcl_GGT(a,b)) * b;
902 else
903 return (b / lcl_GGT(a,b)) * a;
906 void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo )
908 if ( bInCell )
909 CloseEntry( pInfo );
910 if ( !nTableLevel )
912 OSL_FAIL( "dumbo doc! <TH> or <TD> without previous <TABLE>" );
913 TableOn( pInfo );
915 bInCell = true;
916 bool bHorJustifyCenterTH = (pInfo->nToken == HtmlTokenId::TABLEHEADER_ON);
917 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
918 for (const auto & rOption : rOptions)
920 switch( rOption.GetToken() )
922 case HtmlOptionId::COLSPAN:
924 mxActEntry->nColOverlap = static_cast<SCCOL>(rOption.GetString().toInt32());
926 break;
927 case HtmlOptionId::ROWSPAN:
929 mxActEntry->nRowOverlap = static_cast<SCROW>(rOption.GetString().toInt32());
931 break;
932 case HtmlOptionId::ALIGN:
934 bHorJustifyCenterTH = false;
935 SvxCellHorJustify eVal;
936 const OUString& rOptVal = rOption.GetString();
937 if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
938 eVal = SvxCellHorJustify::Right;
939 else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_center ) )
940 eVal = SvxCellHorJustify::Center;
941 else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
942 eVal = SvxCellHorJustify::Left;
943 else
944 eVal = SvxCellHorJustify::Standard;
945 if ( eVal != SvxCellHorJustify::Standard )
946 mxActEntry->aItemSet.Put(SvxHorJustifyItem(eVal, ATTR_HOR_JUSTIFY));
948 break;
949 case HtmlOptionId::VALIGN:
951 SvxCellVerJustify eVal;
952 const OUString& rOptVal = rOption.GetString();
953 if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_top ) )
954 eVal = SvxCellVerJustify::Top;
955 else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_middle ) )
956 eVal = SvxCellVerJustify::Center;
957 else if ( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_bottom ) )
958 eVal = SvxCellVerJustify::Bottom;
959 else
960 eVal = SvxCellVerJustify::Standard;
961 mxActEntry->aItemSet.Put(SvxVerJustifyItem(eVal, ATTR_VER_JUSTIFY));
963 break;
964 case HtmlOptionId::WIDTH:
966 mxActEntry->nWidth = GetWidthPixel(rOption);
968 break;
969 case HtmlOptionId::BGCOLOR:
971 Color aColor;
972 rOption.GetColor( aColor );
973 mxActEntry->aItemSet.Put(SvxBrushItem(aColor, ATTR_BACKGROUND));
975 break;
976 case HtmlOptionId::SDVAL:
978 mxActEntry->pValStr = rOption.GetString();
980 break;
981 case HtmlOptionId::SDNUM:
983 mxActEntry->pNumStr = rOption.GetString();
985 break;
986 default: break;
990 mxActEntry->nCol = nColCnt;
991 mxActEntry->nRow = nRowCnt;
992 mxActEntry->nTab = nTable;
994 if ( bHorJustifyCenterTH )
995 mxActEntry->aItemSet.Put(
996 SvxHorJustifyItem( SvxCellHorJustify::Center, ATTR_HOR_JUSTIFY) );
999 void ScHTMLLayoutParser::TableRowOn( const HtmlImportInfo* pInfo )
1001 if ( nColCnt > nColCntStart )
1002 NextRow( pInfo ); // The optional TableRowOff wasn't there
1003 nColOffset = nColOffsetStart;
1006 void ScHTMLLayoutParser::TableRowOff( const HtmlImportInfo* pInfo )
1008 NextRow( pInfo );
1011 void ScHTMLLayoutParser::TableDataOff( const HtmlImportInfo* pInfo )
1013 if ( bInCell )
1014 CloseEntry( pInfo ); // Only if it really was one
// Handles the start of a table (also called from TableDataOn() when a cell
// appears without an enclosing table).
//
// pInfo  import info for the token; for a real <TABLE> token its WIDTH
//        option sets nTableWidth
//
// In both branches the current parser state is pushed onto aTableStack.
// Nested table (level > 1): the enclosing cell is positioned first via
//   Colonize(), the default table width is derived from that cell, and the
//   new table either starts a new row (a further table underneath a
//   previous one in the same cell) or starts at the current column/offset.
//   A new current entry and a new locked list are started.
// Top-level table: pending free text is closed into its own row first; as
//   soon as more than one table exists the larger offset tolerance is used.
// Finally the table gets a new number, bFirstRow and nFirstTableCell are
// reset, and a fresh local offset list seeded with nColOffsetStart is
// allocated.
1017 void ScHTMLLayoutParser::TableOn( HtmlImportInfo* pInfo )
1019 if ( ++nTableLevel > 1 )
1020 { // Table in Table
1021 sal_uInt16 nTmpColOffset = nColOffset; // Will be changed in Colonize()
1022 Colonize(mxActEntry.get());
1023 aTableStack.push( std::make_unique<ScHTMLTableStackEntry>(
1024 mxActEntry, xLockedList, pLocalColOffset, nFirstTableCell,
1025 nRowCnt, nColCntStart, nMaxCol, nTable,
1026 nTableWidth, nColOffset, nColOffsetStart,
1027 bFirstRow ) );
1028 sal_uInt16 nLastWidth = nTableWidth;
1029 nTableWidth = GetWidth(mxActEntry.get());
1030 if ( nTableWidth == nLastWidth && nMaxCol - nColCntStart > 1 )
1031 { // There must be more than one, so this one cannot be enough
1032 nTableWidth = nLastWidth / static_cast<sal_uInt16>((nMaxCol - nColCntStart));
// From here on nLastWidth holds the default width, compared below against an explicit WIDTH option.
1034 nLastWidth = nTableWidth;
1035 if ( pInfo->nToken == HtmlTokenId::TABLE_ON )
1036 { // It can still be TD or TH, if we didn't have a TABLE earlier
1037 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1038 for (const auto & rOption : rOptions)
1040 switch( rOption.GetToken() )
1042 case HtmlOptionId::WIDTH:
1043 { // Percent: of document width or outer cell
1044 nTableWidth = GetWidthPixel( rOption );
1046 break;
1047 case HtmlOptionId::BORDER:
1048 // Border is: ((pOption->GetString().Len() == 0) || (pOption->GetNumber() != 0));
1049 break;
1050 default: break;
1054 bInCell = false;
1055 if ( bTabInTabCell && (nTableWidth >= nLastWidth) )
1056 { // Multiple tables in one cell, underneath each other
1057 bTabInTabCell = false;
1058 NextRow( pInfo );
1060 else
1061 { // It starts in this cell or next to each other
1062 bTabInTabCell = false;
1063 nColCntStart = nColCnt;
1064 nColOffset = nTmpColOffset;
1065 nColOffsetStart = nColOffset;
// maList may still be empty at this point, hence the guard before back().
1068 NewActEntry(!maList.empty() ? maList.back().get() : nullptr); // New free flying mxActEntry
1069 xLockedList = new ScRangeList;
1071 else
1072 { // Simple table at the document level
1073 EntryEnd(mxActEntry.get(), pInfo->aSelection);
1074 if (mxActEntry->aSel.HasRange())
1075 { // Flying text left
1076 CloseEntry( pInfo );
1077 NextRow( pInfo );
1079 aTableStack.push( std::make_unique<ScHTMLTableStackEntry>(
1080 mxActEntry, xLockedList, pLocalColOffset, nFirstTableCell,
1081 nRowCnt, nColCntStart, nMaxCol, nTable,
1082 nTableWidth, nColOffset, nColOffsetStart,
1083 bFirstRow ) );
1084 // As soon as we have multiple tables we need to be tolerant with the offsets.
1085 if (nMaxTable > 0)
1086 nOffsetTolerance = SC_HTML_OFFSET_TOLERANCE_LARGE;
1087 nTableWidth = 0;
1088 if ( pInfo->nToken == HtmlTokenId::TABLE_ON )
1090 // It can still be TD or TH, if we didn't have a TABLE earlier
1091 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1092 for (const auto & rOption : rOptions)
1094 switch( rOption.GetToken() )
1096 case HtmlOptionId::WIDTH:
1097 { // Percent: of document width or outer cell
1098 nTableWidth = GetWidthPixel( rOption );
1100 break;
1101 case HtmlOptionId::BORDER:
1102 //BorderOn is: ((pOption->GetString().Len() == 0) || (pOption->GetNumber() != 0));
1103 break;
1104 default: break;
// Common tail for both branches: number the new table and start its own offset list.
1109 nTable = ++nMaxTable;
1110 bFirstRow = true;
1111 nFirstTableCell = maList.size();
1113 pLocalColOffset = new ScHTMLColOffset;
1114 MakeColNoRef( pLocalColOffset, nColOffsetStart, 0, 0, 0 );
// Handles a closing </TABLE> token.
// First closes a still-open cell and a still-open row. If the table being
// closed is nested (nTableLevel stays > 0), the state saved on aTableStack is
// popped, row-count mappings for the outer and the inner table are recorded
// in pTables, the enclosing cell is widened if the inner table is wider, and
// the saved parser state is restored. Otherwise the document-level table is
// finished and the column offsets of the previous level are reinstated.
// NOTE(review): the nested branch assigns pLocalColOffset from the stack entry
// without deleting the current object, while the simple-table branch deletes
// it first - confirm that ownership is handled elsewhere.
1117 void ScHTMLLayoutParser::TableOff( const HtmlImportInfo* pInfo )
1119 if ( bInCell )
1120 CloseEntry( pInfo );
1121 if ( nColCnt > nColCntStart )
1122 TableRowOff( pInfo ); // The optional TableRowOff wasn't
1123 if ( !nTableLevel )
1125 OSL_FAIL( "dumbo doc! </TABLE> without opening <TABLE>" );
1126 return ;
1128 if ( --nTableLevel > 0 )
1129 { // Table in Table done
1130 if ( !aTableStack.empty() )
// Take ownership of the saved outer-table state before removing it from the stack.
1132 std::unique_ptr<ScHTMLTableStackEntry> pS = std::move(aTableStack.top());
1133 aTableStack.pop();
// pE is the entry of the outer cell that contained the nested table.
1135 auto& pE = pS->xCellEntry;
// Number of rows the nested table consumed since it was opened.
1136 SCROW nRows = nRowCnt - pS->nRowCnt;
1137 if ( nRows > 1 )
1138 { // Insert size of table at this position
1139 SCROW nRow = pS->nRowCnt;
1140 sal_uInt16 nTab = pS->nTable;
1141 if ( !pTables )
1142 pTables.reset( new OuterMap );
1143 // Height of outer table
1144 OuterMap::const_iterator it = pTables->find( nTab );
1145 InnerMap* pTab1;
1146 if ( it == pTables->end() )
1148 pTab1 = new InnerMap;
1149 (*pTables)[ nTab ] = pTab1;
1151 else
1152 pTab1 = it->second;
1153 SCROW nRowSpan = pE->nRowOverlap;
// nRowKGV holds the result of lcl_KGV (the LCM, per the comment below).
1154 SCROW nRowKGV;
1155 SCROW nRowsPerRow1; // Outer table
1156 SCROW nRowsPerRow2; // Inner table
1157 if ( nRowSpan > 1 )
1158 { // LCM to which we can map the inner and outer rows
1159 nRowKGV = lcl_KGV( nRowSpan, nRows );
1160 nRowsPerRow1 = nRowKGV / nRowSpan;
1161 nRowsPerRow2 = nRowKGV / nRows;
1163 else
1165 nRowKGV = nRowsPerRow1 = nRows;
1166 nRowsPerRow2 = 1;
1168 InnerMap* pTab2 = nullptr;
1169 if ( nRowsPerRow2 > 1 )
1170 { // Height of the inner table
1171 pTab2 = new InnerMap;
1172 (*pTables)[ nTable ] = pTab2;
1174 // Abuse void* Data entry of the Table class for height mapping
1175 if ( nRowKGV > 1 )
1177 if ( nRowsPerRow1 > 1 )
1178 { // Outer
1179 for ( SCROW j=0; j < nRowSpan; j++ )
1181 sal_uLong nRowKey = nRow + j;
// operator[] inserts a zero entry when the row has no mapping yet.
1182 SCROW nR = (*pTab1)[ nRowKey ];
1183 if ( !nR )
1184 (*pTab1)[ nRowKey ] = nRowsPerRow1;
1185 else if ( nRowsPerRow1 > nR )
1186 (*pTab1)[ nRowKey ] = nRowsPerRow1;
1187 //TODO: How can we improve on this?
1188 else if ( nRowsPerRow1 < nR && nRowSpan == 1
1189 && nTable == nMaxTable )
1190 { // Still some space left, merge in a better way (if possible)
1191 SCROW nAdd = nRowsPerRow1 - (nR % nRowsPerRow1);
1192 nR += nAdd;
1193 if ( (nR % nRows) == 0 )
1194 { // Only if representable
1195 SCROW nR2 = (*pTab1)[ nRowKey+1 ];
1196 if ( nR2 > nAdd )
1197 { // Only if we really have enough space
1198 (*pTab1)[ nRowKey ] = nR;
1199 (*pTab1)[ nRowKey+1 ] = nR2 - nAdd;
1200 nRowsPerRow2 = nR / nRows;
1206 if ( nRowsPerRow2 > 1 )
1207 { // Inner
1208 if ( !pTab2 )
1209 { // nRowsPerRow2 could be've been incremented
1210 pTab2 = new InnerMap;
1211 (*pTables)[ nTable ] = pTab2;
1213 for ( SCROW j=0; j < nRows; j++ )
1215 sal_uLong nRowKey = nRow + j;
1216 (*pTab2)[ nRowKey ] = nRowsPerRow2;
1222 SetWidths();
// Make the enclosing cell at least as wide as the nested table.
1224 if ( !pE->nWidth )
1225 pE->nWidth = nTableWidth;
1226 else if ( pE->nWidth < nTableWidth )
1228 sal_uInt16 nOldOffset = pE->nOffset + pE->nWidth;
1229 sal_uInt16 nNewOffset = pE->nOffset + nTableWidth;
1230 ModifyOffset( pS->pLocalColOffset, nOldOffset, nNewOffset, nOffsetTolerance );
// nTmp is the amount by which the cell grew; it is added to the saved outer state.
1231 sal_uInt16 nTmp = nNewOffset - pE->nOffset - pE->nWidth;
1232 pE->nWidth = nNewOffset - pE->nOffset;
1233 pS->nTableWidth = pS->nTableWidth + nTmp;
1234 if ( pS->nColOffset >= nOldOffset )
1235 pS->nColOffset = pS->nColOffset + nTmp;
// Restore the parser state that was saved for the outer table.
1238 nColCnt = pE->nCol + pE->nColOverlap;
1239 nRowCnt = pS->nRowCnt;
1240 nColCntStart = pS->nColCntStart;
1241 nMaxCol = pS->nMaxCol;
1242 nTable = pS->nTable;
1243 nTableWidth = pS->nTableWidth;
1244 nFirstTableCell = pS->nFirstTableCell;
1245 nColOffset = pS->nColOffset;
1246 nColOffsetStart = pS->nColOffsetStart;
1247 bFirstRow = pS->bFirstRow;
1248 xLockedList = pS->xLockedList;
1249 pLocalColOffset = pS->pLocalColOffset;
1250 // mxActEntry is kept around if a table is started in the same row
1251 // (anything's possible in HTML); will be deleted by CloseEntry
1252 mxActEntry = pE;
1254 bTabInTabCell = true;
1255 bInCell = true;
1257 else
1258 { // Simple table finished
1259 SetWidths();
1260 nMaxCol = 0;
1261 nTable = 0;
1262 if ( !aTableStack.empty() )
// Drop this table's column offsets and continue with the saved ones.
1264 ScHTMLTableStackEntry* pS = aTableStack.top().get();
1265 delete pLocalColOffset;
1266 pLocalColOffset = pS->pLocalColOffset;
1267 aTableStack.pop();
1272 void ScHTMLLayoutParser::Image( HtmlImportInfo* pInfo )
1274 mxActEntry->maImageList.push_back(std::make_unique<ScHTMLImage>());
1275 ScHTMLImage* pImage = mxActEntry->maImageList.back().get();
1276 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1277 for (const auto & rOption : rOptions)
1279 switch( rOption.GetToken() )
1281 case HtmlOptionId::SRC:
1283 pImage->aURL = INetURLObject::GetAbsURL( aBaseURL, rOption.GetString() );
1285 break;
1286 case HtmlOptionId::ALT:
1288 if (!mxActEntry->bHasGraphic)
1289 { // ALT text only if not any image loaded
1290 if (!mxActEntry->aAltText.isEmpty())
1291 mxActEntry->aAltText += "; ";
1293 mxActEntry->aAltText += rOption.GetString();
1296 break;
1297 case HtmlOptionId::WIDTH:
1299 pImage->aSize.setWidth( static_cast<long>(rOption.GetNumber()) );
1301 break;
1302 case HtmlOptionId::HEIGHT:
1304 pImage->aSize.setHeight( static_cast<long>(rOption.GetNumber()) );
1306 break;
1307 case HtmlOptionId::HSPACE:
1309 pImage->aSpace.setX( static_cast<long>(rOption.GetNumber()) );
1311 break;
1312 case HtmlOptionId::VSPACE:
1314 pImage->aSpace.setY( static_cast<long>(rOption.GetNumber()) );
1316 break;
1317 default: break;
1320 if (pImage->aURL.isEmpty())
1322 OSL_FAIL( "Image: graphic without URL ?!?" );
1323 return ;
1326 sal_uInt16 nFormat;
1327 std::unique_ptr<Graphic> pGraphic(new Graphic);
1328 GraphicFilter& rFilter = GraphicFilter::GetGraphicFilter();
1329 if ( ERRCODE_NONE != GraphicFilter::LoadGraphic( pImage->aURL, pImage->aFilterName,
1330 *pGraphic, &rFilter, &nFormat ) )
1332 return ; // Bad luck
1334 if (!mxActEntry->bHasGraphic)
1335 { // discard any ALT text in this cell if we have any image
1336 mxActEntry->bHasGraphic = true;
1337 mxActEntry->aAltText.clear();
1339 pImage->aFilterName = rFilter.GetImportFormatName( nFormat );
1340 pImage->pGraphic = std::move( pGraphic );
1341 if ( !(pImage->aSize.Width() && pImage->aSize.Height()) )
1343 OutputDevice* pDefaultDev = Application::GetDefaultDevice();
1344 pImage->aSize = pDefaultDev->LogicToPixel( pImage->pGraphic->GetPrefSize(),
1345 pImage->pGraphic->GetPrefMapMode() );
1347 if (!mxActEntry->maImageList.empty())
1349 long nWidth = 0;
1350 for (const std::unique_ptr<ScHTMLImage> & pI : mxActEntry->maImageList)
1352 if ( pI->nDir & nHorizontal )
1353 nWidth += pI->aSize.Width() + 2 * pI->aSpace.X();
1354 else
1355 nWidth = 0;
1357 if ( mxActEntry->nWidth
1358 && (nWidth + pImage->aSize.Width() + 2 * pImage->aSpace.X()
1359 >= mxActEntry->nWidth) )
1360 mxActEntry->maImageList.back()->nDir = nVertical;
1364 void ScHTMLLayoutParser::ColOn( HtmlImportInfo* pInfo )
1366 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1367 for (const auto & rOption : rOptions)
1369 if( rOption.GetToken() == HtmlOptionId::WIDTH )
1371 sal_uInt16 nVal = GetWidthPixel( rOption );
1372 MakeCol( pLocalColOffset, nColOffset, nVal, 0, 0 );
1373 nColOffset = nColOffset + nVal;
1378 sal_uInt16 ScHTMLLayoutParser::GetWidthPixel( const HTMLOption& rOption )
1380 const OUString& rOptVal = rOption.GetString();
1381 if ( rOptVal.indexOf('%') != -1 )
1382 { // Percent
1383 sal_uInt16 nW = (nTableWidth ? nTableWidth : static_cast<sal_uInt16>(aPageSize.Width()));
1384 return static_cast<sal_uInt16>((rOption.GetNumber() * nW) / 100);
1386 else
1388 if ( rOptVal.indexOf('*') != -1 )
1389 { // Relative to what?
1390 // TODO: Collect all relative values in ColArray and then MakeCol
1391 return 0;
1393 else
1394 return static_cast<sal_uInt16>(rOption.GetNumber()); // Pixel
1398 void ScHTMLLayoutParser::AnchorOn( HtmlImportInfo* pInfo )
1400 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1401 for (const auto & rOption : rOptions)
1403 if( rOption.GetToken() == HtmlOptionId::NAME )
1404 mxActEntry->pName = rOption.GetString();
1408 bool ScHTMLLayoutParser::IsAtBeginningOfText( const HtmlImportInfo* pInfo )
1410 ESelection& rSel = mxActEntry->aSel;
1411 return rSel.nStartPara == rSel.nEndPara &&
1412 rSel.nStartPara <= pInfo->aSelection.nEndPara &&
1413 pEdit->GetTextLen( rSel.nStartPara ) == 0;
1416 void ScHTMLLayoutParser::FontOn( HtmlImportInfo* pInfo )
1418 if ( IsAtBeginningOfText( pInfo ) )
1419 { // Only at the start of the text; applies to whole line
1420 const HTMLOptions& rOptions = static_cast<HTMLParser*>(pInfo->pParser)->GetOptions();
1421 for (const auto & rOption : rOptions)
1423 switch( rOption.GetToken() )
1425 case HtmlOptionId::FACE :
1427 const OUString& rFace = rOption.GetString();
1428 OUStringBuffer aFontName;
1429 sal_Int32 nPos = 0;
1430 while( nPos != -1 )
1432 // Font list, VCL uses the semicolon as separator
1433 // HTML uses the comma
1434 OUString aFName = rFace.getToken( 0, ',', nPos );
1435 aFName = comphelper::string::strip(aFName, ' ');
1436 if( !aFontName.isEmpty() )
1437 aFontName.append(";");
1438 aFontName.append(aFName);
1440 if ( !aFontName.isEmpty() )
1441 mxActEntry->aItemSet.Put( SvxFontItem( FAMILY_DONTKNOW,
1442 aFontName.makeStringAndClear(), EMPTY_OUSTRING, PITCH_DONTKNOW,
1443 RTL_TEXTENCODING_DONTKNOW, ATTR_FONT ) );
1445 break;
1446 case HtmlOptionId::SIZE :
1448 sal_uInt16 nSize = static_cast<sal_uInt16>(rOption.GetNumber());
1449 if ( nSize == 0 )
1450 nSize = 1;
1451 else if ( nSize > SC_HTML_FONTSIZES )
1452 nSize = SC_HTML_FONTSIZES;
1453 mxActEntry->aItemSet.Put( SvxFontHeightItem(
1454 maFontHeights[nSize-1], 100, ATTR_FONT_HEIGHT ) );
1456 break;
1457 case HtmlOptionId::COLOR :
1459 Color aColor;
1460 rOption.GetColor( aColor );
1461 mxActEntry->aItemSet.Put( SvxColorItem( aColor, ATTR_FONT_COLOR ) );
1463 break;
1464 default: break;
1470 void ScHTMLLayoutParser::ProcToken( HtmlImportInfo* pInfo )
1472 switch ( pInfo->nToken )
1474 case HtmlTokenId::META:
1476 HTMLParser* pParser = static_cast<HTMLParser*>(pInfo->pParser);
1477 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1478 mpDoc->GetDocumentShell()->GetModel(), uno::UNO_QUERY_THROW);
1479 pParser->ParseMetaOptions(
1480 xDPS->getDocumentProperties(),
1481 mpDoc->GetDocumentShell()->GetHeaderAttributes() );
1483 break;
1484 case HtmlTokenId::TITLE_ON:
1486 bInTitle = true;
1487 aString.clear();
1489 break;
1490 case HtmlTokenId::TITLE_OFF:
1492 if ( bInTitle && !aString.isEmpty() )
1494 // Remove blanks from line brakes
1495 aString = aString.trim();
1496 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
1497 mpDoc->GetDocumentShell()->GetModel(),
1498 uno::UNO_QUERY_THROW);
1499 xDPS->getDocumentProperties()->setTitle(aString);
1501 bInTitle = false;
1503 break;
1504 case HtmlTokenId::TABLE_ON:
1506 TableOn( pInfo );
1508 break;
1509 case HtmlTokenId::COL_ON:
1511 ColOn( pInfo );
1513 break;
1514 case HtmlTokenId::TABLEHEADER_ON: // Opens row
1516 if ( bInCell )
1517 CloseEntry( pInfo );
1518 // Do not set bInCell to true, TableDataOn does that
1519 mxActEntry->aItemSet.Put(
1520 SvxWeightItem( WEIGHT_BOLD, ATTR_FONT_WEIGHT) );
1521 [[fallthrough]];
1523 case HtmlTokenId::TABLEDATA_ON: // Opens cell
1525 TableDataOn( pInfo );
1527 break;
1528 case HtmlTokenId::TABLEHEADER_OFF:
1529 case HtmlTokenId::TABLEDATA_OFF: // Closes cell
1531 TableDataOff( pInfo );
1533 break;
1534 case HtmlTokenId::TABLEROW_ON: // Before first cell in row
1536 TableRowOn( pInfo );
1538 break;
1539 case HtmlTokenId::TABLEROW_OFF: // After last cell in row
1541 TableRowOff( pInfo );
1543 break;
1544 case HtmlTokenId::TABLE_OFF:
1546 TableOff( pInfo );
1548 break;
1549 case HtmlTokenId::IMAGE:
1551 Image( pInfo );
1553 break;
1554 case HtmlTokenId::PARABREAK_OFF:
1555 { // We continue vertically after an image
1556 if (!mxActEntry->maImageList.empty())
1557 mxActEntry->maImageList.back()->nDir = nVertical;
1559 break;
1560 case HtmlTokenId::ANCHOR_ON:
1562 AnchorOn( pInfo );
1564 break;
1565 case HtmlTokenId::FONT_ON :
1567 FontOn( pInfo );
1569 break;
1570 case HtmlTokenId::BIGPRINT_ON :
1572 // TODO: Remember current font size and increase by 1
1573 if ( IsAtBeginningOfText( pInfo ) )
1574 mxActEntry->aItemSet.Put( SvxFontHeightItem(
1575 maFontHeights[3], 100, ATTR_FONT_HEIGHT ) );
1577 break;
1578 case HtmlTokenId::SMALLPRINT_ON :
1580 // TODO: Remember current font size and decrease by 1
1581 if ( IsAtBeginningOfText( pInfo ) )
1582 mxActEntry->aItemSet.Put( SvxFontHeightItem(
1583 maFontHeights[0], 100, ATTR_FONT_HEIGHT ) );
1585 break;
1586 case HtmlTokenId::BOLD_ON :
1587 case HtmlTokenId::STRONG_ON :
1589 if ( IsAtBeginningOfText( pInfo ) )
1590 mxActEntry->aItemSet.Put( SvxWeightItem( WEIGHT_BOLD,
1591 ATTR_FONT_WEIGHT ) );
1593 break;
1594 case HtmlTokenId::ITALIC_ON :
1595 case HtmlTokenId::EMPHASIS_ON :
1596 case HtmlTokenId::ADDRESS_ON :
1597 case HtmlTokenId::BLOCKQUOTE_ON :
1598 case HtmlTokenId::BLOCKQUOTE30_ON :
1599 case HtmlTokenId::CITIATION_ON :
1600 case HtmlTokenId::VARIABLE_ON :
1602 if ( IsAtBeginningOfText( pInfo ) )
1603 mxActEntry->aItemSet.Put( SvxPostureItem( ITALIC_NORMAL,
1604 ATTR_FONT_POSTURE ) );
1606 break;
1607 case HtmlTokenId::DEFINSTANCE_ON :
1609 if ( IsAtBeginningOfText( pInfo ) )
1611 mxActEntry->aItemSet.Put( SvxWeightItem( WEIGHT_BOLD,
1612 ATTR_FONT_WEIGHT ) );
1613 mxActEntry->aItemSet.Put( SvxPostureItem( ITALIC_NORMAL,
1614 ATTR_FONT_POSTURE ) );
1617 break;
1618 case HtmlTokenId::UNDERLINE_ON :
1620 if ( IsAtBeginningOfText( pInfo ) )
1621 mxActEntry->aItemSet.Put( SvxUnderlineItem( LINESTYLE_SINGLE,
1622 ATTR_FONT_UNDERLINE ) );
1624 break;
1625 case HtmlTokenId::TEXTTOKEN:
1627 if ( bInTitle )
1628 aString += pInfo->aText;
1630 break;
1631 default: ;
1635 // HTML DATA QUERY PARSER
// Limits rValue to the range [rMin, rMax]: the value is first capped at rMax
// and the result is then raised to rMin if it is smaller.
template< typename Type >
static Type getLimitedValue( const Type& rValue, const Type& rMin, const Type& rMax )
{
    const Type& rCapped = (rMax < rValue) ? rMax : rValue;
    return (rCapped < rMin) ? rMin : rCapped;
}
1641 ScHTMLEntry::ScHTMLEntry( const SfxItemSet& rItemSet, ScHTMLTableId nTableId ) :
1642 ScEEParseEntry( rItemSet ),
1643 mbImportAlways( false )
1645 nTab = nTableId;
1646 bEntirePara = false;
1649 bool ScHTMLEntry::HasContents() const
1651 return mbImportAlways || aSel.HasRange() || !aAltText.isEmpty() || IsTable();
1654 void ScHTMLEntry::AdjustStart( const HtmlImportInfo& rInfo )
1656 // set start position
1657 aSel.nStartPara = rInfo.aSelection.nStartPara;
1658 aSel.nStartPos = rInfo.aSelection.nStartPos;
1659 // adjust end position
1660 if( (aSel.nEndPara < aSel.nStartPara) || ((aSel.nEndPara == aSel.nStartPara) && (aSel.nEndPos < aSel.nStartPos)) )
1662 aSel.nEndPara = aSel.nStartPara;
1663 aSel.nEndPos = aSel.nStartPos;
1667 void ScHTMLEntry::AdjustEnd( const HtmlImportInfo& rInfo )
1669 OSL_ENSURE( (aSel.nEndPara < rInfo.aSelection.nEndPara) ||
1670 ((aSel.nEndPara == rInfo.aSelection.nEndPara) && (aSel.nEndPos <= rInfo.aSelection.nEndPos)),
1671 "ScHTMLQueryParser::AdjustEntryEnd - invalid end position" );
1672 // set end position
1673 aSel.nEndPara = rInfo.aSelection.nEndPara;
1674 aSel.nEndPos = rInfo.aSelection.nEndPos;
1677 void ScHTMLEntry::Strip( const EditEngine& rEditEngine )
1679 // strip leading empty paragraphs
1680 while( (aSel.nStartPara < aSel.nEndPara) && (rEditEngine.GetTextLen( aSel.nStartPara ) <= aSel.nStartPos) )
1682 ++aSel.nStartPara;
1683 aSel.nStartPos = 0;
1685 // strip trailing empty paragraphs
1686 while( (aSel.nStartPara < aSel.nEndPara) && (aSel.nEndPos == 0) )
1688 --aSel.nEndPara;
1689 aSel.nEndPos = rEditEngine.GetTextLen( aSel.nEndPara );
1693 /** A map of ScHTMLTable objects.
1695 Organizes the tables with a unique table key. Stores nested tables inside
1696 the parent table and forms in this way a tree structure of tables. An
1697 instance of this class owns the contained table objects and deletes them
1698 on destruction.
1700 class ScHTMLTableMap final
1702 private:
1703 typedef std::shared_ptr< ScHTMLTable > ScHTMLTablePtr;
1704 typedef std::map< ScHTMLTableId, ScHTMLTablePtr > ScHTMLTableStdMap;
1706 public:
1707 typedef ScHTMLTableStdMap::iterator iterator;
1708 typedef ScHTMLTableStdMap::const_iterator const_iterator;
1710 private:
1711 ScHTMLTable& mrParentTable; /// Reference to parent table.
1712 ScHTMLTableStdMap maTables; /// Container for all table objects.
1713 mutable ScHTMLTable* mpCurrTable; /// Current table, used for fast search.
1715 public:
1716 explicit ScHTMLTableMap( ScHTMLTable& rParentTable );
1718 const_iterator begin() const { return maTables.begin(); }
1719 const_iterator end() const { return maTables.end(); }
1721 /** Returns the specified table.
1722 @param nTableId Unique identifier of the table.
1723 @param bDeep true = searches deep in all nested table; false = only in this container. */
1724 ScHTMLTable* FindTable( ScHTMLTableId nTableId, bool bDeep = true ) const;
1726 /** Inserts a new table into the container. This container owns the created table.
1727 @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
1728 ScHTMLTable* CreateTable( const HtmlImportInfo& rInfo, bool bPreFormText );
1730 private:
1731 /** Sets a working table with its index for search optimization. */
1732 void SetCurrTable( ScHTMLTable* pTable ) const
1733 { if( pTable ) mpCurrTable = pTable; }
1736 ScHTMLTableMap::ScHTMLTableMap( ScHTMLTable& rParentTable ) :
1737 mrParentTable(rParentTable),
1738 mpCurrTable(nullptr)
1742 ScHTMLTable* ScHTMLTableMap::FindTable( ScHTMLTableId nTableId, bool bDeep ) const
1744 ScHTMLTable* pResult = nullptr;
1745 if( mpCurrTable && (nTableId == mpCurrTable->GetTableId()) )
1746 pResult = mpCurrTable; // cached table
1747 else
1749 const_iterator aFind = maTables.find( nTableId );
1750 if( aFind != maTables.end() )
1751 pResult = aFind->second.get(); // table from this container
1754 // not found -> search deep in nested tables
1755 if( !pResult && bDeep )
1756 for( const_iterator aIter = begin(), aEnd = end(); !pResult && (aIter != aEnd); ++aIter )
1757 pResult = aIter->second->FindNestedTable( nTableId );
1759 SetCurrTable( pResult );
1760 return pResult;
1763 ScHTMLTable* ScHTMLTableMap::CreateTable( const HtmlImportInfo& rInfo, bool bPreFormText )
1765 ScHTMLTable* pTable = new ScHTMLTable( mrParentTable, rInfo, bPreFormText );
1766 maTables[ pTable->GetTableId() ].reset( pTable );
1767 SetCurrTable( pTable );
1768 return pTable;
1771 /** Simplified forward iterator for convenience.
1773 Before the iterator can be dereferenced, it must be tested with the is()
1774 method. The iterator may be invalid directly after construction (e.g. empty
1775 container).
1777 class ScHTMLTableIterator
1779 public:
1780 /** Constructs the iterator for the passed table map.
1781 @param pTableMap Pointer to the table map (is allowed to be NULL). */
1782 explicit ScHTMLTableIterator( const ScHTMLTableMap* pTableMap );
1784 bool is() const { return mpTableMap && maIter != maEnd; }
1785 ScHTMLTable* operator->() { return maIter->second.get(); }
1786 ScHTMLTableIterator& operator++() { ++maIter; return *this; }
1788 private:
1789 ScHTMLTableMap::const_iterator maIter;
1790 ScHTMLTableMap::const_iterator maEnd;
1791 const ScHTMLTableMap* mpTableMap;
1794 ScHTMLTableIterator::ScHTMLTableIterator( const ScHTMLTableMap* pTableMap ) :
1795 mpTableMap(pTableMap)
1797 if( pTableMap )
1799 maIter = pTableMap->begin();
1800 maEnd = pTableMap->end();
1804 ScHTMLTableAutoId::ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId ) :
1805 mnTableId( rnUnusedId ),
1806 mrnUnusedId( rnUnusedId )
1808 ++mrnUnusedId;
1811 ScHTMLTable::ScHTMLTable( ScHTMLTable& rParentTable, const HtmlImportInfo& rInfo, bool bPreFormText ) :
1812 mpParentTable( &rParentTable ),
1813 maTableId( rParentTable.maTableId.mrnUnusedId ),
1814 maTableItemSet( rParentTable.GetCurrItemSet() ),
1815 mrEditEngine( rParentTable.mrEditEngine ),
1816 mrEEParseList( rParentTable.mrEEParseList ),
1817 mpCurrEntryVector( nullptr ),
1818 maSize( 1, 1 ),
1819 mpParser(rParentTable.mpParser),
1820 mbBorderOn( false ),
1821 mbPreFormText( bPreFormText ),
1822 mbRowOn( false ),
1823 mbDataOn( false ),
1824 mbPushEmptyLine( false )
1826 if( mbPreFormText )
1828 ImplRowOn();
1829 ImplDataOn( ScHTMLSize( 1, 1 ) );
1831 else
1833 ProcessFormatOptions( maTableItemSet, rInfo );
1834 const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
1835 for (const auto& rOption : rOptions)
1837 switch( rOption.GetToken() )
1839 case HtmlOptionId::BORDER:
1840 mbBorderOn = rOption.GetString().isEmpty() || (rOption.GetNumber() != 0);
1841 break;
1842 case HtmlOptionId::ID:
1843 maTableName = rOption.GetString();
1844 break;
1845 default: break;
1850 CreateNewEntry( rInfo );
1853 ScHTMLTable::ScHTMLTable(
1854 SfxItemPool& rPool,
1855 EditEngine& rEditEngine,
1856 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
1857 ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser
1859 mpParentTable( nullptr ),
1860 maTableId( rnUnusedId ),
1861 maTableItemSet( rPool ),
1862 mrEditEngine( rEditEngine ),
1863 mrEEParseList( rEEParseList ),
1864 mpCurrEntryVector( nullptr ),
1865 maSize( 1, 1 ),
1866 mpParser(pParser),
1867 mbBorderOn( false ),
1868 mbPreFormText( false ),
1869 mbRowOn( false ),
1870 mbDataOn( false ),
1871 mbPushEmptyLine( false )
1873 // open the first "cell" of the document
1874 ImplRowOn();
1875 ImplDataOn( ScHTMLSize( 1, 1 ) );
1876 mxCurrEntry = CreateEntry();
1879 ScHTMLTable::~ScHTMLTable()
1883 const SfxItemSet& ScHTMLTable::GetCurrItemSet() const
1885 // first try cell item set, then row item set, then table item set
1886 return mxDataItemSet.get() ? *mxDataItemSet : (mxRowItemSet.get() ? *mxRowItemSet : maTableItemSet);
1889 ScHTMLSize ScHTMLTable::GetSpan( const ScHTMLPos& rCellPos ) const
1891 ScHTMLSize aSpan( 1, 1 );
1892 const ScRange* pRange = nullptr;
1893 if( ( (pRange = maVMergedCells.Find( rCellPos.MakeAddr() ) ) != nullptr)
1894 || ( (pRange = maHMergedCells.Find( rCellPos.MakeAddr() ) ) != nullptr)
1896 aSpan.Set( pRange->aEnd.Col() - pRange->aStart.Col() + 1, pRange->aEnd.Row() - pRange->aStart.Row() + 1 );
1897 return aSpan;
1900 ScHTMLTable* ScHTMLTable::FindNestedTable( ScHTMLTableId nTableId ) const
1902 return mxNestedTables.get() ? mxNestedTables->FindTable( nTableId ) : nullptr;
1905 void ScHTMLTable::PutItem( const SfxPoolItem& rItem )
1907 OSL_ENSURE( mxCurrEntry.get(), "ScHTMLTable::PutItem - no current entry" );
1908 if( mxCurrEntry.get() && mxCurrEntry->IsEmpty() )
1909 mxCurrEntry->GetItemSet().Put( rItem );
1912 void ScHTMLTable::PutText( const HtmlImportInfo& rInfo )
1914 OSL_ENSURE( mxCurrEntry.get(), "ScHTMLTable::PutText - no current entry" );
1915 if( mxCurrEntry.get() )
1917 if( !mxCurrEntry->HasContents() && IsSpaceCharInfo( rInfo ) )
1918 mxCurrEntry->AdjustStart( rInfo );
1919 else
1920 mxCurrEntry->AdjustEnd( rInfo );
1924 void ScHTMLTable::InsertPara( const HtmlImportInfo& rInfo )
1926 if( mxCurrEntry.get() && mbDataOn && !IsEmptyCell() )
1927 mxCurrEntry->SetImportAlways();
1928 PushEntry( rInfo );
1929 CreateNewEntry( rInfo );
1930 InsertLeadingEmptyLine();
1933 void ScHTMLTable::BreakOn()
1935 // empty line, if <br> is at start of cell
1936 mbPushEmptyLine = !mbPreFormText && mbDataOn && IsEmptyCell();
1939 void ScHTMLTable::HeadingOn()
1941 // call directly, InsertPara() has not been called before
1942 InsertLeadingEmptyLine();
1945 void ScHTMLTable::InsertLeadingEmptyLine()
1947 // empty line, if <p>, </p>, <h?>, or </h*> are not at start of cell
1948 mbPushEmptyLine = !mbPreFormText && mbDataOn && !IsEmptyCell();
1951 void ScHTMLTable::AnchorOn()
1953 OSL_ENSURE( mxCurrEntry.get(), "ScHTMLTable::AnchorOn - no current entry" );
1954 // don't skip entries with single hyperlinks
1955 if( mxCurrEntry.get() )
1956 mxCurrEntry->SetImportAlways();
1959 ScHTMLTable* ScHTMLTable::TableOn( const HtmlImportInfo& rInfo )
1961 PushEntry( rInfo );
1962 return InsertNestedTable( rInfo, false );
1965 ScHTMLTable* ScHTMLTable::TableOff( const HtmlImportInfo& rInfo )
1967 return mbPreFormText ? this : CloseTable( rInfo );
1970 ScHTMLTable* ScHTMLTable::PreOn( const HtmlImportInfo& rInfo )
1972 PushEntry( rInfo );
1973 return InsertNestedTable( rInfo, true );
1976 ScHTMLTable* ScHTMLTable::PreOff( const HtmlImportInfo& rInfo )
1978 return mbPreFormText ? CloseTable( rInfo ) : this;
1981 void ScHTMLTable::RowOn( const HtmlImportInfo& rInfo )
1983 PushEntry( rInfo, true );
1984 if( mpParentTable && !mbPreFormText ) // no rows allowed in global and preformatted tables
1986 ImplRowOn();
1987 ProcessFormatOptions( *mxRowItemSet, rInfo );
1989 CreateNewEntry( rInfo );
1992 void ScHTMLTable::RowOff( const HtmlImportInfo& rInfo )
1994 PushEntry( rInfo, true );
1995 if( mpParentTable && !mbPreFormText ) // no rows allowed in global and preformatted tables
1996 ImplRowOff();
1997 CreateNewEntry( rInfo );
2000 namespace {
2003 * Decode a number format string stored in Excel-generated HTML's CSS
2004 * region.
2006 OUString decodeNumberFormat(const OUString& rFmt)
2008 OUStringBuffer aBuf;
2009 const sal_Unicode* p = rFmt.getStr();
2010 sal_Int32 n = rFmt.getLength();
2011 for (sal_Int32 i = 0; i < n; ++i, ++p)
2013 if (*p == '\\')
2015 // Skip '\'.
2016 ++i;
2017 ++p;
2019 // Parse all subsequent digits until first non-digit is found.
2020 sal_Int32 nDigitCount = 0;
2021 const sal_Unicode* p1 = p;
2022 for (; i < n; ++i, ++p, ++nDigitCount)
2024 if (*p < '0' || '9' < *p)
2026 --i;
2027 --p;
2028 break;
2032 if (nDigitCount)
2034 // Hex-encoded character found. Decode it back into its
2035 // original character. An example of number format with
2036 // hex-encoded chars: "\0022$\0022\#\,\#\#0\.00"
2037 sal_uInt32 nVal = OUString(p1, nDigitCount).toUInt32(16);
2038 aBuf.append(static_cast<sal_Unicode>(nVal));
2041 else
2042 aBuf.append(*p);
2044 return aBuf.makeStringAndClear();
2049 void ScHTMLTable::DataOn( const HtmlImportInfo& rInfo )
2051 PushEntry( rInfo, true );
2052 if( mpParentTable && !mbPreFormText ) // no cells allowed in global and preformatted tables
2054 // read needed options from the <td> tag
2055 ScHTMLSize aSpanSize( 1, 1 );
2056 boost::optional<OUString> pValStr, pNumStr;
2057 const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2058 sal_uInt32 nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
2059 for (const auto& rOption : rOptions)
2061 switch (rOption.GetToken())
2063 case HtmlOptionId::COLSPAN:
2064 aSpanSize.mnCols = static_cast<SCCOL>( getLimitedValue<sal_Int32>( rOption.GetString().toInt32(), 1, 256 ) );
2065 break;
2066 case HtmlOptionId::ROWSPAN:
2067 aSpanSize.mnRows = static_cast<SCROW>( getLimitedValue<sal_Int32>( rOption.GetString().toInt32(), 1, 256 ) );
2068 break;
2069 case HtmlOptionId::SDVAL:
2070 pValStr = rOption.GetString();
2071 break;
2072 case HtmlOptionId::SDNUM:
2073 pNumStr = rOption.GetString();
2074 break;
2075 case HtmlOptionId::CLASS:
2077 // Pick up the number format associated with this class (if
2078 // any).
2079 OUString aClass = rOption.GetString();
2080 const ScHTMLStyles& rStyles = mpParser->GetStyles();
2081 const OUString& rVal = rStyles.getPropertyValue("td", aClass, "mso-number-format");
2082 if (!rVal.isEmpty())
2084 OUString aNumFmt = decodeNumberFormat(rVal);
2086 nNumberFormat = GetFormatTable()->GetEntryKey(aNumFmt);
2087 if (nNumberFormat == NUMBERFORMAT_ENTRY_NOT_FOUND)
2089 sal_Int32 nErrPos = 0;
2090 SvNumFormatType nDummy;
2091 bool bValidFmt = GetFormatTable()->PutEntry(aNumFmt, nErrPos, nDummy, nNumberFormat);
2092 if (!bValidFmt)
2093 nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND;
2097 break;
2098 default: break;
2102 ImplDataOn( aSpanSize );
2104 if (nNumberFormat != NUMBERFORMAT_ENTRY_NOT_FOUND)
2105 mxDataItemSet->Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nNumberFormat) );
2107 ProcessFormatOptions( *mxDataItemSet, rInfo );
2108 CreateNewEntry( rInfo );
2109 mxCurrEntry->pValStr = std::move(pValStr);
2110 mxCurrEntry->pNumStr = std::move(pNumStr);
2112 else
2113 CreateNewEntry( rInfo );
2116 void ScHTMLTable::DataOff( const HtmlImportInfo& rInfo )
2118 PushEntry( rInfo, true );
2119 if( mpParentTable && !mbPreFormText ) // no cells allowed in global and preformatted tables
2120 ImplDataOff();
2121 CreateNewEntry( rInfo );
2124 void ScHTMLTable::BodyOn( const HtmlImportInfo& rInfo )
2126 bool bPushed = PushEntry( rInfo );
2127 if( !mpParentTable )
2129 // do not start new row, if nothing (no title) precedes the body.
2130 if( bPushed || !mbRowOn )
2131 ImplRowOn();
2132 if( bPushed || !mbDataOn )
2133 ImplDataOn( ScHTMLSize( 1, 1 ) );
2134 ProcessFormatOptions( *mxDataItemSet, rInfo );
2136 CreateNewEntry( rInfo );
2139 void ScHTMLTable::BodyOff( const HtmlImportInfo& rInfo )
2141 PushEntry( rInfo );
2142 if( !mpParentTable )
2144 ImplDataOff();
2145 ImplRowOff();
2147 CreateNewEntry( rInfo );
2150 ScHTMLTable* ScHTMLTable::CloseTable( const HtmlImportInfo& rInfo )
2152 if( mpParentTable ) // not allowed to close global table
2154 PushEntry( rInfo, mbDataOn );
2155 ImplDataOff();
2156 ImplRowOff();
2157 mpParentTable->PushTableEntry( GetTableId() );
2158 mpParentTable->CreateNewEntry( rInfo );
2159 if( mbPreFormText ) // enclose preformatted table with empty lines in parent table
2160 mpParentTable->InsertLeadingEmptyLine();
2161 return mpParentTable;
2163 return this;
2166 SCCOLROW ScHTMLTable::GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const
2168 const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2169 size_t nIndex = static_cast< size_t >( nCellPos );
2170 if( nIndex >= rSizes.size() ) return 0;
2171 return (nIndex == 0) ? rSizes.front() : (rSizes[ nIndex ] - rSizes[ nIndex - 1 ]);
2174 SCCOLROW ScHTMLTable::GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const
2176 const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2177 size_t nBeginIdx = static_cast< size_t >( std::max< SCCOLROW >( nCellBegin, 0 ) );
2178 size_t nEndIdx = static_cast< size_t >( std::min< SCCOLROW >( nCellEnd, static_cast< SCCOLROW >( rSizes.size() ) ) );
2179 if (nBeginIdx >= nEndIdx ) return 0;
2180 return rSizes[ nEndIdx - 1 ] - ((nBeginIdx == 0) ? 0 : rSizes[ nBeginIdx - 1 ]);
2183 SCCOLROW ScHTMLTable::GetDocSize( ScHTMLOrient eOrient ) const
2185 const ScSizeVec& rSizes = maCumSizes[ eOrient ];
2186 return rSizes.empty() ? 0 : rSizes.back();
2189 ScHTMLSize ScHTMLTable::GetDocSize( const ScHTMLPos& rCellPos ) const
2191 ScHTMLSize aCellSpan = GetSpan( rCellPos );
2192 return ScHTMLSize(
2193 static_cast< SCCOL >( GetDocSize( tdCol, rCellPos.mnCol, rCellPos.mnCol + aCellSpan.mnCols ) ),
2194 static_cast< SCROW >( GetDocSize( tdRow, rCellPos.mnRow, rCellPos.mnRow + aCellSpan.mnRows ) ) );
2197 SCCOLROW ScHTMLTable::GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const
2199 return maDocBasePos.Get( eOrient ) + GetDocSize( eOrient, 0, nCellPos );
2202 ScHTMLPos ScHTMLTable::GetDocPos( const ScHTMLPos& rCellPos ) const
2204 return ScHTMLPos(
2205 static_cast< SCCOL >( GetDocPos( tdCol, rCellPos.mnCol ) ),
2206 static_cast< SCROW >( GetDocPos( tdRow, rCellPos.mnRow ) ) );
2209 void ScHTMLTable::GetDocRange( ScRange& rRange ) const
2211 rRange.aStart = rRange.aEnd = maDocBasePos.MakeAddr();
2212 ScAddress aErrorPos( ScAddress::UNINITIALIZED );
2213 if (!rRange.aEnd.Move( static_cast< SCCOL >( GetDocSize( tdCol ) ) - 1,
2214 static_cast< SCROW >( GetDocSize( tdRow ) ) - 1, 0, aErrorPos))
2216 assert(!"can't move");
2220 void ScHTMLTable::ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const
2222 OSL_ENSURE( pDoc, "ScHTMLTable::ApplyCellBorders - no document" );
2223 if( pDoc && mbBorderOn )
2225 const SCCOL nLastCol = maSize.mnCols - 1;
2226 const SCROW nLastRow = maSize.mnRows - 1;
2227 const long nOuterLine = DEF_LINE_WIDTH_2;
2228 const long nInnerLine = DEF_LINE_WIDTH_0;
2229 SvxBorderLine aOuterLine(nullptr, nOuterLine, SvxBorderLineStyle::SOLID);
2230 SvxBorderLine aInnerLine(nullptr, nInnerLine, SvxBorderLineStyle::SOLID);
2231 SvxBoxItem aBorderItem( ATTR_BORDER );
2233 for( SCCOL nCol = 0; nCol <= nLastCol; ++nCol )
2235 SvxBorderLine* pLeftLine = (nCol == 0) ? &aOuterLine : &aInnerLine;
2236 SvxBorderLine* pRightLine = (nCol == nLastCol) ? &aOuterLine : &aInnerLine;
2237 SCCOL nCellCol1 = static_cast< SCCOL >( GetDocPos( tdCol, nCol ) ) + rFirstPos.Col();
2238 SCCOL nCellCol2 = nCellCol1 + static_cast< SCCOL >( GetDocSize( tdCol, nCol ) ) - 1;
2239 for( SCROW nRow = 0; nRow <= nLastRow; ++nRow )
2241 SvxBorderLine* pTopLine = (nRow == 0) ? &aOuterLine : &aInnerLine;
2242 SvxBorderLine* pBottomLine = (nRow == nLastRow) ? &aOuterLine : &aInnerLine;
2243 SCROW nCellRow1 = GetDocPos( tdRow, nRow ) + rFirstPos.Row();
2244 SCROW nCellRow2 = nCellRow1 + GetDocSize( tdRow, nRow ) - 1;
2245 for( SCCOL nCellCol = nCellCol1; nCellCol <= nCellCol2; ++nCellCol )
2247 aBorderItem.SetLine( (nCellCol == nCellCol1) ? pLeftLine : nullptr, SvxBoxItemLine::LEFT );
2248 aBorderItem.SetLine( (nCellCol == nCellCol2) ? pRightLine : nullptr, SvxBoxItemLine::RIGHT );
2249 for( SCROW nCellRow = nCellRow1; nCellRow <= nCellRow2; ++nCellRow )
2251 aBorderItem.SetLine( (nCellRow == nCellRow1) ? pTopLine : nullptr, SvxBoxItemLine::TOP );
2252 aBorderItem.SetLine( (nCellRow == nCellRow2) ? pBottomLine : nullptr, SvxBoxItemLine::BOTTOM );
2253 pDoc->ApplyAttr( nCellCol, nCellRow, rFirstPos.Tab(), aBorderItem );
2260 for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2261 aIter->ApplyCellBorders( pDoc, rFirstPos );
2264 SvNumberFormatter* ScHTMLTable::GetFormatTable()
2266 return mpParser->GetDoc().GetFormatTable();
2269 bool ScHTMLTable::IsEmptyCell() const
2271 return mpCurrEntryVector && mpCurrEntryVector->empty();
2274 bool ScHTMLTable::IsSpaceCharInfo( const HtmlImportInfo& rInfo )
2276 return (rInfo.nToken == HtmlTokenId::TEXTTOKEN) && (rInfo.aText.getLength() == 1) && (rInfo.aText[ 0 ] == ' ');
2279 ScHTMLTable::ScHTMLEntryPtr ScHTMLTable::CreateEntry() const
2281 return std::make_unique<ScHTMLEntry>( GetCurrItemSet() );
2284 void ScHTMLTable::CreateNewEntry( const HtmlImportInfo& rInfo )
2286 OSL_ENSURE( !mxCurrEntry.get(), "ScHTMLTable::CreateNewEntry - old entry still present" );
2287 mxCurrEntry = CreateEntry();
2288 mxCurrEntry->aSel = rInfo.aSelection;
2291 void ScHTMLTable::ImplPushEntryToVector( ScHTMLEntryVector& rEntryVector, ScHTMLEntryPtr& rxEntry )
2293 // HTML entry list does not own the entries
2294 rEntryVector.push_back( rxEntry.get() );
2295 // mrEEParseList (reference to member of ScEEParser) owns the entries
2296 mrEEParseList.push_back(std::shared_ptr<ScEEParseEntry>(rxEntry.release()));
2299 bool ScHTMLTable::PushEntry( ScHTMLEntryPtr& rxEntry )
2301 bool bPushed = false;
2302 if( rxEntry.get() && rxEntry->HasContents() )
2304 if( mpCurrEntryVector )
2306 if( mbPushEmptyLine )
2308 ScHTMLEntryPtr xEmptyEntry = CreateEntry();
2309 ImplPushEntryToVector( *mpCurrEntryVector, xEmptyEntry );
2310 mbPushEmptyLine = false;
2312 ImplPushEntryToVector( *mpCurrEntryVector, rxEntry );
2313 bPushed = true;
2315 else if( mpParentTable )
2317 bPushed = mpParentTable->PushEntry( rxEntry );
2319 else
2321 OSL_FAIL( "ScHTMLTable::PushEntry - cannot push entry, no parent found" );
2324 return bPushed;
2327 bool ScHTMLTable::PushEntry( const HtmlImportInfo& rInfo, bool bLastInCell )
2329 OSL_ENSURE( mxCurrEntry.get(), "ScHTMLTable::PushEntry - no current entry" );
2330 bool bPushed = false;
2331 if( mxCurrEntry.get() )
2333 mxCurrEntry->AdjustEnd( rInfo );
2334 mxCurrEntry->Strip( mrEditEngine );
2336 // import entry always, if it is the last in cell, and cell is still empty
2337 if( bLastInCell && IsEmptyCell() )
2339 mxCurrEntry->SetImportAlways();
2340 // don't insert empty lines before single empty entries
2341 if( mxCurrEntry->IsEmpty() )
2342 mbPushEmptyLine = false;
2345 bPushed = PushEntry( mxCurrEntry );
2346 mxCurrEntry.reset();
2348 return bPushed;
2351 void ScHTMLTable::PushTableEntry( ScHTMLTableId nTableId )
2353 OSL_ENSURE( nTableId != SC_HTML_GLOBAL_TABLE, "ScHTMLTable::PushTableEntry - cannot push global table" );
2354 if( nTableId != SC_HTML_GLOBAL_TABLE )
2356 ScHTMLEntryPtr xEntry( new ScHTMLEntry( maTableItemSet, nTableId ) );
2357 PushEntry( xEntry );
2361 ScHTMLTable* ScHTMLTable::GetExistingTable( ScHTMLTableId nTableId ) const
2363 ScHTMLTable* pTable = ((nTableId != SC_HTML_GLOBAL_TABLE) && mxNestedTables.get()) ?
2364 mxNestedTables->FindTable( nTableId, false ) : nullptr;
2365 OSL_ENSURE( pTable || (nTableId == SC_HTML_GLOBAL_TABLE), "ScHTMLTable::GetExistingTable - table not found" );
2366 return pTable;
2369 ScHTMLTable* ScHTMLTable::InsertNestedTable( const HtmlImportInfo& rInfo, bool bPreFormText )
2371 if( !mxNestedTables.get() )
2372 mxNestedTables.reset( new ScHTMLTableMap( *this ) );
2373 if( bPreFormText ) // enclose new preformatted table with empty lines
2374 InsertLeadingEmptyLine();
2375 return mxNestedTables->CreateTable( rInfo, bPreFormText );
2378 void ScHTMLTable::InsertNewCell( const ScHTMLSize& rSpanSize )
2380 ScRange* pRange;
2382 /* Find an unused cell by skipping all merged ranges that cover the
2383 current cell position stored in maCurrCell. */
2384 while( ((pRange = maVMergedCells.Find( maCurrCell.MakeAddr() )) != nullptr) || ((pRange = maHMergedCells.Find( maCurrCell.MakeAddr() )) != nullptr) )
2385 maCurrCell.mnCol = pRange->aEnd.Col() + 1;
2386 mpCurrEntryVector = &maEntryMap[ maCurrCell ];
2388 /* If the new cell is merged horizontally, try to find collisions with
2389 other vertically merged ranges. In this case, shrink existing
2390 vertically merged ranges (do not shrink the new cell). */
2391 SCCOL nColEnd = maCurrCell.mnCol + rSpanSize.mnCols;
2392 for( ScAddress aAddr( maCurrCell.MakeAddr() ); aAddr.Col() < nColEnd; aAddr.IncCol() )
2393 if( (pRange = maVMergedCells.Find( aAddr )) != nullptr )
2394 pRange->aEnd.SetRow( maCurrCell.mnRow - 1 );
2396 // insert the new range into the cell lists
2397 ScRange aNewRange( maCurrCell.MakeAddr() );
2398 ScAddress aErrorPos( ScAddress::UNINITIALIZED );
2399 if (!aNewRange.aEnd.Move( rSpanSize.mnCols - 1, rSpanSize.mnRows - 1, 0, aErrorPos))
2401 assert(!"can't move");
2403 if( rSpanSize.mnRows > 1 )
2405 maVMergedCells.push_back( aNewRange );
2406 /* Do not insert vertically merged ranges into maUsedCells yet,
2407 because they may be shrunken (see above). The final vertically
2408 merged ranges are inserted in FillEmptyCells(). */
2410 else
2412 if( rSpanSize.mnCols > 1 )
2413 maHMergedCells.push_back( aNewRange );
2414 /* Insert horizontally merged ranges and single cells into
2415 maUsedCells, they will not be changed anymore. */
2416 maUsedCells.Join( aNewRange );
2419 // adjust table size
2420 maSize.mnCols = std::max< SCCOL >( maSize.mnCols, aNewRange.aEnd.Col() + 1 );
2421 maSize.mnRows = std::max< SCROW >( maSize.mnRows, aNewRange.aEnd.Row() + 1 );
2424 void ScHTMLTable::ImplRowOn()
2426 if( mbRowOn )
2427 ImplRowOff();
2428 mxRowItemSet.reset( new SfxItemSet( maTableItemSet ) );
2429 maCurrCell.mnCol = 0;
2430 mbRowOn = true;
2431 mbDataOn = false;
2434 void ScHTMLTable::ImplRowOff()
2436 if( mbDataOn )
2437 ImplDataOff();
2438 if( mbRowOn )
2440 mxRowItemSet.reset();
2441 ++maCurrCell.mnRow;
2442 mbRowOn = mbDataOn = false;
2446 void ScHTMLTable::ImplDataOn( const ScHTMLSize& rSpanSize )
2448 if( mbDataOn )
2449 ImplDataOff();
2450 if( !mbRowOn )
2451 ImplRowOn();
2452 mxDataItemSet.reset( new SfxItemSet( *mxRowItemSet ) );
2453 InsertNewCell( rSpanSize );
2454 mbDataOn = true;
2455 mbPushEmptyLine = false;
2458 void ScHTMLTable::ImplDataOff()
2460 if( mbDataOn )
2462 mxDataItemSet.reset();
2463 ++maCurrCell.mnCol;
2464 mpCurrEntryVector = nullptr;
2465 mbDataOn = false;
2469 void ScHTMLTable::ProcessFormatOptions( SfxItemSet& rItemSet, const HtmlImportInfo& rInfo )
2471 // special handling for table header cells
2472 if( rInfo.nToken == HtmlTokenId::TABLEHEADER_ON )
2474 rItemSet.Put( SvxWeightItem( WEIGHT_BOLD, ATTR_FONT_WEIGHT ) );
2475 rItemSet.Put( SvxHorJustifyItem( SvxCellHorJustify::Center, ATTR_HOR_JUSTIFY ) );
2478 const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2479 for (const auto& rOption : rOptions)
2481 switch( rOption.GetToken() )
2483 case HtmlOptionId::ALIGN:
2485 SvxCellHorJustify eVal = SvxCellHorJustify::Standard;
2486 const OUString& rOptVal = rOption.GetString();
2487 if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ) )
2488 eVal = SvxCellHorJustify::Right;
2489 else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_center ) )
2490 eVal = SvxCellHorJustify::Center;
2491 else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ) )
2492 eVal = SvxCellHorJustify::Left;
2493 if( eVal != SvxCellHorJustify::Standard )
2494 rItemSet.Put( SvxHorJustifyItem( eVal, ATTR_HOR_JUSTIFY ) );
2496 break;
2498 case HtmlOptionId::VALIGN:
2500 SvxCellVerJustify eVal = SvxCellVerJustify::Standard;
2501 const OUString& rOptVal = rOption.GetString();
2502 if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_top ) )
2503 eVal = SvxCellVerJustify::Top;
2504 else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_middle ) )
2505 eVal = SvxCellVerJustify::Center;
2506 else if( rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_VA_bottom ) )
2507 eVal = SvxCellVerJustify::Bottom;
2508 if( eVal != SvxCellVerJustify::Standard )
2509 rItemSet.Put( SvxVerJustifyItem( eVal, ATTR_VER_JUSTIFY ) );
2511 break;
2513 case HtmlOptionId::BGCOLOR:
2515 Color aColor;
2516 rOption.GetColor( aColor );
2517 rItemSet.Put( SvxBrushItem( aColor, ATTR_BACKGROUND ) );
2519 break;
2520 default: break;
2525 void ScHTMLTable::SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize )
2527 OSL_ENSURE( nCellPos >= 0, "ScHTMLTable::SetDocSize - unexpected negative position" );
2528 ScSizeVec& rSizes = maCumSizes[ eOrient ];
2529 size_t nIndex = static_cast< size_t >( nCellPos );
2530 // expand with height/width == 1
2531 while( nIndex >= rSizes.size() )
2532 rSizes.push_back( rSizes.empty() ? 1 : (rSizes.back() + 1) );
2533 // update size of passed position and all following
2534 // #i109987# only grow, don't shrink - use the largest needed size
2535 SCCOLROW nDiff = nSize - ((nIndex == 0) ? rSizes.front() : (rSizes[ nIndex ] - rSizes[ nIndex - 1 ]));
2536 if( nDiff > 0 )
2537 std::for_each(rSizes.begin() + nIndex, rSizes.end(), [&nDiff](SCCOLROW& rSize) { rSize += nDiff; });
2540 void ScHTMLTable::CalcNeededDocSize(
2541 ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nCellSpan, SCCOLROW nRealDocSize )
2543 SCCOLROW nDiffSize = 0;
2544 // in merged columns/rows: reduce needed size by size of leading columns
2545 while( nCellSpan > 1 )
2547 nDiffSize += GetDocSize( eOrient, nCellPos );
2548 --nCellSpan;
2549 ++nCellPos;
2551 // set remaining needed size to last column/row
2552 nRealDocSize -= std::min< SCCOLROW >( nRealDocSize - 1, nDiffSize );
2553 SetDocSize( eOrient, nCellPos, nRealDocSize );
2556 void ScHTMLTable::FillEmptyCells()
2558 for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2559 aIter->FillEmptyCells();
2561 // insert the final vertically merged ranges into maUsedCells
2562 for ( size_t i = 0, nRanges = maVMergedCells.size(); i < nRanges; ++i )
2564 ScRange & rRange = maVMergedCells[ i ];
2565 maUsedCells.Join( rRange );
2568 for( ScAddress aAddr; aAddr.Row() < maSize.mnRows; aAddr.IncRow() )
2570 for( aAddr.SetCol( 0 ); aAddr.Col() < maSize.mnCols; aAddr.IncCol() )
2572 if( !maUsedCells.Find( aAddr ) )
2574 // create a range for the lock list (used to calc. cell span)
2575 ScRange aRange( aAddr );
2578 aRange.aEnd.IncCol();
2580 while( (aRange.aEnd.Col() < maSize.mnCols) && !maUsedCells.Find( aRange.aEnd ) );
2581 aRange.aEnd.IncCol( -1 );
2582 maUsedCells.Join( aRange );
2584 // insert a dummy entry
2585 ScHTMLEntryPtr xEntry = CreateEntry();
2586 ImplPushEntryToVector( maEntryMap[ ScHTMLPos( aAddr ) ], xEntry );
2592 void ScHTMLTable::RecalcDocSize()
2594 // recalc table sizes recursively from inner to outer
2595 for( ScHTMLTableIterator aIter( mxNestedTables.get() ); aIter.is(); ++aIter )
2596 aIter->RecalcDocSize();
2598 /* Two passes: first calculates the sizes of single columns/rows, then
2599 the sizes of spanned columns/rows. This allows to fill nested tables
2600 into merged cells optimally. */
2601 static const sal_uInt16 PASS_SINGLE = 0;
2602 static const sal_uInt16 PASS_SPANNED = 1;
2603 for( sal_uInt16 nPass = PASS_SINGLE; nPass <= PASS_SPANNED; ++nPass )
2605 // iterate through every table cell
2606 for( const auto& [rCellPos, rEntryVector] : maEntryMap )
2608 ScHTMLSize aCellSpan = GetSpan( rCellPos );
2610 // process the dimension of the current cell in this pass?
2611 // (pass is single and span is 1) or (pass is not single and span is not 1)
2612 bool bProcessColWidth = ((nPass == PASS_SINGLE) == (aCellSpan.mnCols == 1));
2613 bool bProcessRowHeight = ((nPass == PASS_SINGLE) == (aCellSpan.mnRows == 1));
2614 if( bProcessColWidth || bProcessRowHeight )
2616 ScHTMLSize aDocSize( 1, 0 ); // resulting size of the cell in document
2618 // expand the cell size for each cell parse entry
2619 for( const auto& rpEntry : rEntryVector )
2621 ScHTMLTable* pTable = GetExistingTable( rpEntry->GetTableId() );
2622 // find entry with maximum width
2623 if( bProcessColWidth && pTable )
2624 aDocSize.mnCols = std::max( aDocSize.mnCols, static_cast< SCCOL >( pTable->GetDocSize( tdCol ) ) );
2625 // add up height of each entry
2626 if( bProcessRowHeight )
2627 aDocSize.mnRows += pTable ? pTable->GetDocSize( tdRow ) : 1;
2629 if( !aDocSize.mnRows )
2630 aDocSize.mnRows = 1;
2632 if( bProcessColWidth )
2633 CalcNeededDocSize( tdCol, rCellPos.mnCol, aCellSpan.mnCols, aDocSize.mnCols );
2634 if( bProcessRowHeight )
2635 CalcNeededDocSize( tdRow, rCellPos.mnRow, aCellSpan.mnRows, aDocSize.mnRows );
// Calculates the document position of every entry in this table and,
// recursively, in all nested tables referred to by entries.
//
// rBasePos becomes the new base position of this table. For every cell the
// entries are stacked downwards starting at the document position of the
// cell: an entry referring to a nested table advances the running row by the
// row count of that table, any other entry advances it by one row.
2641 void ScHTMLTable::RecalcDocPos( const ScHTMLPos& rBasePos )
2643 maDocBasePos = rBasePos;
2644 // after the previous assignment it is allowed to call GetDocPos() methods
2646 // iterate through every table cell
2647 for( auto& [rCellPos, rEntryVector] : maEntryMap )
2649 // fixed doc position of the entire cell (first entry)
2650 const ScHTMLPos aCellDocPos( GetDocPos( rCellPos ) );
2651 // fixed doc size of the entire cell
2652 const ScHTMLSize aCellDocSize( GetDocSize( rCellPos ) );
2654 // running doc position for single entries
2655 ScHTMLPos aEntryDocPos( aCellDocPos );
// remembers the entry processed last; stays null for an empty entry vector
2657 ScHTMLEntry* pEntry = nullptr;
2658 for( const auto& rpEntry : rEntryVector )
2660 pEntry = rpEntry;
// entry refers to a nested table: position the table at the running position
2661 if( ScHTMLTable* pTable = GetExistingTable( pEntry->GetTableId() ) )
2663 pTable->RecalcDocPos( aEntryDocPos ); // recalc nested table
// by default the table entry itself gets the maximum column/row as position
2664 pEntry->nCol = SCCOL_MAX;
2665 pEntry->nRow = SCROW_MAX;
2666 SCROW nTableRows = static_cast< SCROW >( pTable->GetDocSize( tdRow ) );
2668 // use this entry to pad empty space right of table
2669 if( mpParentTable ) // ... but not in global table
// nStartCol: first column right of the nested table; nNextCol: first column right of the cell
2671 SCCOL nStartCol = aEntryDocPos.mnCol + static_cast< SCCOL >( pTable->GetDocSize( tdCol ) );
2672 SCCOL nNextCol = aEntryDocPos.mnCol + aCellDocSize.mnCols;
// only if the nested table is narrower than the cell
2673 if( nStartCol < nNextCol )
2675 pEntry->nCol = nStartCol;
2676 pEntry->nRow = aEntryDocPos.mnRow;
2677 pEntry->nColOverlap = nNextCol - nStartCol;
2678 pEntry->nRowOverlap = nTableRows;
// the next entry starts below the nested table
2681 aEntryDocPos.mnRow += nTableRows;
2683 else
// entry without nested table: occupies one row at the running position
2685 pEntry->nCol = aEntryDocPos.mnCol;
2686 pEntry->nRow = aEntryDocPos.mnRow;
2687 if( mpParentTable ) // do not merge in global table
2688 pEntry->nColOverlap = aCellDocSize.mnCols;
2689 ++aEntryDocPos.mnRow;
2693 // pEntry points now to last entry.
2694 if( pEntry )
2696 if( (pEntry == rEntryVector.front()) && (pEntry->GetTableId() == SC_HTML_NO_TABLE) )
2698 // pEntry is the only entry in this cell - merge rows of cell with single non-table entry.
2699 pEntry->nRowOverlap = aCellDocSize.mnRows;
2701 else
2703 // fill up incomplete entry lists
// append dummy entries (with the item set of the last entry) for all
// rows of the cell not reached by the running position
2704 SCROW nFirstUnusedRow = aCellDocPos.mnRow + aCellDocSize.mnRows;
2705 while( aEntryDocPos.mnRow < nFirstUnusedRow )
2707 ScHTMLEntryPtr xDummyEntry( new ScHTMLEntry( pEntry->GetItemSet() ) );
2708 xDummyEntry->nCol = aEntryDocPos.mnCol;
2709 xDummyEntry->nRow = aEntryDocPos.mnRow;
2710 xDummyEntry->nColOverlap = aCellDocSize.mnCols;
2711 ImplPushEntryToVector( rEntryVector, xDummyEntry );
2712 ++aEntryDocPos.mnRow;
2719 ScHTMLGlobalTable::ScHTMLGlobalTable(
2720 SfxItemPool& rPool,
2721 EditEngine& rEditEngine,
2722 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseVector,
2723 ScHTMLTableId& rnUnusedId,
2724 ScHTMLParser* pParser
2726 ScHTMLTable( rPool, rEditEngine, rEEParseVector, rnUnusedId, pParser )
2730 ScHTMLGlobalTable::~ScHTMLGlobalTable()
2734 void ScHTMLGlobalTable::Recalc()
2736 // Fills up empty cells with a dummy entry. */
2737 FillEmptyCells();
2738 // recalc table sizes of all nested tables and this table
2739 RecalcDocSize();
2740 // recalc document positions of all entries in this table and in nested tables
2741 RecalcDocPos( GetDocPos() );
2744 ScHTMLQueryParser::ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc ) :
2745 ScHTMLParser( pEditEngine, pDoc ),
2746 mnUnusedId( SC_HTML_GLOBAL_TABLE ),
2747 mbTitleOn( false )
2749 mxGlobTable.reset(
2750 new ScHTMLGlobalTable(*pPool, *pEdit, maList, mnUnusedId, this));
2751 mpCurrTable = mxGlobTable.get();
2754 ScHTMLQueryParser::~ScHTMLQueryParser()
2758 ErrCode ScHTMLQueryParser::Read( SvStream& rStrm, const OUString& rBaseURL )
2760 SvKeyValueIteratorRef xValues;
2761 SvKeyValueIterator* pAttributes = nullptr;
2763 SfxObjectShell* pObjSh = mpDoc->GetDocumentShell();
2764 if( pObjSh && pObjSh->IsLoading() )
2766 pAttributes = pObjSh->GetHeaderAttributes();
2768 else
2770 /* When not loading, set up fake HTTP headers to force the SfxHTMLParser
2771 to use UTF8 (used when pasting from clipboard) */
2772 const sal_Char* pCharSet = rtl_getBestMimeCharsetFromTextEncoding( RTL_TEXTENCODING_UTF8 );
2773 if( pCharSet )
2775 OUString aContentType = "text/html; charset=" +
2776 OUString::createFromAscii( pCharSet );
2778 xValues = new SvKeyValueIterator;
2779 xValues->Append( SvKeyValue( OOO_STRING_SVTOOLS_HTML_META_content_type, aContentType ) );
2780 pAttributes = xValues.get();
2784 Link<HtmlImportInfo&,void> aOldLink = pEdit->GetHtmlImportHdl();
2785 pEdit->SetHtmlImportHdl( LINK( this, ScHTMLQueryParser, HTMLImportHdl ) );
2786 ErrCode nErr = pEdit->Read( rStrm, rBaseURL, EETextFormat::Html, pAttributes );
2787 pEdit->SetHtmlImportHdl( aOldLink );
2789 mxGlobTable->Recalc();
2790 nColMax = static_cast< SCCOL >( mxGlobTable->GetDocSize( tdCol ) - 1 );
2791 nRowMax = static_cast< SCROW >( mxGlobTable->GetDocSize( tdRow ) - 1 );
2793 return nErr;
2796 const ScHTMLTable* ScHTMLQueryParser::GetGlobalTable() const
2798 return mxGlobTable.get();
2801 void ScHTMLQueryParser::ProcessToken( const HtmlImportInfo& rInfo )
2803 switch( rInfo.nToken )
2805 // --- meta data ---
2806 case HtmlTokenId::META: MetaOn( rInfo ); break; // <meta>
2808 // --- title handling ---
2809 case HtmlTokenId::TITLE_ON: TitleOn(); break; // <title>
2810 case HtmlTokenId::TITLE_OFF: TitleOff( rInfo ); break; // </title>
2812 case HtmlTokenId::STYLE_ON: break;
2813 case HtmlTokenId::STYLE_OFF: ParseStyle(rInfo.aText); break;
2815 // --- body handling ---
2816 case HtmlTokenId::BODY_ON: mpCurrTable->BodyOn( rInfo ); break; // <body>
2817 case HtmlTokenId::BODY_OFF: mpCurrTable->BodyOff( rInfo ); break; // </body>
2819 // --- insert text ---
2820 case HtmlTokenId::TEXTTOKEN: InsertText( rInfo ); break; // any text
2821 case HtmlTokenId::LINEBREAK: mpCurrTable->BreakOn(); break; // <br>
2822 case HtmlTokenId::HEAD1_ON: // <h1>
2823 case HtmlTokenId::HEAD2_ON: // <h2>
2824 case HtmlTokenId::HEAD3_ON: // <h3>
2825 case HtmlTokenId::HEAD4_ON: // <h4>
2826 case HtmlTokenId::HEAD5_ON: // <h5>
2827 case HtmlTokenId::HEAD6_ON: // <h6>
2828 case HtmlTokenId::PARABREAK_ON: mpCurrTable->HeadingOn(); break; // <p>
2830 // --- misc. contents ---
2831 case HtmlTokenId::ANCHOR_ON: mpCurrTable->AnchorOn(); break; // <a>
2833 // --- table handling ---
2834 case HtmlTokenId::TABLE_ON: TableOn( rInfo ); break; // <table>
2835 case HtmlTokenId::TABLE_OFF: TableOff( rInfo ); break; // </table>
2836 case HtmlTokenId::TABLEROW_ON: mpCurrTable->RowOn( rInfo ); break; // <tr>
2837 case HtmlTokenId::TABLEROW_OFF: mpCurrTable->RowOff( rInfo ); break; // </tr>
2838 case HtmlTokenId::TABLEHEADER_ON: // <th>
2839 case HtmlTokenId::TABLEDATA_ON: mpCurrTable->DataOn( rInfo ); break; // <td>
2840 case HtmlTokenId::TABLEHEADER_OFF: // </th>
2841 case HtmlTokenId::TABLEDATA_OFF: mpCurrTable->DataOff( rInfo ); break; // </td>
2842 case HtmlTokenId::PREFORMTXT_ON: PreOn( rInfo ); break; // <pre>
2843 case HtmlTokenId::PREFORMTXT_OFF: PreOff( rInfo ); break; // </pre>
2845 // --- formatting ---
2846 case HtmlTokenId::FONT_ON: FontOn( rInfo ); break; // <font>
2848 case HtmlTokenId::BIGPRINT_ON: // <big>
2849 //! TODO: store current font size, use following size
2850 mpCurrTable->PutItem( SvxFontHeightItem( maFontHeights[ 3 ], 100, ATTR_FONT_HEIGHT ) );
2851 break;
2852 case HtmlTokenId::SMALLPRINT_ON: // <small>
2853 //! TODO: store current font size, use preceding size
2854 mpCurrTable->PutItem( SvxFontHeightItem( maFontHeights[ 0 ], 100, ATTR_FONT_HEIGHT ) );
2855 break;
2857 case HtmlTokenId::BOLD_ON: // <b>
2858 case HtmlTokenId::STRONG_ON: // <strong>
2859 mpCurrTable->PutItem( SvxWeightItem( WEIGHT_BOLD, ATTR_FONT_WEIGHT ) );
2860 break;
2862 case HtmlTokenId::ITALIC_ON: // <i>
2863 case HtmlTokenId::EMPHASIS_ON: // <em>
2864 case HtmlTokenId::ADDRESS_ON: // <address>
2865 case HtmlTokenId::BLOCKQUOTE_ON: // <blockquote>
2866 case HtmlTokenId::BLOCKQUOTE30_ON: // <bq>
2867 case HtmlTokenId::CITIATION_ON: // <cite>
2868 case HtmlTokenId::VARIABLE_ON: // <var>
2869 mpCurrTable->PutItem( SvxPostureItem( ITALIC_NORMAL, ATTR_FONT_POSTURE ) );
2870 break;
2872 case HtmlTokenId::DEFINSTANCE_ON: // <dfn>
2873 mpCurrTable->PutItem( SvxWeightItem( WEIGHT_BOLD, ATTR_FONT_WEIGHT ) );
2874 mpCurrTable->PutItem( SvxPostureItem( ITALIC_NORMAL, ATTR_FONT_POSTURE ) );
2875 break;
2877 case HtmlTokenId::UNDERLINE_ON: // <u>
2878 mpCurrTable->PutItem( SvxUnderlineItem( LINESTYLE_SINGLE, ATTR_FONT_UNDERLINE ) );
2879 break;
2880 default: break;
2884 void ScHTMLQueryParser::InsertText( const HtmlImportInfo& rInfo )
2886 mpCurrTable->PutText( rInfo );
2887 if( mbTitleOn )
2888 maTitle.append(rInfo.aText);
2891 void ScHTMLQueryParser::FontOn( const HtmlImportInfo& rInfo )
2893 const HTMLOptions& rOptions = static_cast<HTMLParser*>(rInfo.pParser)->GetOptions();
2894 for (const auto& rOption : rOptions)
2896 switch( rOption.GetToken() )
2898 case HtmlOptionId::FACE :
2900 const OUString& rFace = rOption.GetString();
2901 OUString aFontName;
2902 sal_Int32 nPos = 0;
2903 while( nPos != -1 )
2905 // font list separator: VCL = ';' HTML = ','
2906 OUString aFName = comphelper::string::strip(rFace.getToken(0, ',', nPos), ' ');
2907 aFontName = ScGlobal::addToken(aFontName, aFName, ';');
2909 if ( !aFontName.isEmpty() )
2910 mpCurrTable->PutItem( SvxFontItem( FAMILY_DONTKNOW,
2911 aFontName, EMPTY_OUSTRING, PITCH_DONTKNOW,
2912 RTL_TEXTENCODING_DONTKNOW, ATTR_FONT ) );
2914 break;
2915 case HtmlOptionId::SIZE :
2917 sal_uInt32 nSize = getLimitedValue< sal_uInt32 >( rOption.GetNumber(), 1, SC_HTML_FONTSIZES );
2918 mpCurrTable->PutItem( SvxFontHeightItem( maFontHeights[ nSize - 1 ], 100, ATTR_FONT_HEIGHT ) );
2920 break;
2921 case HtmlOptionId::COLOR :
2923 Color aColor;
2924 rOption.GetColor( aColor );
2925 mpCurrTable->PutItem( SvxColorItem( aColor, ATTR_FONT_COLOR ) );
2927 break;
2928 default: break;
2933 void ScHTMLQueryParser::MetaOn( const HtmlImportInfo& rInfo )
2935 if( mpDoc->GetDocumentShell() )
2937 HTMLParser* pParser = static_cast< HTMLParser* >( rInfo.pParser );
2939 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
2940 mpDoc->GetDocumentShell()->GetModel(), uno::UNO_QUERY_THROW);
2941 pParser->ParseMetaOptions(
2942 xDPS->getDocumentProperties(),
2943 mpDoc->GetDocumentShell()->GetHeaderAttributes() );
2947 void ScHTMLQueryParser::TitleOn()
2949 mbTitleOn = true;
2950 maTitle.setLength(0);
2953 void ScHTMLQueryParser::TitleOff( const HtmlImportInfo& rInfo )
2955 if( mbTitleOn )
2957 OUString aTitle = maTitle.makeStringAndClear().trim();
2958 if (!aTitle.isEmpty() && mpDoc->GetDocumentShell())
2960 uno::Reference<document::XDocumentPropertiesSupplier> xDPS(
2961 mpDoc->GetDocumentShell()->GetModel(), uno::UNO_QUERY_THROW);
2963 xDPS->getDocumentProperties()->setTitle(aTitle);
2965 InsertText( rInfo );
2966 mbTitleOn = false;
2970 void ScHTMLQueryParser::TableOn( const HtmlImportInfo& rInfo )
2972 mpCurrTable = mpCurrTable->TableOn( rInfo );
2975 void ScHTMLQueryParser::TableOff( const HtmlImportInfo& rInfo )
2977 mpCurrTable = mpCurrTable->TableOff( rInfo );
2980 void ScHTMLQueryParser::PreOn( const HtmlImportInfo& rInfo )
2982 mpCurrTable = mpCurrTable->PreOn( rInfo );
2985 void ScHTMLQueryParser::PreOff( const HtmlImportInfo& rInfo )
2987 mpCurrTable = mpCurrTable->PreOff( rInfo );
2990 void ScHTMLQueryParser::CloseTable( const HtmlImportInfo& rInfo )
2992 mpCurrTable = mpCurrTable->CloseTable( rInfo );
2995 namespace {
2998 * Handler class for the CSS parser.
3000 class CSSHandler
3002 struct MemStr
3004 const char* mp;
3005 size_t mn;
3007 MemStr() : mp(nullptr), mn(0) {}
3008 MemStr(const char* p, size_t n) : mp(p), mn(n) {}
3009 MemStr& operator=(const MemStr& r) = default;
3012 MemStr maPropName; /// current property name.
3013 MemStr maPropValue; /// current property value.
3015 public:
3016 explicit CSSHandler() {}
3018 static void at_rule_name(const char* /*p*/, size_t /*n*/)
3020 // TODO: For now, we ignore at-rule properties
3023 void property_name(const char* p, size_t n)
3025 maPropName = MemStr(p, n);
3028 void value(const char* p, size_t n)
3030 maPropValue = MemStr(p, n);
3033 static void begin_parse() {}
3035 static void end_parse() {}
3037 static void begin_block() {}
3039 static void end_block() {}
3041 static void begin_property() {}
3043 void end_property()
3045 maPropName = MemStr();
3046 maPropValue = MemStr();
3049 // new members
3050 static void simple_selector_type(const char* /*p*/, size_t /*n*/) {}
3052 static void simple_selector_class(const char* /*p*/, size_t /*n*/) {}
3054 static void simple_selector_pseudo_element(orcus::css::pseudo_element_t /*pe*/) {}
3056 static void simple_selector_pseudo_class(orcus::css::pseudo_class_t /*pc*/) {}
3058 static void simple_selector_id(const char* /*p*/, size_t /*n*/) {}
3060 static void end_simple_selector() {}
3062 static void end_selector() {}
3064 static void combinator(orcus::css::combinator_t /*combinator*/) {}
3066 static void rgb(uint8_t /*red*/ , uint8_t /*green*/ , uint8_t /*blue*/ ) {}
3068 static void rgba(uint8_t /*red*/ , uint8_t /*green*/ , uint8_t /*blue*/ , double /*alpha*/ ) {}
3070 static void hsl(uint8_t /*hue*/ , uint8_t /*sat*/ , uint8_t /*light*/ ) {}
3072 static void hsla(uint8_t /*hue*/ , uint8_t /*sat*/ , uint8_t /*light*/ , double /*alpha*/ ) {}
3074 static void url(const char* /*p*/, size_t /*n*/) {}
3080 void ScHTMLQueryParser::ParseStyle(const OUString& rStrm)
3082 OString aStr = OUStringToOString(rStrm, RTL_TEXTENCODING_UTF8);
3083 CSSHandler aHdl;
3084 orcus::css_parser<CSSHandler> aParser(aStr.getStr(), aStr.getLength(), aHdl);
3087 aParser.parse();
3089 catch (const orcus::css::parse_error&)
3091 // TODO: Parsing of CSS failed. Do nothing for now.
3095 IMPL_LINK( ScHTMLQueryParser, HTMLImportHdl, HtmlImportInfo&, rInfo, void )
3097 switch( rInfo.eState )
3099 case HtmlImportState::Start:
3100 break;
3102 case HtmlImportState::NextToken:
3103 ProcessToken( rInfo );
3104 break;
3106 case HtmlImportState::InsertPara:
3107 mpCurrTable->InsertPara( rInfo );
3108 break;
3110 case HtmlImportState::SetAttr:
3111 case HtmlImportState::InsertText:
3112 case HtmlImportState::InsertField:
3113 break;
3115 case HtmlImportState::End:
3116 while( mpCurrTable->GetTableId() != SC_HTML_GLOBAL_TABLE )
3117 CloseTable( rInfo );
3118 break;
3120 default:
3121 OSL_FAIL( "ScHTMLQueryParser::HTMLImportHdl - unknown ImportInfo::eState" );
3125 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */