Version 7.5.1.1, tag libreoffice-7.5.1.1
[LibreOffice.git] / sc / source / filter / inc / htmlpars.hxx
blobfcdf6b4443fa42ea85938bebdf70458974567bec
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #pragma once
22 #include <memory>
23 #include <map>
24 #include <optional>
25 #include <stack>
26 #include <string_view>
27 #include <unordered_map>
28 #include <utility>
29 #include <vector>
30 #include <o3tl/sorted_vector.hxx>
32 #include <rangelst.hxx>
33 #include "eeparser.hxx"
35 const sal_uInt32 SC_HTML_FONTSIZES = 7; // like export, HTML options
37 // Pixel tolerance for SeekOffset and related.
38 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table
39 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested
41 // BASE class for HTML parser classes
43 class ScHTMLTable;
45 /**
46 * Collection of HTML style data parsed from the content of <style>
47 * elements.
49 class ScHTMLStyles
51 typedef std::unordered_map<OUString, OUString> PropsType;
52 typedef ::std::map<OUString, PropsType> NamePropsType;
53 typedef ::std::map<OUString, NamePropsType> ElemsType;
55 NamePropsType m_GlobalProps; /// global properties (for a given class for all elements)
56 NamePropsType m_ElemGlobalProps; /// element global properties (no class specified)
57 ElemsType m_ElemProps; /// element to class to properties (both element and class are given)
58 const OUString maEmpty; /// just a persistent empty string.
59 public:
60 ScHTMLStyles();
62 void add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
63 const OUString& aProp, const OUString& aValue);
65 /**
66 * Find best-matching property value for given element and class names.
68 const OUString& getPropertyValue(
69 const OUString& rElem, const OUString& rClass, const OUString& rPropName) const;
71 private:
72 static void insertProp(
73 NamePropsType& rProps, const OUString& aName,
74 const OUString& aProp, const OUString& aValue);
77 /** Base class for HTML parser classes. */
78 class ScHTMLParser : public ScEEParser
80 ScHTMLStyles maStyles;
81 protected:
82 sal_uInt32 maFontHeights[ SC_HTML_FONTSIZES ];
83 ScDocument* mpDoc; /// The destination document.
85 public:
86 explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
87 virtual ~ScHTMLParser() override;
89 virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override = 0;
91 ScHTMLStyles& GetStyles() { return maStyles;}
92 ScDocument& GetDoc() { return *mpDoc;}
94 /** Returns the "global table" which contains the entire HTML document. */
95 virtual const ScHTMLTable* GetGlobalTable() const = 0;
98 typedef o3tl::sorted_vector<sal_uLong> ScHTMLColOffset;
100 struct ScHTMLTableStackEntry
102 ScRangeListRef xLockedList;
103 std::shared_ptr<ScEEParseEntry> xCellEntry;
104 ScHTMLColOffset* pLocalColOffset;
105 sal_uLong nFirstTableCell;
106 SCROW nRowCnt;
107 SCCOL nColCntStart;
108 SCCOL nMaxCol;
109 sal_uInt16 nTable;
110 sal_uInt16 nTableWidth;
111 sal_uInt16 nColOffset;
112 sal_uInt16 nColOffsetStart;
113 bool bFirstRow;
114 ScHTMLTableStackEntry( std::shared_ptr<ScEEParseEntry> xE,
115 ScRangeListRef xL, ScHTMLColOffset* pTO,
116 sal_uLong nFTC,
117 SCROW nRow,
118 SCCOL nStart, SCCOL nMax, sal_uInt16 nTab,
119 sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS,
120 bool bFR )
121 : xLockedList(std::move( xL )), xCellEntry(std::move(xE)),
122 pLocalColOffset( pTO ),
123 nFirstTableCell( nFTC ),
124 nRowCnt( nRow ),
125 nColCntStart( nStart ), nMaxCol( nMax ),
126 nTable( nTab ), nTableWidth( nTW ),
127 nColOffset( nCO ), nColOffsetStart( nCOS ),
128 bFirstRow( bFR )
132 struct ScHTMLAdjustStackEntry
134 SCCOL nLastCol;
135 SCROW nNextRow;
136 SCROW nCurRow;
137 ScHTMLAdjustStackEntry( SCCOL nLCol, SCROW nNRow,
138 SCROW nCRow )
139 : nLastCol( nLCol ), nNextRow( nNRow ),
140 nCurRow( nCRow )
144 class EditEngine;
145 class ScDocument;
146 class HTMLOption;
148 // TODO these need better names
149 typedef ::std::map<SCROW, SCROW> InnerMap;
150 typedef ::std::map<sal_uInt16, InnerMap*> OuterMap;
152 class ScHTMLLayoutParser : public ScHTMLParser
154 private:
155 Size aPageSize;
156 OUString aBaseURL;
157 ::std::stack< std::unique_ptr<ScHTMLTableStackEntry> >
158 aTableStack;
159 OUString aString;
160 ScRangeListRef xLockedList; // per table
161 std::unique_ptr<OuterMap> pTables;
162 ScHTMLColOffset maColOffset;
163 ScHTMLColOffset* pLocalColOffset; // per table
164 sal_uLong nFirstTableCell; // per table
165 short nTableLevel;
166 sal_uInt16 nTable;
167 sal_uInt16 nMaxTable;
168 SCCOL nColCntStart; // first Col per table
169 SCCOL nMaxCol; // per table
170 sal_uInt16 nTableWidth; // per table
171 sal_uInt16 nColOffset; // current, pixel
172 sal_uInt16 nColOffsetStart; // start value per table, in pixel
173 sal_uInt16 nOffsetTolerance; // for use with SeekOffset and related
174 bool bFirstRow; // per table, whether in first row
175 bool bTabInTabCell:1;
176 bool bInCell:1;
177 bool bInTitle:1;
179 DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
180 void NewActEntry( const ScEEParseEntry* );
181 static void EntryEnd( ScEEParseEntry*, const ESelection& );
182 void ProcToken( HtmlImportInfo* );
183 void CloseEntry( const HtmlImportInfo* );
184 void NextRow( const HtmlImportInfo* );
185 void SkipLocked( ScEEParseEntry*, bool bJoin = true );
186 static bool SeekOffset( const ScHTMLColOffset*, sal_uInt16 nOffset,
187 SCCOL* pCol, sal_uInt16 nOffsetTol );
188 static void MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset,
189 sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
190 sal_uInt16 nWidthTol );
191 static void MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset,
192 sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
193 sal_uInt16 nWidthTol );
194 static void ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
195 sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
196 void Colonize( ScEEParseEntry* );
197 sal_uInt16 GetWidth( const ScEEParseEntry* );
198 void SetWidths();
199 void Adjust();
201 sal_uInt16 GetWidthPixel( const HTMLOption& );
202 bool IsAtBeginningOfText( const HtmlImportInfo* );
204 void TableOn( HtmlImportInfo* );
205 void ColOn( HtmlImportInfo* );
206 void TableRowOn( const HtmlImportInfo* );
207 void TableRowOff( const HtmlImportInfo* );
208 void TableDataOn( HtmlImportInfo* );
209 void TableDataOff( const HtmlImportInfo* );
210 void TableOff( const HtmlImportInfo* );
211 void Image( HtmlImportInfo* );
212 void AnchorOn( HtmlImportInfo* );
213 void FontOn( HtmlImportInfo* );
215 public:
216 ScHTMLLayoutParser( EditEngine*, OUString aBaseURL, const Size& aPageSize, ScDocument* );
217 virtual ~ScHTMLLayoutParser() override;
218 virtual ErrCode Read( SvStream&, const OUString& rBaseURL ) override;
219 virtual const ScHTMLTable* GetGlobalTable() const override;
222 // HTML DATA QUERY PARSER
224 /** Declares the orientation in or for a table: column or row. */
225 enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };
227 /** Type for a unique identifier for each table. */
228 typedef sal_uInt16 ScHTMLTableId;
229 /** Identifier of the "global table" (the entire HTML document). */
230 const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
231 /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. */
232 const ScHTMLTableId SC_HTML_NO_TABLE = 0;
234 /** A 2D cell position in an HTML table. */
235 struct ScHTMLPos
237 SCCOL mnCol;
238 SCROW mnRow;
240 explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
241 explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) :
242 mnCol( nCol ), mnRow( nRow ) {}
243 explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); }
245 SCCOLROW Get( ScHTMLOrient eOrient ) const
246 { return (eOrient == tdCol) ? mnCol : mnRow; }
247 void Set( SCCOL nCol, SCROW nRow )
248 { mnCol = nCol; mnRow = nRow; }
249 void Set( const ScAddress& rAddr )
250 { Set( rAddr.Col(), rAddr.Row() ); }
251 ScAddress MakeAddr() const
252 { return ScAddress( mnCol, mnRow, 0 ); }
255 inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
257 return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
260 /** A 2D cell size in an HTML table. */
261 struct ScHTMLSize
263 SCCOL mnCols;
264 SCROW mnRows;
266 explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) :
267 mnCols( nCols ), mnRows( nRows ) {}
268 void Set( SCCOL nCols, SCROW nRows )
269 { mnCols = nCols; mnRows = nRows; }
272 /** A single entry containing a line of text or representing a table. */
273 struct ScHTMLEntry : public ScEEParseEntry
275 public:
276 explicit ScHTMLEntry(
277 const SfxItemSet& rItemSet,
278 ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
280 /** Returns true, if the selection of the entry is empty. */
281 bool IsEmpty() const { return !aSel.HasRange(); }
282 /** Returns true, if the entry has any content to be imported. */
283 bool HasContents() const;
284 /** Returns true, if the entry represents a table. */
285 bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
286 /** Returns true, if the entry represents a table. */
287 ScHTMLTableId GetTableId() const { return nTab; }
289 /** Sets or clears the import always state. */
290 void SetImportAlways() { mbImportAlways = true; }
291 /** Sets start point of the entry selection to the start of the import info object. */
292 void AdjustStart( const HtmlImportInfo& rInfo );
293 /** Sets end point of the entry selection to the end of the import info object. */
294 void AdjustEnd( const HtmlImportInfo& rInfo );
295 /** Deletes leading and trailing empty paragraphs from the entry. */
296 void Strip( const EditEngine& rEditEngine );
298 /** Returns read/write access to the item set of this entry. */
299 SfxItemSet& GetItemSet() { return aItemSet; }
300 /** Returns read-only access to the item set of this entry. */
301 const SfxItemSet& GetItemSet() const { return aItemSet; }
303 private:
304 bool mbImportAlways; /// true = Always import this entry.
307 /** This struct handles creation of unique table identifiers. */
308 struct ScHTMLTableAutoId
310 const ScHTMLTableId mnTableId; /// The created unique table identifier.
311 ScHTMLTableId& mrnUnusedId; /// Reference to global unused identifier variable.
313 /** The constructor assigns an unused identifier to member mnTableId. */
314 explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
317 class ScHTMLTableMap;
319 /** Stores data for one table in an HTML document.
321 This class does the main work for importing an HTML document. It manages
322 the correct insertion of parse entries into the correct cells and the
323 creation of nested tables. Recalculation of resulting document size and
324 position is done recursively in all nested tables.
326 class ScHTMLTable
328 public:
329 /** Creates a new HTML table without content.
330 @descr Internally handles a current cell position. This position is
331 invalid until first calls of RowOn() and DataOn().
332 @param rParentTable Reference to the parent table that owns this table.
333 @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
334 explicit ScHTMLTable(
335 ScHTMLTable& rParentTable,
336 const HtmlImportInfo& rInfo,
337 bool bPreFormText,
338 const ScDocument& rDoc );
340 virtual ~ScHTMLTable();
342 /** Returns the name of the table, specified in the TABLE tag. */
343 const OUString& GetTableName() const { return maTableName; }
344 /** Returns the caption of the table, specified in the <caption> tag. */
345 const OUString& GetTableCaption() const { return maCaption; }
346 /** Returns the unique identifier of the table. */
347 ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
348 /** Returns the cell spanning of the specified cell. */
349 ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
351 /** Searches in all nested tables for the specified table.
352 @param nTableId Unique identifier of the table. */
353 ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
355 /** Puts the item into the item set of the current entry. */
356 void PutItem( const SfxPoolItem& rItem );
357 /** Inserts a text portion into current entry. */
358 void PutText( const HtmlImportInfo& rInfo );
359 /** Inserts a new line, if in preformatted text, else does nothing. */
360 void InsertPara( const HtmlImportInfo& rInfo );
362 /** Inserts a line break (<br> tag).
363 @descr Inserts the current entry regardless if it is empty. */
364 void BreakOn();
365 /** Inserts a heading line (<p> and <h*> tags). */
366 void HeadingOn();
367 /** Processes a hyperlink (<a> tag). */
368 void AnchorOn();
370 /** Starts a *new* table nested in this table (<table> tag).
371 @return Pointer to the new table. */
372 ScHTMLTable* TableOn( const HtmlImportInfo& rInfo );
373 /** Closes *this* table (</table> tag).
374 @return Pointer to the parent table. */
375 ScHTMLTable* TableOff( const HtmlImportInfo& rInfo );
376 /** Processes the caption of the table (<caption> tag). */
377 void CaptionOn();
378 /** Processes the caption of the table (</caption> tag). */
379 void CaptionOff();
380 /** Starts a *new* table based on preformatted text (<pre> tag).
381 @return Pointer to the new table. */
382 ScHTMLTable* PreOn( const HtmlImportInfo& rInfo );
383 /** Closes *this* table based on preformatted text (</pre> tag).
384 @return Pointer to the parent table. */
385 ScHTMLTable* PreOff( const HtmlImportInfo& rInfo );
387 /** Starts next row (<tr> tag).
388 @descr Cell address is invalid until first call of DataOn(). */
389 void RowOn( const HtmlImportInfo& rInfo );
390 /** Closes the current row (<tr> tag).
391 @descr Cell address is invalid until call of RowOn() and DataOn(). */
392 void RowOff( const HtmlImportInfo& rInfo );
393 /** Starts the next cell (<td> or <th> tag). */
394 void DataOn( const HtmlImportInfo& rInfo );
395 /** Closes the current cell (</td> or </th> tag).
396 @descr Cell address is invalid until next call of DataOn(). */
397 void DataOff( const HtmlImportInfo& rInfo );
399 /** Starts the body of the HTML document (<body> tag). */
400 void BodyOn( const HtmlImportInfo& rInfo );
401 /** Closes the body of the HTML document (</body> tag). */
402 void BodyOff( const HtmlImportInfo& rInfo );
404 /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
405 @descr Used to close this table object regardless on opening tag type.
406 @return Pointer to the parent table, or this, if no parent found. */
407 ScHTMLTable* CloseTable( const HtmlImportInfo& rInfo );
409 /** Returns the resulting document row/column count of the specified HTML row/column. */
410 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
411 /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
412 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
413 /** Returns the total document row/column count in the specified direction. */
414 SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
415 /** Returns the total document row/column count of the specified HTML cell. */
416 ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
418 /** Returns the resulting Calc position of the top left edge of the table. */
419 const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
420 /** Calculates the resulting Calc position of the specified HTML column/row. */
421 SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
422 /** Calculates the resulting Calc position of the specified HTML cell. */
423 ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
425 /** Calculates the current Calc document area of this table. */
426 void GetDocRange( ScRange& rRange ) const;
428 /** Applies border formatting to the passed document. */
429 void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
431 SvNumberFormatter* GetFormatTable();
433 protected:
434 /** Creates a new HTML table without parent.
435 @descr This constructor is used to create the "global table". */
436 explicit ScHTMLTable(
437 SfxItemPool& rPool,
438 EditEngine& rEditEngine,
439 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
440 ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser,
441 const ScDocument& rDoc );
443 /** Fills all empty cells in this and nested tables with dummy parse entries. */
444 void FillEmptyCells();
445 /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
446 void RecalcDocSize();
447 /** Recalculates the position of all cell entries and nested tables.
448 @param rBasePos The origin of the table in the Calc document. */
449 void RecalcDocPos( const ScHTMLPos& rBasePos );
451 private:
452 typedef ::std::unique_ptr< ScHTMLTableMap > ScHTMLTableMapPtr;
453 typedef ::std::vector< SCCOLROW > ScSizeVec;
454 typedef ::std::vector< ScHTMLEntry* > ScHTMLEntryVector;
455 typedef ::std::unique_ptr< ScHTMLEntry > ScHTMLEntryPtr;
457 /** Returns true, if the current cell does not contain an entry yet. */
458 bool IsEmptyCell() const;
459 /** Returns the item set from cell, row, or table, depending on current state. */
460 const SfxItemSet& GetCurrItemSet() const;
462 /** Returns true, if import info represents a space character. */
463 static bool IsSpaceCharInfo( const HtmlImportInfo& rInfo );
465 /** Creates and returns a new empty flying entry at position (0,0). */
466 ScHTMLEntryPtr CreateEntry() const;
467 /** Creates a new flying entry.
468 @param rInfo Contains the initial edit engine selection for the entry. */
469 void CreateNewEntry( const HtmlImportInfo& rInfo );
471 /** Inserts an empty line in front of the next entry. */
472 void InsertLeadingEmptyLine();
474 /** Pushes the passed entry into the list of the current cell. */
475 void ImplPushEntryToVector( ScHTMLEntryVector& rEntryVector, ScHTMLEntryPtr& rxEntry );
476 /** Tries to insert the entry into the current cell.
477 @descr If insertion is not possible (i.e., currently no cell open), the
478 entry will be inserted into the parent table.
479 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
480 bool PushEntry( ScHTMLEntryPtr& rxEntry );
481 /** Puts the current entry into the entry list, if it is not empty.
482 @param rInfo The import info struct containing the end position of the current entry.
483 @param bLastInCell true = If cell is still empty, put this entry always.
484 @return true = Entry as been pushed into the current cell; false = Entry dropped. */
485 bool PushEntry( const HtmlImportInfo& rInfo, bool bLastInCell = false );
486 /** Pushes a new entry into current cell which references a nested table.*/
487 void PushTableEntry( ScHTMLTableId nTableId );
489 /** Tries to find a table from the table container.
490 @descr Assumes that the table is located in the current container or
491 that the passed table identifier is 0.
492 @param nTableId Unique identifier of the table or 0. */
493 ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
494 /** Inserts a nested table in the current cell at the specified position.
495 @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
496 ScHTMLTable* InsertNestedTable( const HtmlImportInfo& rInfo, bool bPreFormText );
498 /** Inserts a new cell in an unused position, starting from current cell position. */
499 void InsertNewCell( const ScHTMLSize& rSpanSize );
501 /** Set internal states for a new table row. */
502 void ImplRowOn();
503 /** Set internal states for leaving a table row. */
504 void ImplRowOff();
505 /** Set internal states for entering a new table cell. */
506 void ImplDataOn( const ScHTMLSize& rSpanSize );
507 /** Set internal states for leaving a table cell. */
508 void ImplDataOff();
510 /** Inserts additional formatting options from import info into the item set. */
511 static void ProcessFormatOptions( SfxItemSet& rItemSet, const HtmlImportInfo& rInfo );
513 /** Updates the document column/row size of the specified column or row.
514 @descr Only increases the present count, never decreases. */
515 void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
516 /** Calculates and sets the resulting size the cell needs in the document.
517 @descr Reduces the needed size in merged cells.
518 @param nCellPos The first column/row position of the (merged) cell.
519 @param nCellSpan The cell spanning in the specified orientation.
520 @param nRealDocSize The raw document size of all entries of the cell. */
521 void CalcNeededDocSize(
522 ScHTMLOrient eOrient, SCCOLROW nCellPos,
523 SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
525 private:
526 ScHTMLTable* mpParentTable; /// Pointer to parent table.
527 ScHTMLTableMapPtr mxNestedTables; /// Table of nested HTML tables.
528 OUString maTableName; /// Table name from <table id> option.
529 OUString maCaption; /// Caption name of the table from <caption> </caption>
530 OUStringBuffer maCaptionBuffer; /// Caption buffer of the table from <caption> </caption>
531 ScHTMLTableAutoId maTableId; /// Unique identifier of this table.
532 SfxItemSet maTableItemSet; /// Items for the entire table.
533 std::optional<SfxItemSet> moRowItemSet; /// Items for the current table row.
534 std::optional<SfxItemSet> moDataItemSet; /// Items for the current cell.
535 ScRangeList maHMergedCells; /// List of all horizontally merged cells.
536 ScRangeList maVMergedCells; /// List of all vertically merged cells.
537 ScRangeList maUsedCells; /// List of all used cells.
538 EditEngine& mrEditEngine; /// Edit engine (from ScEEParser).
539 std::vector<std::shared_ptr<ScEEParseEntry>>& mrEEParseList; /// List that owns the parse entries (from ScEEParser).
540 std::map< ScHTMLPos, ScHTMLEntryVector > maEntryMap; /// List of entries for each cell.
541 ScHTMLEntryVector* mpCurrEntryVector; /// Current entry vector from map for faster access.
542 ScHTMLEntryPtr mxCurrEntry; /// Working entry, not yet inserted in a list.
543 ScSizeVec maCumSizes[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
544 ScHTMLSize maSize; /// Size of the table.
545 ScHTMLPos maCurrCell; /// Address of current cell to fill.
546 ScHTMLPos maDocBasePos; /// Resulting base address in a Calc document.
547 ScHTMLParser* mpParser;
548 const ScDocument& mrDoc;
549 bool mbBorderOn:1; /// true = Table borders on.
550 bool mbPreFormText:1; /// true = Table from preformatted text (<pre> tag).
551 bool mbRowOn:1; /// true = Inside of <tr> </tr>.
552 bool mbDataOn:1; /// true = Inside of <td> </td> or <th> </th>.
553 bool mbPushEmptyLine:1; /// true = Insert empty line before current entry.
554 bool mbCaptionOn:1; /// true = Inside of <caption> </caption>
557 /** The "global table" representing the entire HTML document. */
558 class ScHTMLGlobalTable : public ScHTMLTable
560 public:
561 explicit ScHTMLGlobalTable(
562 SfxItemPool& rPool,
563 EditEngine& rEditEngine,
564 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
565 ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser,
566 const ScDocument& rDoc );
568 virtual ~ScHTMLGlobalTable() override;
570 /** Recalculates sizes and resulting positions of all document entries. */
571 void Recalc();
574 /** The HTML parser for data queries. Focuses on data import, not on layout.
576 Builds the table structure correctly, ignores extended formatting like
577 pictures or column widths.
579 class ScHTMLQueryParser : public ScHTMLParser
581 public:
582 explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
583 virtual ~ScHTMLQueryParser() override;
585 virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override;
587 /** Returns the "global table" which contains the entire HTML document. */
588 virtual const ScHTMLTable* GetGlobalTable() const override;
590 private:
591 /** Handles all possible tags in the HTML document. */
592 void ProcessToken( const HtmlImportInfo& rInfo );
593 /** Inserts a text portion into current entry. */
594 void InsertText( const HtmlImportInfo& rInfo );
595 /** Processes the <font> tag. */
596 void FontOn( const HtmlImportInfo& rInfo );
598 /** Processes the <meta> tag. */
599 void MetaOn( const HtmlImportInfo& rInfo );
600 /** Opens the title of the HTML document (<title> tag). */
601 void TitleOn();
602 /** Closes the title of the HTML document (</title> tag). */
603 void TitleOff( const HtmlImportInfo& rInfo );
605 /** Opens a new table at the current position. */
606 void TableOn( const HtmlImportInfo& rInfo );
607 /** Closes the current table. */
608 void TableOff( const HtmlImportInfo& rInfo );
609 /** Opens a new table based on preformatted text. */
610 void PreOn( const HtmlImportInfo& rInfo );
611 /** Closes the current preformatted text table. */
612 void PreOff( const HtmlImportInfo& rInfo );
614 /** Closes the current table, regardless on opening tag. */
615 void CloseTable( const HtmlImportInfo& rInfo );
617 void ParseStyle(std::u16string_view rStrm);
619 DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
621 private:
622 typedef ::std::unique_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr;
624 OUStringBuffer maTitle; /// The title of the document.
625 ScHTMLGlobalTablePtr mxGlobTable; /// Contains the entire imported document.
626 ScHTMLTable* mpCurrTable; /// Pointer to current table (performance).
627 ScHTMLTableId mnUnusedId; /// First unused table identifier.
628 bool mbTitleOn; /// true = Inside of <title> </title>.
631 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */