Version 6.4.0.3, tag libreoffice-6.4.0.3
[LibreOffice.git] / sc / source / filter / inc / htmlpars.hxx
blob36801bba7fb20b281eaf94a6c8c6a952c499895d
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_SC_SOURCE_FILTER_INC_HTMLPARS_HXX
21 #define INCLUDED_SC_SOURCE_FILTER_INC_HTMLPARS_HXX
23 #include <memory>
24 #include <map>
25 #include <stack>
26 #include <unordered_map>
27 #include <vector>
28 #include <o3tl/sorted_vector.hxx>
30 #include <rangelst.hxx>
31 #include "eeparser.hxx"
/// Array length of ScHTMLParser::maFontHeights.
33 const sal_uInt32 SC_HTML_FONTSIZES = 7; // like export, HTML options
35 // Pixel tolerance for SeekOffset and related (see ScHTMLLayoutParser::nOffsetTolerance).
36 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table
37 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested
39 // BASE class for HTML parser classes
/// Forward declaration; the class itself is defined further down in this header.
41 class ScHTMLTable;
43 /**
44 * Collection of HTML style data parsed from the content of <style>
45 * elements.
/// Properties are held in three maps: per class, per element, and per
/// element plus class (see the member comments below).
47 class ScHTMLStyles
/// String-to-string map; presumably property name -> property value (confirm in insertProp()).
49 typedef std::unordered_map<OUString, OUString> PropsType;
/// Name -> owned property map.
50 typedef ::std::map<OUString, std::unique_ptr<PropsType>> NamePropsType;
/// Element name -> owned NamePropsType.
51 typedef ::std::map<OUString, std::unique_ptr<NamePropsType>> ElemsType;
53 NamePropsType m_GlobalProps; /// global properties (for a given class for all elements)
54 NamePropsType m_ElemGlobalProps; /// element global properties (no class specified)
55 ElemsType m_ElemProps; /// element to class to properties (both element and class are given)
56 const OUString maEmpty; /// just a persistent empty string.
57 public:
58 ScHTMLStyles();
/// Adds one property/value pair for the given element and class names.
/// Both names are passed as a character pointer plus an explicit length.
/// NOTE(review): which of the three maps receives the entry depends on the
/// implementation, which is not visible in this header.
60 void add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
61 const OUString& aProp, const OUString& aValue);
63 /**
64 * Find best-matching property value for given element and class names.
/// Returns a const reference, so the result refers to storage that outlives
/// the call; presumably maEmpty is returned when nothing matches (confirm).
66 const OUString& getPropertyValue(
67 const OUString& rElem, const OUString& rClass, const OUString& rPropName) const;
69 private:
/// Static helper operating on the passed NamePropsType map only.
70 static void insertProp(
71 NamePropsType& rProps, const OUString& aName,
72 const OUString& aProp, const OUString& aValue);
75 /** Base class for HTML parser classes. */
/// Abstract: Read() and GetGlobalTable() are pure virtual and have to be
/// provided by the derived parser classes.
76 class ScHTMLParser : public ScEEParser
78 ScHTMLStyles maStyles;
79 protected:
/// Font heights, SC_HTML_FONTSIZES entries (presumably one per HTML font size level; confirm).
80 sal_uInt32 maFontHeights[ SC_HTML_FONTSIZES ];
81 ScDocument* mpDoc; /// The destination document.
83 public:
84 explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
85 virtual ~ScHTMLParser() override;
87 virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override = 0;
/// Returns read/write access to the style collection owned by this parser.
89 ScHTMLStyles& GetStyles() { return maStyles;}
/// Dereferences mpDoc without a null check, so mpDoc must not be null when this is called.
90 ScDocument& GetDoc() { return *mpDoc;}
92 /** Returns the "global table" which contains the entire HTML document. */
93 virtual const ScHTMLTable* GetGlobalTable() const = 0;
/// Sorted vector of sal_uLong values; used through the ScHTMLColOffset
/// pointers and members of the layout parser below.
96 typedef o3tl::sorted_vector<sal_uLong> ScHTMLColOffset;
/// Value bundle stored (via unique_ptr) on ScHTMLLayoutParser::aTableStack.
/// The fields mirror the "per table" members of ScHTMLLayoutParser;
/// presumably they are saved when a nested table starts and restored when
/// it ends (confirm in the implementation).
/// Most fields are const and fixed at construction; xCellEntry,
/// pLocalColOffset, nTableWidth and nColOffset stay modifiable.
98 struct ScHTMLTableStackEntry
100 ScRangeListRef const xLockedList;
101 std::shared_ptr<ScEEParseEntry> xCellEntry;
/// Raw pointer; NOTE(review): ownership is not visible from this header.
102 ScHTMLColOffset* pLocalColOffset;
103 sal_uLong const nFirstTableCell;
104 SCROW const nRowCnt;
105 SCCOL const nColCntStart;
106 SCCOL const nMaxCol;
107 sal_uInt16 const nTable;
108 sal_uInt16 nTableWidth;
109 sal_uInt16 nColOffset;
110 sal_uInt16 const nColOffsetStart;
111 bool const bFirstRow;
/// Copies every argument into the member of the corresponding name.
112 ScHTMLTableStackEntry( const std::shared_ptr<ScEEParseEntry>& rE,
113 const ScRangeListRef& rL, ScHTMLColOffset* pTO,
114 sal_uLong nFTC,
115 SCROW nRow,
116 SCCOL nStart, SCCOL nMax, sal_uInt16 nTab,
117 sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS,
118 bool bFR )
119 : xLockedList( rL ), xCellEntry(rE),
120 pLocalColOffset( pTO ),
121 nFirstTableCell( nFTC ),
122 nRowCnt( nRow ),
123 nColCntStart( nStart ), nMaxCol( nMax ),
124 nTable( nTab ), nTableWidth( nTW ),
125 nColOffset( nCO ), nColOffsetStart( nCOS ),
126 bFirstRow( bFR )
/// Immutable triple of one column index and two row indices.
/// NOTE(review): presumably used by ScHTMLLayoutParser::Adjust(); its use is
/// not visible in this header.
130 struct ScHTMLAdjustStackEntry
132 SCCOL const nLastCol;
133 SCROW const nNextRow;
134 SCROW const nCurRow;
/// Stores the three arguments in nLastCol, nNextRow and nCurRow.
135 ScHTMLAdjustStackEntry( SCCOL nLCol, SCROW nNRow,
136 SCROW nCRow )
137 : nLastCol( nLCol ), nNextRow( nNRow ),
138 nCurRow( nCRow )
/// Forward declarations. EditEngine and ScDocument are already used by
/// ScHTMLParser above, so they must also be known through the included headers.
142 class EditEngine;
143 class ScDocument;
144 class HTMLOption;
146 // TODO these need better names
/// Row-to-row map.
147 typedef ::std::map<SCROW, SCROW> InnerMap;
/// Maps a 16-bit key to an InnerMap; used as ScHTMLLayoutParser::pTables.
/// NOTE(review): the InnerMap pointers are raw, so their ownership is not
/// expressed in the type; confirm who deletes them.
148 typedef ::std::map<sal_uInt16, InnerMap*> OuterMap;
/// HTML parser derived from ScHTMLParser that implements Read() and
/// GetGlobalTable(). It keeps pixel-based column offsets and a stack of
/// ScHTMLTableStackEntry objects for the per-table state.
150 class ScHTMLLayoutParser : public ScHTMLParser
152 private:
153 Size aPageSize;
154 OUString const aBaseURL;
/// Stack of owned per-table state entries.
155 ::std::stack< std::unique_ptr<ScHTMLTableStackEntry> >
156 aTableStack;
157 OUString aString;
158 ScRangeListRef xLockedList; // per table
159 std::unique_ptr<OuterMap> pTables;
160 ScHTMLColOffset maColOffset;
161 ScHTMLColOffset* pLocalColOffset; // per table
162 sal_uLong nFirstTableCell; // per table
163 short nTableLevel;
164 sal_uInt16 nTable;
165 sal_uInt16 nMaxTable;
166 SCCOL nColCntStart; // first Col per table
167 SCCOL nMaxCol; // per table
168 sal_uInt16 nTableWidth; // per table
169 sal_uInt16 nColOffset; // current, pixel
170 sal_uInt16 nColOffsetStart; // start value per table, in pixel
171 sal_uInt16 nOffsetTolerance; // for use with SeekOffset and related
172 bool bFirstRow; // per table, whether in first row
/// The following three flags are one-bit bit-fields.
173 bool bTabInTabCell:1;
174 bool bInCell:1;
175 bool bInTitle:1;
/// Declares the link handler HTMLImportHdl taking an HtmlImportInfo reference.
177 DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
178 void NewActEntry( const ScEEParseEntry* );
179 static void EntryEnd( ScEEParseEntry*, const ESelection& );
180 void ProcToken( HtmlImportInfo* );
181 void CloseEntry( const HtmlImportInfo* );
182 void NextRow( const HtmlImportInfo* );
183 void SkipLocked( ScEEParseEntry*, bool bJoin = true );
/// Static column-offset helpers. All take the offset container as their
/// first argument and a tolerance (nOffsetTol) in the same unit as the offsets.
/// SeekOffset reports a column through pCol and returns a bool
/// (presumably "found"; confirm in the implementation).
184 static bool SeekOffset( const ScHTMLColOffset*, sal_uInt16 nOffset,
185 SCCOL* pCol, sal_uInt16 nOffsetTol );
/// Takes nOffset and nWidth by non-const reference, so both may be modified.
186 static void MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset,
187 sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
188 sal_uInt16 nWidthTol );
/// Same parameter list as MakeCol, but nOffset and nWidth are passed by value.
189 static void MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset,
190 sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
191 sal_uInt16 nWidthTol );
/// Takes both offsets by non-const reference, so both may be modified.
192 static void ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
193 sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
194 void Colonize( ScEEParseEntry* );
195 sal_uInt16 GetWidth( const ScEEParseEntry* );
196 void SetWidths();
197 void Adjust();
199 sal_uInt16 GetWidthPixel( const HTMLOption& );
200 bool IsAtBeginningOfText( const HtmlImportInfo* );
/// Handlers named after HTML elements; presumably dispatched from ProcToken() (confirm).
202 void TableOn( HtmlImportInfo* );
203 void ColOn( HtmlImportInfo* );
204 void TableRowOn( const HtmlImportInfo* );
205 void TableRowOff( const HtmlImportInfo* );
206 void TableDataOn( HtmlImportInfo* );
207 void TableDataOff( const HtmlImportInfo* );
208 void TableOff( const HtmlImportInfo* );
209 void Image( HtmlImportInfo* );
210 void AnchorOn( HtmlImportInfo* );
211 void FontOn( HtmlImportInfo* );
213 public:
214 ScHTMLLayoutParser( EditEngine*, const OUString& rBaseURL, const Size& aPageSize, ScDocument* );
215 virtual ~ScHTMLLayoutParser() override;
216 virtual ErrCode Read( SvStream&, const OUString& rBaseURL ) override;
217 virtual const ScHTMLTable* GetGlobalTable() const override;
220 // HTML DATA QUERY PARSER
222 /** Declares the orientation in or for a table: column or row. */
/// The numeric values are fixed to 0 and 1; maCumSizes[ 2 ] in ScHTMLTable is
/// presumably indexed by this enum (confirm).
223 enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };
225 /** Type for a unique identifier for each table. */
226 typedef sal_uInt16 ScHTMLTableId;
227 /** Identifier of the "global table" (the entire HTML document). */
228 const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
229 /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. */
/// Note: has the same value (0) as SC_HTML_GLOBAL_TABLE.
230 const ScHTMLTableId SC_HTML_NO_TABLE = 0;
232 /** A 2D cell position in an HTML table. */
233 struct ScHTMLPos
235 SCCOL mnCol;
236 SCROW mnRow;
/// Default position is column 0, row 0.
238 explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
239 explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) :
240 mnCol( nCol ), mnRow( nRow ) {}
/// Takes column and row from the address via Set(); nothing else of rAddr is stored.
241 explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); }
/// Returns mnCol for tdCol, otherwise mnRow.
243 SCCOLROW Get( ScHTMLOrient eOrient ) const
244 { return (eOrient == tdCol) ? mnCol : mnRow; }
245 void Set( SCCOL nCol, SCROW nRow )
246 { mnCol = nCol; mnRow = nRow; }
/// Copies rAddr.Col() and rAddr.Row().
247 void Set( const ScAddress& rAddr )
248 { Set( rAddr.Col(), rAddr.Row() ); }
/// Builds an ScAddress from column and row with 0 as third argument
/// (presumably the sheet index; confirm against ScAddress).
249 ScAddress MakeAddr() const
250 { return ScAddress( mnCol, mnRow, 0 ); }
/// Orders positions by row first and by column within the same row.
/// This ordering is what allows ScHTMLPos to be the key of the std::map
/// typedef ScHTMLEntryMap in ScHTMLTable.
253 inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
255 return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
258 /** A 2D cell size in an HTML table. */
/// Has no default constructor; both counts must always be given.
259 struct ScHTMLSize
261 SCCOL mnCols;
262 SCROW mnRows;
264 explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) :
265 mnCols( nCols ), mnRows( nRows ) {}
/// Overwrites both counts.
266 void Set( SCCOL nCols, SCROW nRows )
267 { mnCols = nCols; mnRows = nRows; }
270 /** A single entry containing a line of text or representing a table. */
/// The members aSel, nTab and aItemSet used by the inline functions below
/// are not declared here; they come from the base struct ScEEParseEntry.
271 struct ScHTMLEntry : public ScEEParseEntry
273 public:
274 explicit ScHTMLEntry(
275 const SfxItemSet& rItemSet,
276 ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
278 /** Returns true, if the selection of the entry is empty. */
279 bool IsEmpty() const { return !aSel.HasRange(); }
280 /** Returns true, if the entry has any content to be imported. */
281 bool HasContents() const;
282 /** Returns true, if the entry represents a table (nTab differs from SC_HTML_NO_TABLE). */
283 bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
284 /** Returns the table identifier of the entry (member nTab). */
285 ScHTMLTableId GetTableId() const { return nTab; }
287 /** Sets the import always state (there is no counterpart here to clear it). */
288 void SetImportAlways() { mbImportAlways = true; }
289 /** Sets start point of the entry selection to the start of the import info object. */
290 void AdjustStart( const HtmlImportInfo& rInfo );
291 /** Sets end point of the entry selection to the end of the import info object. */
292 void AdjustEnd( const HtmlImportInfo& rInfo );
293 /** Deletes leading and trailing empty paragraphs from the entry. */
294 void Strip( const EditEngine& rEditEngine );
296 /** Returns read/write access to the item set of this entry. */
297 SfxItemSet& GetItemSet() { return aItemSet; }
298 /** Returns read-only access to the item set of this entry. */
299 const SfxItemSet& GetItemSet() const { return aItemSet; }
301 private:
302 bool mbImportAlways; /// true = Always import this entry.
305 /** This struct handles creation of unique table identifiers. */
/// Holds a reference to an external counter, so the counter variable must
/// outlive every ScHTMLTableAutoId that refers to it.
306 struct ScHTMLTableAutoId
308 const ScHTMLTableId mnTableId; /// The created unique table identifier.
309 ScHTMLTableId& mrnUnusedId; /// Reference to global unused identifier variable.
311 /** The constructor assigns an unused identifier to member mnTableId. */
/// NOTE(review): presumably also advances rnUnusedId; the definition is not in this header.
312 explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
/// Forward declaration; only used through ScHTMLTable::ScHTMLTableMapPtr in this header.
315 class ScHTMLTableMap;
317 /** Stores data for one table in an HTML document.
319 This class does the main work for importing an HTML document. It manages
320 the correct insertion of parse entries into the correct cells and the
321 creation of nested tables. Recalculation of resulting document size and
322 position is done recursively in all nested tables.
324 class ScHTMLTable
326 public:
327 /** Creates a new HTML table without content.
328 @descr Internally handles a current cell position. This position is
329 invalid until first calls of RowOn() and DataOn().
330 @param rParentTable Reference to the parent table that owns this table.
331 @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
332 explicit ScHTMLTable(
333 ScHTMLTable& rParentTable,
334 const HtmlImportInfo& rInfo,
335 bool bPreFormText );
337 virtual ~ScHTMLTable();
339 /** Returns the name of the table, specified in the TABLE tag. */
340 const OUString& GetTableName() const { return maTableName; }
341 /** Returns the unique identifier of the table. */
342 ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
343 /** Returns the cell spanning of the specified cell. */
344 ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
346 /** Searches in all nested tables for the specified table.
347 @param nTableId Unique identifier of the table. */
348 ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
350 /** Puts the item into the item set of the current entry. */
351 void PutItem( const SfxPoolItem& rItem );
352 /** Inserts a text portion into current entry. */
353 void PutText( const HtmlImportInfo& rInfo );
354 /** Inserts a new line, if in preformatted text, else does nothing. */
355 void InsertPara( const HtmlImportInfo& rInfo );
357 /** Inserts a line break (<br> tag).
358 @descr Inserts the current entry regardless if it is empty. */
359 void BreakOn();
360 /** Inserts a heading line (<p> and <h*> tags). */
361 void HeadingOn();
362 /** Processes a hyperlink (<a> tag). */
363 void AnchorOn();
365 /** Starts a *new* table nested in this table (<table> tag).
366 @return Pointer to the new table. */
367 ScHTMLTable* TableOn( const HtmlImportInfo& rInfo );
368 /** Closes *this* table (</table> tag).
369 @return Pointer to the parent table. */
370 ScHTMLTable* TableOff( const HtmlImportInfo& rInfo );
371 /** Starts a *new* table based on preformatted text (<pre> tag).
372 @return Pointer to the new table. */
373 ScHTMLTable* PreOn( const HtmlImportInfo& rInfo );
374 /** Closes *this* table based on preformatted text (</pre> tag).
375 @return Pointer to the parent table. */
376 ScHTMLTable* PreOff( const HtmlImportInfo& rInfo );
378 /** Starts next row (<tr> tag).
379 @descr Cell address is invalid until first call of DataOn(). */
380 void RowOn( const HtmlImportInfo& rInfo );
381 /** Closes the current row (</tr> tag).
382 @descr Cell address is invalid until call of RowOn() and DataOn(). */
383 void RowOff( const HtmlImportInfo& rInfo );
384 /** Starts the next cell (<td> or <th> tag). */
385 void DataOn( const HtmlImportInfo& rInfo );
386 /** Closes the current cell (</td> or </th> tag).
387 @descr Cell address is invalid until next call of DataOn(). */
388 void DataOff( const HtmlImportInfo& rInfo );
390 /** Starts the body of the HTML document (<body> tag). */
391 void BodyOn( const HtmlImportInfo& rInfo );
392 /** Closes the body of the HTML document (</body> tag). */
393 void BodyOff( const HtmlImportInfo& rInfo );
395 /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
396 @descr Used to close this table object regardless of the opening tag type.
397 @return Pointer to the parent table, or this, if no parent found. */
398 ScHTMLTable* CloseTable( const HtmlImportInfo& rInfo );
400 /** Returns the resulting document row/column count of the specified HTML row/column. */
401 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
402 /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
403 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
404 /** Returns the total document row/column count in the specified direction. */
405 SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
406 /** Returns the total document row/column count of the specified HTML cell. */
407 ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
409 /** Returns the resulting Calc position of the top left edge of the table. */
410 const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
411 /** Calculates the resulting Calc position of the specified HTML column/row. */
412 SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
413 /** Calculates the resulting Calc position of the specified HTML cell. */
414 ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
416 /** Calculates the current Calc document area of this table. */
417 void GetDocRange( ScRange& rRange ) const;
419 /** Applies border formatting to the passed document. */
420 void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
/// Returns a pointer to a number formatter.
/// NOTE(review): presumably obtained from the document via mpParser; confirm in the implementation.
422 SvNumberFormatter* GetFormatTable();
424 protected:
425 /** Creates a new HTML table without parent.
426 @descr This constructor is used to create the "global table". */
427 explicit ScHTMLTable(
428 SfxItemPool& rPool,
429 EditEngine& rEditEngine,
430 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
431 ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser );
433 /** Fills all empty cells in this and nested tables with dummy parse entries. */
434 void FillEmptyCells();
435 /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
436 void RecalcDocSize();
437 /** Recalculates the position of all cell entries and nested tables.
438 @param rBasePos The origin of the table in the Calc document. */
439 void RecalcDocPos( const ScHTMLPos& rBasePos );
441 private:
442 typedef ::std::unique_ptr< ScHTMLTableMap > ScHTMLTableMapPtr;
443 typedef ::std::unique_ptr< SfxItemSet > SfxItemSetPtr;
444 typedef ::std::vector< SCCOLROW > ScSizeVec;
/// Vector of raw entry pointers; the entries are not owned by the vector.
445 typedef ::std::vector< ScHTMLEntry* > ScHTMLEntryVector;
/// Keyed by cell position; ordering is given by operator< for ScHTMLPos (row first, then column).
446 typedef ::std::map< ScHTMLPos, ScHTMLEntryVector > ScHTMLEntryMap;
447 typedef ::std::unique_ptr< ScHTMLEntry > ScHTMLEntryPtr;
449 /** Returns true, if the current cell does not contain an entry yet. */
450 bool IsEmptyCell() const;
451 /** Returns the item set from cell, row, or table, depending on current state. */
452 const SfxItemSet& GetCurrItemSet() const;
454 /** Returns true, if import info represents a space character. */
455 static bool IsSpaceCharInfo( const HtmlImportInfo& rInfo );
457 /** Creates and returns a new empty flying entry at position (0,0). */
458 ScHTMLEntryPtr CreateEntry() const;
459 /** Creates a new flying entry.
460 @param rInfo Contains the initial edit engine selection for the entry. */
461 void CreateNewEntry( const HtmlImportInfo& rInfo );
463 /** Inserts an empty line in front of the next entry. */
464 void InsertLeadingEmptyLine();
466 /** Pushes the passed entry into the list of the current cell. */
467 void ImplPushEntryToVector( ScHTMLEntryVector& rEntryVector, ScHTMLEntryPtr& rxEntry );
468 /** Tries to insert the entry into the current cell.
469 @descr If insertion is not possible (i.e., currently no cell open), the
470 entry will be inserted into the parent table.
471 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
472 bool PushEntry( ScHTMLEntryPtr& rxEntry );
473 /** Puts the current entry into the entry list, if it is not empty.
474 @param rInfo The import info struct containing the end position of the current entry.
475 @param bLastInCell true = If cell is still empty, put this entry always.
476 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
477 bool PushEntry( const HtmlImportInfo& rInfo, bool bLastInCell = false );
478 /** Pushes a new entry into current cell which references a nested table.*/
479 void PushTableEntry( ScHTMLTableId nTableId );
481 /** Tries to find a table from the table container.
482 @descr Assumes that the table is located in the current container or
483 that the passed table identifier is 0.
484 @param nTableId Unique identifier of the table or 0. */
485 ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
486 /** Inserts a nested table in the current cell at the specified position.
487 @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
488 ScHTMLTable* InsertNestedTable( const HtmlImportInfo& rInfo, bool bPreFormText );
490 /** Inserts a new cell in an unused position, starting from current cell position. */
491 void InsertNewCell( const ScHTMLSize& rSpanSize );
493 /** Set internal states for a new table row. */
494 void ImplRowOn();
495 /** Set internal states for leaving a table row. */
496 void ImplRowOff();
497 /** Set internal states for entering a new table cell. */
498 void ImplDataOn( const ScHTMLSize& rSpanSize );
499 /** Set internal states for leaving a table cell. */
500 void ImplDataOff();
502 /** Inserts additional formatting options from import info into the item set. */
503 static void ProcessFormatOptions( SfxItemSet& rItemSet, const HtmlImportInfo& rInfo );
505 /** Updates the document column/row size of the specified column or row.
506 @descr Only increases the present count, never decreases. */
507 void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
508 /** Calculates and sets the resulting size the cell needs in the document.
509 @descr Reduces the needed size in merged cells.
510 @param nCellPos The first column/row position of the (merged) cell.
511 @param nCellSpan The cell spanning in the specified orientation.
512 @param nRealDocSize The raw document size of all entries of the cell. */
513 void CalcNeededDocSize(
514 ScHTMLOrient eOrient, SCCOLROW nCellPos,
515 SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
517 private:
518 ScHTMLTable* mpParentTable; /// Pointer to parent table.
519 ScHTMLTableMapPtr mxNestedTables; /// Table of nested HTML tables.
520 OUString maTableName; /// Table name from <table id> option.
521 ScHTMLTableAutoId maTableId; /// Unique identifier of this table.
522 SfxItemSet maTableItemSet; /// Items for the entire table.
523 SfxItemSetPtr mxRowItemSet; /// Items for the current table row.
524 SfxItemSetPtr mxDataItemSet; /// Items for the current cell.
525 ScRangeList maHMergedCells; /// List of all horizontally merged cells.
526 ScRangeList maVMergedCells; /// List of all vertically merged cells.
527 ScRangeList maUsedCells; /// List of all used cells.
528 EditEngine& mrEditEngine; /// Edit engine (from ScEEParser).
529 std::vector<std::shared_ptr<ScEEParseEntry>>& mrEEParseList; /// List that owns the parse entries (from ScEEParser).
530 ScHTMLEntryMap maEntryMap; /// List of entries for each cell.
531 ScHTMLEntryVector* mpCurrEntryVector; /// Current entry vector from map for faster access.
532 ScHTMLEntryPtr mxCurrEntry; /// Working entry, not yet inserted in a list.
/// Two vectors; presumably indexed by ScHTMLOrient (tdCol = 0, tdRow = 1); confirm.
533 ScSizeVec maCumSizes[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
534 ScHTMLSize maSize; /// Size of the table.
535 ScHTMLPos maCurrCell; /// Address of current cell to fill.
536 ScHTMLPos maDocBasePos; /// Resulting base address in a Calc document.
/// Raw pointer to the parser. Only the protected "global table" constructor
/// takes a parser argument; NOTE(review): nested tables presumably copy it
/// from their parent (confirm).
537 ScHTMLParser* mpParser;
538 bool mbBorderOn:1; /// true = Table borders on.
539 bool const mbPreFormText:1; /// true = Table from preformatted text (<pre> tag).
540 bool mbRowOn:1; /// true = Inside of <tr> </tr>.
541 bool mbDataOn:1; /// true = Inside of <td> </td> or <th> </th>.
542 bool mbPushEmptyLine:1; /// true = Insert empty line before current entry.
545 /** The "global table" representing the entire HTML document. */
546 class ScHTMLGlobalTable : public ScHTMLTable
548 public:
/// Takes the same parameter list as the protected ScHTMLTable constructor
/// that is documented as creating the "global table".
549 explicit ScHTMLGlobalTable(
550 SfxItemPool& rPool,
551 EditEngine& rEditEngine,
552 std::vector<std::shared_ptr<ScEEParseEntry>>& rEEParseList,
553 ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser );
555 virtual ~ScHTMLGlobalTable() override;
557 /** Recalculates sizes and resulting positions of all document entries. */
558 void Recalc();
561 /** The HTML parser for data queries. Focuses on data import, not on layout.
563 Builds the table structure correctly, ignores extended formatting like
564 pictures or column widths.
566 class ScHTMLQueryParser : public ScHTMLParser
568 public:
569 explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
570 virtual ~ScHTMLQueryParser() override;
572 virtual ErrCode Read( SvStream& rStrm, const OUString& rBaseURL ) override;
574 /** Returns the "global table" which contains the entire HTML document. */
575 virtual const ScHTMLTable* GetGlobalTable() const override;
577 private:
578 /** Handles all possible tags in the HTML document. */
579 void ProcessToken( const HtmlImportInfo& rInfo );
580 /** Inserts a text portion into current entry. */
581 void InsertText( const HtmlImportInfo& rInfo );
582 /** Processes the <font> tag. */
583 void FontOn( const HtmlImportInfo& rInfo );
585 /** Processes the <meta> tag. */
586 void MetaOn( const HtmlImportInfo& rInfo );
587 /** Opens the title of the HTML document (<title> tag). */
588 void TitleOn();
589 /** Closes the title of the HTML document (</title> tag). */
590 void TitleOff( const HtmlImportInfo& rInfo );
592 /** Opens a new table at the current position. */
593 void TableOn( const HtmlImportInfo& rInfo );
594 /** Closes the current table. */
595 void TableOff( const HtmlImportInfo& rInfo );
596 /** Opens a new table based on preformatted text. */
597 void PreOn( const HtmlImportInfo& rInfo );
598 /** Closes the current preformatted text table. */
599 void PreOff( const HtmlImportInfo& rInfo );
601 /** Closes the current table, regardless of the opening tag. */
602 void CloseTable( const HtmlImportInfo& rInfo );
/// Static helper that receives style text as a string.
/// NOTE(review): being static it has no direct access to the parser's
/// style collection; confirm in the implementation where results go.
604 static void ParseStyle(const OUString& rStrm);
/// Declares the link handler HTMLImportHdl taking an HtmlImportInfo reference.
606 DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
608 private:
609 typedef ::std::unique_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr;
611 OUStringBuffer maTitle; /// The title of the document.
612 ScHTMLGlobalTablePtr mxGlobTable; /// Contains the entire imported document.
613 ScHTMLTable* mpCurrTable; /// Pointer to current table (performance).
614 ScHTMLTableId mnUnusedId; /// First unused table identifier.
615 bool mbTitleOn; /// true = Inside of <title> </title>.
618 #endif
620 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */