1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
26 #include <string_view>
27 #include <unordered_map>
29 #include <o3tl/sorted_vector.hxx>
31 #include <rangelst.hxx>
32 #include "eeparser.hxx"
34 const sal_uInt32 SC_HTML_FONTSIZES
= 7; // like export, HTML options
36 // Pixel tolerance for SeekOffset and related.
37 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL
= 1; // single table
38 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE
= 10; // nested
40 // BASE class for HTML parser classes
/** Collection of HTML style data parsed from the content of <style> elements. */
50 typedef std::unordered_map
<OUString
, OUString
> PropsType
;
51 typedef ::std::map
<OUString
, PropsType
> NamePropsType
;
52 typedef ::std::map
<OUString
, NamePropsType
> ElemsType
;
54 NamePropsType m_GlobalProps
; /// global properties (for a given class for all elements)
55 NamePropsType m_ElemGlobalProps
; /// element global properties (no class specified)
56 ElemsType m_ElemProps
; /// element to class to properties (both element and class are given)
57 const OUString maEmpty
; /// just a persistent empty string.
61 void add(const char* pElemName
, size_t nElemName
, const char* pClassName
, size_t nClassName
,
62 const OUString
& aProp
, const OUString
& aValue
);
65 * Find best-matching property value for given element and class names.
67 const OUString
& getPropertyValue(
68 const OUString
& rElem
, const OUString
& rClass
, const OUString
& rPropName
) const;
71 static void insertProp(
72 NamePropsType
& rProps
, const OUString
& aName
,
73 const OUString
& aProp
, const OUString
& aValue
);
76 /** Base class for HTML parser classes. */
77 class ScHTMLParser
: public ScEEParser
79 ScHTMLStyles maStyles
;
81 sal_uInt32 maFontHeights
[ SC_HTML_FONTSIZES
];
82 ScDocument
* mpDoc
; /// The destination document.
85 explicit ScHTMLParser( EditEngine
* pEditEngine
, ScDocument
* pDoc
);
86 virtual ~ScHTMLParser() override
;
88 virtual ErrCode
Read( SvStream
& rStrm
, const OUString
& rBaseURL
) override
= 0;
90 ScHTMLStyles
& GetStyles() { return maStyles
;}
91 ScDocument
& GetDoc() { return *mpDoc
;}
93 /** Returns the "global table" which contains the entire HTML document. */
94 virtual const ScHTMLTable
* GetGlobalTable() const = 0;
97 typedef o3tl::sorted_vector
<sal_uLong
> ScHTMLColOffset
;
/** One saved set of table-related parser values.
    The members visible here (xLockedList, pLocalColOffset, nFirstTableCell,
    nTableWidth, nColOffset, nColOffsetStart) carry the same names as the
    "per table" members of ScHTMLLayoutParser; the constructor copies each
    argument into the member named in its initializer.
    NOTE(review): several member declarations and constructor parameters
    (e.g. nFTC, nColCntStart, nMaxCol, nTable) are referenced by the
    initializer list but not declared in the visible text - confirm against
    the complete header. */
99 struct ScHTMLTableStackEntry
101 ScRangeListRef xLockedList
;
102 std::shared_ptr
<ScEEParseEntry
> xCellEntry
;
103 ScHTMLColOffset
* pLocalColOffset
;
104 sal_uLong nFirstTableCell
;
109 sal_uInt16 nTableWidth
;
110 sal_uInt16 nColOffset
;
111 sal_uInt16 nColOffsetStart
;
113 ScHTMLTableStackEntry( const std::shared_ptr
<ScEEParseEntry
>& rE
,
114 const ScRangeListRef
& rL
, ScHTMLColOffset
* pTO
,
117 SCCOL nStart
, SCCOL nMax
, sal_uInt16 nTab
,
118 sal_uInt16 nTW
, sal_uInt16 nCO
, sal_uInt16 nCOS
,
120 : xLockedList( rL
), xCellEntry(rE
),
121 pLocalColOffset( pTO
),
122 nFirstTableCell( nFTC
),
124 nColCntStart( nStart
), nMaxCol( nMax
),
125 nTable( nTab
), nTableWidth( nTW
),
126 nColOffset( nCO
), nColOffsetStart( nCOS
),
/** Stack entry whose constructor stores nLCol in nLastCol and nNRow in nNextRow.
    NOTE(review): the member declarations and the remaining constructor
    parameters are not present in the visible text - confirm against the
    complete header. */
131 struct ScHTMLAdjustStackEntry
136 ScHTMLAdjustStackEntry( SCCOL nLCol
, SCROW nNRow
,
138 : nLastCol( nLCol
), nNextRow( nNRow
),
147 // TODO these need better names
148 typedef ::std::map
<SCROW
, SCROW
> InnerMap
;
149 typedef ::std::map
<sal_uInt16
, InnerMap
*> OuterMap
;
151 class ScHTMLLayoutParser
: public ScHTMLParser
// Stack type holding uniquely owned ScHTMLTableStackEntry objects.
// NOTE(review): the member name that follows this type is not present in the visible text.
156 ::std::stack
< std::unique_ptr
<ScHTMLTableStackEntry
> >
159 ScRangeListRef xLockedList
; // per table
160 std::unique_ptr
<OuterMap
> pTables
;
161 ScHTMLColOffset maColOffset
;
162 ScHTMLColOffset
* pLocalColOffset
; // per table
163 sal_uLong nFirstTableCell
; // per table
166 sal_uInt16 nMaxTable
;
167 SCCOL nColCntStart
; // first Col per table
168 SCCOL nMaxCol
; // per table
169 sal_uInt16 nTableWidth
; // per table
170 sal_uInt16 nColOffset
; // current, pixel
171 sal_uInt16 nColOffsetStart
; // start value per table, in pixel
172 sal_uInt16 nOffsetTolerance
; // for use with SeekOffset and related
173 bool bFirstRow
; // per table, whether in first row
174 bool bTabInTabCell
:1;
178 DECL_LINK( HTMLImportHdl
, HtmlImportInfo
&, void );
179 void NewActEntry( const ScEEParseEntry
* );
180 static void EntryEnd( ScEEParseEntry
*, const ESelection
& );
181 void ProcToken( HtmlImportInfo
* );
182 void CloseEntry( const HtmlImportInfo
* );
183 void NextRow( const HtmlImportInfo
* );
184 void SkipLocked( ScEEParseEntry
*, bool bJoin
= true );
185 static bool SeekOffset( const ScHTMLColOffset
*, sal_uInt16 nOffset
,
186 SCCOL
* pCol
, sal_uInt16 nOffsetTol
);
187 static void MakeCol( ScHTMLColOffset
*, sal_uInt16
& nOffset
,
188 sal_uInt16
& nWidth
, sal_uInt16 nOffsetTol
,
189 sal_uInt16 nWidthTol
);
190 static void MakeColNoRef( ScHTMLColOffset
*, sal_uInt16 nOffset
,
191 sal_uInt16 nWidth
, sal_uInt16 nOffsetTol
,
192 sal_uInt16 nWidthTol
);
193 static void ModifyOffset( ScHTMLColOffset
*, sal_uInt16
& nOldOffset
,
194 sal_uInt16
& nNewOffset
, sal_uInt16 nOffsetTol
);
195 void Colonize( ScEEParseEntry
* );
196 sal_uInt16
GetWidth( const ScEEParseEntry
* );
200 sal_uInt16
GetWidthPixel( const HTMLOption
& );
201 bool IsAtBeginningOfText( const HtmlImportInfo
* );
203 void TableOn( HtmlImportInfo
* );
204 void ColOn( HtmlImportInfo
* );
205 void TableRowOn( const HtmlImportInfo
* );
206 void TableRowOff( const HtmlImportInfo
* );
207 void TableDataOn( HtmlImportInfo
* );
208 void TableDataOff( const HtmlImportInfo
* );
209 void TableOff( const HtmlImportInfo
* );
210 void Image( HtmlImportInfo
* );
211 void AnchorOn( HtmlImportInfo
* );
212 void FontOn( HtmlImportInfo
* );
215 ScHTMLLayoutParser( EditEngine
*, const OUString
& rBaseURL
, const Size
& aPageSize
, ScDocument
* );
216 virtual ~ScHTMLLayoutParser() override
;
217 virtual ErrCode
Read( SvStream
&, const OUString
& rBaseURL
) override
;
218 virtual const ScHTMLTable
* GetGlobalTable() const override
;
221 // HTML DATA QUERY PARSER
/** Declares the orientation in or for a table: column or row. */
enum ScHTMLOrient
{
    tdCol = 0,  ///< Column orientation.
    tdRow = 1   ///< Row orientation.
};
226 /** Type for a unique identifier for each table. */
227 typedef sal_uInt16 ScHTMLTableId
;
228 /** Identifier of the "global table" (the entire HTML document). */
229 const ScHTMLTableId SC_HTML_GLOBAL_TABLE
= 0;
230 /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. */
231 const ScHTMLTableId SC_HTML_NO_TABLE
= 0;
233 /** A 2D cell position in an HTML table. */
239 explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
240 explicit ScHTMLPos( SCCOL nCol
, SCROW nRow
) :
241 mnCol( nCol
), mnRow( nRow
) {}
242 explicit ScHTMLPos( const ScAddress
& rAddr
) { Set( rAddr
); }
244 SCCOLROW
Get( ScHTMLOrient eOrient
) const
245 { return (eOrient
== tdCol
) ? mnCol
: mnRow
; }
246 void Set( SCCOL nCol
, SCROW nRow
)
247 { mnCol
= nCol
; mnRow
= nRow
; }
248 void Set( const ScAddress
& rAddr
)
249 { Set( rAddr
.Col(), rAddr
.Row() ); }
250 ScAddress
MakeAddr() const
251 { return ScAddress( mnCol
, mnRow
, 0 ); }
254 inline bool operator<( const ScHTMLPos
& rPos1
, const ScHTMLPos
& rPos2
)
256 return (rPos1
.mnRow
< rPos2
.mnRow
) || ((rPos1
.mnRow
== rPos2
.mnRow
) && (rPos1
.mnCol
< rPos2
.mnCol
));
259 /** A 2D cell size in an HTML table. */
265 explicit ScHTMLSize( SCCOL nCols
, SCROW nRows
) :
266 mnCols( nCols
), mnRows( nRows
) {}
267 void Set( SCCOL nCols
, SCROW nRows
)
268 { mnCols
= nCols
; mnRows
= nRows
; }
271 /** A single entry containing a line of text or representing a table. */
272 struct ScHTMLEntry
: public ScEEParseEntry
275 explicit ScHTMLEntry(
276 const SfxItemSet
& rItemSet
,
277 ScHTMLTableId nTableId
= SC_HTML_NO_TABLE
);
279 /** Returns true, if the selection of the entry is empty. */
280 bool IsEmpty() const { return !aSel
.HasRange(); }
281 /** Returns true, if the entry has any content to be imported. */
282 bool HasContents() const;
283 /** Returns true, if the entry represents a table. */
284 bool IsTable() const { return nTab
!= SC_HTML_NO_TABLE
; }
285 /** Returns true, if the entry represents a table. */
286 ScHTMLTableId
GetTableId() const { return nTab
; }
288 /** Sets or clears the import always state. */
289 void SetImportAlways() { mbImportAlways
= true; }
290 /** Sets start point of the entry selection to the start of the import info object. */
291 void AdjustStart( const HtmlImportInfo
& rInfo
);
292 /** Sets end point of the entry selection to the end of the import info object. */
293 void AdjustEnd( const HtmlImportInfo
& rInfo
);
294 /** Deletes leading and trailing empty paragraphs from the entry. */
295 void Strip( const EditEngine
& rEditEngine
);
297 /** Returns read/write access to the item set of this entry. */
298 SfxItemSet
& GetItemSet() { return aItemSet
; }
299 /** Returns read-only access to the item set of this entry. */
300 const SfxItemSet
& GetItemSet() const { return aItemSet
; }
/// true = Always import this entry (set by SetImportAlways()).
bool mbImportAlways;
306 /** This struct handles creation of unique table identifiers. */
307 struct ScHTMLTableAutoId
309 const ScHTMLTableId mnTableId
; /// The created unique table identifier.
310 ScHTMLTableId
& mrnUnusedId
; /// Reference to global unused identifier variable.
312 /** The constructor assigns an unused identifier to member mnTableId. */
313 explicit ScHTMLTableAutoId( ScHTMLTableId
& rnUnusedId
);
316 class ScHTMLTableMap
;
/** Stores data for one table in an HTML document.

    This class does the main work for importing an HTML document. It manages
    the correct insertion of parse entries into the correct cells and the
    creation of nested tables. Recalculation of resulting document size and
    position is done recursively in all nested tables.
 */
328 /** Creates a new HTML table without content.
    @descr  Internally handles a current cell position. This position is
            invalid until first calls of RowOn() and DataOn().
    @param rParentTable  Reference to the parent table that owns this table.
    @param rInfo  Import info object passed along with the opening tag.
    @param bPreFormText  true = Table is based on preformatted text (<pre> tag).
    @param rDoc  The document, passed by const reference.
    NOTE(review): bPreFormText is documented above, but no such parameter
    appears in the declaration text visible here - confirm against the
    complete header. */
333 explicit ScHTMLTable(
334 ScHTMLTable
& rParentTable
,
335 const HtmlImportInfo
& rInfo
,
337 const ScDocument
& rDoc
);
339 virtual ~ScHTMLTable();
341 /** Returns the name of the table, specified in the TABLE tag. */
342 const OUString
& GetTableName() const { return maTableName
; }
343 /** Returns the caption of the table, specified in the <caption> tag. */
344 const OUString
& GetTableCaption() const { return maCaption
; }
345 /** Returns the unique identifier of the table. */
346 ScHTMLTableId
GetTableId() const { return maTableId
.mnTableId
; }
347 /** Returns the cell spanning of the specified cell. */
348 ScHTMLSize
GetSpan( const ScHTMLPos
& rCellPos
) const;
350 /** Searches in all nested tables for the specified table.
351 @param nTableId Unique identifier of the table. */
352 ScHTMLTable
* FindNestedTable( ScHTMLTableId nTableId
) const;
354 /** Puts the item into the item set of the current entry. */
355 void PutItem( const SfxPoolItem
& rItem
);
356 /** Inserts a text portion into current entry. */
357 void PutText( const HtmlImportInfo
& rInfo
);
358 /** Inserts a new line, if in preformatted text, else does nothing. */
359 void InsertPara( const HtmlImportInfo
& rInfo
);
/** Inserts a line break (<br> tag).
    @descr  Inserts the current entry regardless of whether it is empty. */
364 /** Inserts a heading line (<p> and <h*> tags). */
366 /** Processes a hyperlink (<a> tag). */
369 /** Starts a *new* table nested in this table (<table> tag).
370 @return Pointer to the new table. */
371 ScHTMLTable
* TableOn( const HtmlImportInfo
& rInfo
);
372 /** Closes *this* table (</table> tag).
373 @return Pointer to the parent table. */
374 ScHTMLTable
* TableOff( const HtmlImportInfo
& rInfo
);
375 /** Processes the caption of the table (<caption> tag). */
377 /** Processes the caption of the table (</caption> tag). */
379 /** Starts a *new* table based on preformatted text (<pre> tag).
380 @return Pointer to the new table. */
381 ScHTMLTable
* PreOn( const HtmlImportInfo
& rInfo
);
382 /** Closes *this* table based on preformatted text (</pre> tag).
383 @return Pointer to the parent table. */
384 ScHTMLTable
* PreOff( const HtmlImportInfo
& rInfo
);
386 /** Starts next row (<tr> tag).
387 @descr Cell address is invalid until first call of DataOn(). */
388 void RowOn( const HtmlImportInfo
& rInfo
);
389 /** Closes the current row (<tr> tag).
390 @descr Cell address is invalid until call of RowOn() and DataOn(). */
391 void RowOff( const HtmlImportInfo
& rInfo
);
392 /** Starts the next cell (<td> or <th> tag). */
393 void DataOn( const HtmlImportInfo
& rInfo
);
394 /** Closes the current cell (</td> or </th> tag).
395 @descr Cell address is invalid until next call of DataOn(). */
396 void DataOff( const HtmlImportInfo
& rInfo
);
398 /** Starts the body of the HTML document (<body> tag). */
399 void BodyOn( const HtmlImportInfo
& rInfo
);
400 /** Closes the body of the HTML document (</body> tag). */
401 void BodyOff( const HtmlImportInfo
& rInfo
);
403 /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
404 @descr Used to close this table object regardless on opening tag type.
405 @return Pointer to the parent table, or this, if no parent found. */
406 ScHTMLTable
* CloseTable( const HtmlImportInfo
& rInfo
);
408 /** Returns the resulting document row/column count of the specified HTML row/column. */
409 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellPos
) const;
410 /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
411 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellBegin
, SCCOLROW nCellEnd
) const;
412 /** Returns the total document row/column count in the specified direction. */
413 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
) const;
414 /** Returns the total document row/column count of the specified HTML cell. */
415 ScHTMLSize
GetDocSize( const ScHTMLPos
& rCellPos
) const;
417 /** Returns the resulting Calc position of the top left edge of the table. */
418 const ScHTMLPos
& GetDocPos() const { return maDocBasePos
; }
419 /** Calculates the resulting Calc position of the specified HTML column/row. */
420 SCCOLROW
GetDocPos( ScHTMLOrient eOrient
, SCCOLROW nCellPos
) const;
421 /** Calculates the resulting Calc position of the specified HTML cell. */
422 ScHTMLPos
GetDocPos( const ScHTMLPos
& rCellPos
) const;
424 /** Calculates the current Calc document area of this table. */
425 void GetDocRange( ScRange
& rRange
) const;
427 /** Applies border formatting to the passed document. */
428 void ApplyCellBorders( ScDocument
* pDoc
, const ScAddress
& rFirstPos
) const;
430 SvNumberFormatter
* GetFormatTable();
433 /** Creates a new HTML table without parent.
    @descr  This constructor is used to create the "global table".
    @param rEditEngine   Edit engine (compare member mrEditEngine).
    @param rEEParseList  List that owns the parse entries (compare member mrEEParseList).
    @param rnUnusedId    Unused table identifier variable, passed by non-const reference.
    @param pParser       The parser (compare member mpParser).
    @param rDoc          The document (compare member mrDoc).
    NOTE(review): the parameter list visible here may be incomplete - confirm
    against the complete header. */
435 explicit ScHTMLTable(
437 EditEngine
& rEditEngine
,
438 std::vector
<std::shared_ptr
<ScEEParseEntry
>>& rEEParseList
,
439 ScHTMLTableId
& rnUnusedId
, ScHTMLParser
* pParser
,
440 const ScDocument
& rDoc
);
442 /** Fills all empty cells in this and nested tables with dummy parse entries. */
443 void FillEmptyCells();
444 /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
445 void RecalcDocSize();
446 /** Recalculates the position of all cell entries and nested tables.
447 @param rBasePos The origin of the table in the Calc document. */
448 void RecalcDocPos( const ScHTMLPos
& rBasePos
);
451 typedef ::std::unique_ptr
< ScHTMLTableMap
> ScHTMLTableMapPtr
;
452 typedef ::std::vector
< SCCOLROW
> ScSizeVec
;
453 typedef ::std::vector
< ScHTMLEntry
* > ScHTMLEntryVector
;
454 typedef ::std::unique_ptr
< ScHTMLEntry
> ScHTMLEntryPtr
;
456 /** Returns true, if the current cell does not contain an entry yet. */
457 bool IsEmptyCell() const;
458 /** Returns the item set from cell, row, or table, depending on current state. */
459 const SfxItemSet
& GetCurrItemSet() const;
461 /** Returns true, if import info represents a space character. */
462 static bool IsSpaceCharInfo( const HtmlImportInfo
& rInfo
);
464 /** Creates and returns a new empty flying entry at position (0,0). */
465 ScHTMLEntryPtr
CreateEntry() const;
466 /** Creates a new flying entry.
467 @param rInfo Contains the initial edit engine selection for the entry. */
468 void CreateNewEntry( const HtmlImportInfo
& rInfo
);
470 /** Inserts an empty line in front of the next entry. */
471 void InsertLeadingEmptyLine();
473 /** Pushes the passed entry into the list of the current cell. */
474 void ImplPushEntryToVector( ScHTMLEntryVector
& rEntryVector
, ScHTMLEntryPtr
& rxEntry
);
475 /** Tries to insert the entry into the current cell.
476 @descr If insertion is not possible (i.e., currently no cell open), the
477 entry will be inserted into the parent table.
478 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
479 bool PushEntry( ScHTMLEntryPtr
& rxEntry
);
480 /** Puts the current entry into the entry list, if it is not empty.
481 @param rInfo The import info struct containing the end position of the current entry.
482 @param bLastInCell true = If cell is still empty, put this entry always.
483 @return true = Entry as been pushed into the current cell; false = Entry dropped. */
484 bool PushEntry( const HtmlImportInfo
& rInfo
, bool bLastInCell
= false );
485 /** Pushes a new entry into current cell which references a nested table.*/
486 void PushTableEntry( ScHTMLTableId nTableId
);
488 /** Tries to find a table from the table container.
489 @descr Assumes that the table is located in the current container or
490 that the passed table identifier is 0.
491 @param nTableId Unique identifier of the table or 0. */
492 ScHTMLTable
* GetExistingTable( ScHTMLTableId nTableId
) const;
493 /** Inserts a nested table in the current cell at the specified position.
494 @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
495 ScHTMLTable
* InsertNestedTable( const HtmlImportInfo
& rInfo
, bool bPreFormText
);
497 /** Inserts a new cell in an unused position, starting from current cell position. */
498 void InsertNewCell( const ScHTMLSize
& rSpanSize
);
500 /** Set internal states for a new table row. */
502 /** Set internal states for leaving a table row. */
504 /** Set internal states for entering a new table cell. */
505 void ImplDataOn( const ScHTMLSize
& rSpanSize
);
506 /** Set internal states for leaving a table cell. */
509 /** Inserts additional formatting options from import info into the item set. */
510 static void ProcessFormatOptions( SfxItemSet
& rItemSet
, const HtmlImportInfo
& rInfo
);
512 /** Updates the document column/row size of the specified column or row.
513 @descr Only increases the present count, never decreases. */
514 void SetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellPos
, SCCOLROW nSize
);
515 /** Calculates and sets the resulting size the cell needs in the document.
516 @descr Reduces the needed size in merged cells.
517 @param nCellPos The first column/row position of the (merged) cell.
518 @param nCellSpan The cell spanning in the specified orientation.
519 @param nRealDocSize The raw document size of all entries of the cell. */
520 void CalcNeededDocSize(
521 ScHTMLOrient eOrient
, SCCOLROW nCellPos
,
522 SCCOLROW nCellSpan
, SCCOLROW nRealDocSize
);
525 ScHTMLTable
* mpParentTable
; /// Pointer to parent table.
526 ScHTMLTableMapPtr mxNestedTables
; /// Table of nested HTML tables.
527 OUString maTableName
; /// Table name from <table id> option.
528 OUString maCaption
; /// Caption name of the table from <caption> </caption>
529 OUStringBuffer maCaptionBuffer
; /// Caption buffer of the table from <caption> </caption>
530 ScHTMLTableAutoId maTableId
; /// Unique identifier of this table.
531 SfxItemSet maTableItemSet
; /// Items for the entire table.
532 std::optional
<SfxItemSet
> moRowItemSet
; /// Items for the current table row.
533 std::optional
<SfxItemSet
> moDataItemSet
; /// Items for the current cell.
534 ScRangeList maHMergedCells
; /// List of all horizontally merged cells.
535 ScRangeList maVMergedCells
; /// List of all vertically merged cells.
536 ScRangeList maUsedCells
; /// List of all used cells.
537 EditEngine
& mrEditEngine
; /// Edit engine (from ScEEParser).
538 std::vector
<std::shared_ptr
<ScEEParseEntry
>>& mrEEParseList
; /// List that owns the parse entries (from ScEEParser).
539 std::map
< ScHTMLPos
, ScHTMLEntryVector
> maEntryMap
; /// List of entries for each cell.
540 ScHTMLEntryVector
* mpCurrEntryVector
; /// Current entry vector from map for faster access.
541 ScHTMLEntryPtr mxCurrEntry
; /// Working entry, not yet inserted in a list.
542 ScSizeVec maCumSizes
[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
543 ScHTMLSize maSize
; /// Size of the table.
544 ScHTMLPos maCurrCell
; /// Address of current cell to fill.
545 ScHTMLPos maDocBasePos
; /// Resulting base address in a Calc document.
546 ScHTMLParser
* mpParser
;
547 const ScDocument
& mrDoc
;
548 bool mbBorderOn
:1; /// true = Table borders on.
549 bool mbPreFormText
:1; /// true = Table from preformatted text (<pre> tag).
550 bool mbRowOn
:1; /// true = Inside of <tr> </tr>.
551 bool mbDataOn
:1; /// true = Inside of <td> </td> or <th> </th>.
552 bool mbPushEmptyLine
:1; /// true = Insert empty line before current entry.
553 bool mbCaptionOn
:1; /// true = Inside of <caption> </caption>
556 /** The "global table" representing the entire HTML document. */
557 class ScHTMLGlobalTable
: public ScHTMLTable
/** Constructs the global table. The parameters visible here have the same
    names and types as those of the parent-less ScHTMLTable constructor.
    NOTE(review): the parameter list visible here may be incomplete - confirm
    against the complete header. */
560 explicit ScHTMLGlobalTable(
562 EditEngine
& rEditEngine
,
563 std::vector
<std::shared_ptr
<ScEEParseEntry
>>& rEEParseList
,
564 ScHTMLTableId
& rnUnusedId
, ScHTMLParser
* pParser
,
565 const ScDocument
& rDoc
);
567 virtual ~ScHTMLGlobalTable() override
;
569 /** Recalculates sizes and resulting positions of all document entries. */
/** The HTML parser for data queries. Focuses on data import, not on layout.

    Builds the table structure correctly, ignores extended formatting like
    pictures or column widths.
 */
578 class ScHTMLQueryParser
: public ScHTMLParser
581 explicit ScHTMLQueryParser( EditEngine
* pEditEngine
, ScDocument
* pDoc
);
582 virtual ~ScHTMLQueryParser() override
;
584 virtual ErrCode
Read( SvStream
& rStrm
, const OUString
& rBaseURL
) override
;
586 /** Returns the "global table" which contains the entire HTML document. */
587 virtual const ScHTMLTable
* GetGlobalTable() const override
;
590 /** Handles all possible tags in the HTML document. */
591 void ProcessToken( const HtmlImportInfo
& rInfo
);
592 /** Inserts a text portion into current entry. */
593 void InsertText( const HtmlImportInfo
& rInfo
);
594 /** Processes the <font> tag. */
595 void FontOn( const HtmlImportInfo
& rInfo
);
597 /** Processes the <meta> tag. */
598 void MetaOn( const HtmlImportInfo
& rInfo
);
599 /** Opens the title of the HTML document (<title> tag). */
601 /** Closes the title of the HTML document (</title> tag). */
602 void TitleOff( const HtmlImportInfo
& rInfo
);
604 /** Opens a new table at the current position. */
605 void TableOn( const HtmlImportInfo
& rInfo
);
606 /** Closes the current table. */
607 void TableOff( const HtmlImportInfo
& rInfo
);
608 /** Opens a new table based on preformatted text. */
609 void PreOn( const HtmlImportInfo
& rInfo
);
610 /** Closes the current preformatted text table. */
611 void PreOff( const HtmlImportInfo
& rInfo
);
613 /** Closes the current table, regardless on opening tag. */
614 void CloseTable( const HtmlImportInfo
& rInfo
);
616 void ParseStyle(std::u16string_view rStrm
);
618 DECL_LINK( HTMLImportHdl
, HtmlImportInfo
&, void );
621 typedef ::std::unique_ptr
< ScHTMLGlobalTable
> ScHTMLGlobalTablePtr
;
623 OUStringBuffer maTitle
; /// The title of the document.
624 ScHTMLGlobalTablePtr mxGlobTable
; /// Contains the entire imported document.
625 ScHTMLTable
* mpCurrTable
; /// Pointer to current table (performance).
626 ScHTMLTableId mnUnusedId
; /// First unused table identifier.
627 bool mbTitleOn
; /// true = Inside of <title> </title>.
630 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */