1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
26 #include <string_view>
27 #include <unordered_map>
30 #include <o3tl/sorted_vector.hxx>
31 #include <svtools/parhtml.hxx>
33 #include <rangelst.hxx>
34 #include "eeparser.hxx"
36 const sal_uInt32 SC_HTML_FONTSIZES
= 7; // like export, HTML options
38 // Pixel tolerance for SeekOffset and related.
39 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL
= 1; // single table
40 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE
= 10; // nested
42 // BASE class for HTML parser classes
47 * Collection of HTML style data parsed from the content of <style>
52 typedef std::unordered_map
<OUString
, OUString
> PropsType
;
53 typedef ::std::map
<OUString
, PropsType
> NamePropsType
;
54 typedef ::std::map
<OUString
, NamePropsType
> ElemsType
;
56 NamePropsType m_GlobalProps
; /// global properties (for a given class for all elements)
57 NamePropsType m_ElemGlobalProps
; /// element global properties (no class specified)
58 ElemsType m_ElemProps
; /// element to class to properties (both element and class are given)
59 const OUString maEmpty
; /// just a persistent empty string.
63 void add(const char* pElemName
, size_t nElemName
, const char* pClassName
, size_t nClassName
,
64 const OUString
& aProp
, const OUString
& aValue
);
67 * Find best-matching property value for given element and class names.
69 const OUString
& getPropertyValue(
70 const OUString
& rElem
, const OUString
& rClass
, const OUString
& rPropName
) const;
73 static void insertProp(
74 NamePropsType
& rProps
, const OUString
& aName
,
75 const OUString
& aProp
, const OUString
& aValue
);
78 /** Base class for HTML parser classes. */
79 class ScHTMLParser
: public ScEEParser
81 ScHTMLStyles maStyles
;
83 sal_uInt32 maFontHeights
[ SC_HTML_FONTSIZES
];
84 ScDocument
* mpDoc
; /// The destination document.
87 explicit ScHTMLParser( EditEngine
* pEditEngine
, ScDocument
* pDoc
);
88 virtual ~ScHTMLParser() override
;
90 virtual ErrCode
Read( SvStream
& rStrm
, const OUString
& rBaseURL
) override
= 0;
92 ScHTMLStyles
& GetStyles() { return maStyles
;}
93 ScDocument
& GetDoc() { return *mpDoc
;}
95 /** Returns the "global table" which contains the entire HTML document. */
96 virtual const ScHTMLTable
* GetGlobalTable() const = 0;
99 typedef o3tl::sorted_vector
<sal_uLong
> ScHTMLColOffset
;
101 struct ScHTMLTableStackEntry
103 ScRangeListRef xLockedList
;
104 std::shared_ptr
<ScEEParseEntry
> xCellEntry
;
105 std::shared_ptr
<ScHTMLColOffset
> xLocalColOffset
;
106 sal_uLong nFirstTableCell
;
111 sal_uInt16 nTableWidth
;
112 sal_uInt16 nColOffset
;
113 sal_uInt16 nColOffsetStart
;
115 ScHTMLTableStackEntry( std::shared_ptr
<ScEEParseEntry
> xE
,
116 ScRangeListRef xL
, std::shared_ptr
<ScHTMLColOffset
> xTO
,
119 SCCOL nStart
, SCCOL nMax
, sal_uInt16 nTab
,
120 sal_uInt16 nTW
, sal_uInt16 nCO
, sal_uInt16 nCOS
,
122 : xLockedList(std::move( xL
)), xCellEntry(std::move(xE
)),
123 xLocalColOffset( std::move(xTO
) ),
124 nFirstTableCell( nFTC
),
126 nColCntStart( nStart
), nMaxCol( nMax
),
127 nTable( nTab
), nTableWidth( nTW
),
128 nColOffset( nCO
), nColOffsetStart( nCOS
),
133 struct ScHTMLAdjustStackEntry
138 ScHTMLAdjustStackEntry( SCCOL nLCol
, SCROW nNRow
,
140 : nLastCol( nLCol
), nNextRow( nNRow
),
149 // TODO these need better names
150 typedef ::std::map
<SCROW
, SCROW
> InnerMap
;
151 typedef ::std::map
<sal_uInt16
, std::unique_ptr
<InnerMap
>> OuterMap
;
153 /// HTML parser used during paste into Calc.
154 class ScHTMLLayoutParser
: public ScHTMLParser
159 ::std::stack
< std::unique_ptr
<ScHTMLTableStackEntry
> >
162 ScRangeListRef xLockedList
; // per table
163 std::unique_ptr
<OuterMap
> pTables
;
164 ScHTMLColOffset maColOffset
;
165 std::shared_ptr
<ScHTMLColOffset
> xLocalColOffset
; // per table
166 sal_uLong nFirstTableCell
; // per table
169 sal_uInt16 nMaxTable
;
170 SCCOL nColCntStart
; // first Col per table
171 SCCOL nMaxCol
; // per table
172 sal_uInt16 nTableWidth
; // per table
173 sal_uInt16 nColOffset
; // current, pixel
174 sal_uInt16 nColOffsetStart
; // start value per table, in pixel
175 sal_uInt16 nOffsetTolerance
; // for use with SeekOffset and related
176 bool bFirstRow
; // per table, whether in first row
177 bool bTabInTabCell
:1;
181 DECL_LINK( HTMLImportHdl
, HtmlImportInfo
&, void );
182 void NewActEntry( const ScEEParseEntry
* );
183 static void EntryEnd( ScEEParseEntry
*, const ESelection
& );
184 void ProcToken( HtmlImportInfo
* );
185 void CloseEntry( const HtmlImportInfo
* );
186 void NextRow( const HtmlImportInfo
* );
187 void SkipLocked( ScEEParseEntry
*, bool bJoin
= true );
188 static bool SeekOffset( const ScHTMLColOffset
*, sal_uInt16 nOffset
,
189 SCCOL
* pCol
, sal_uInt16 nOffsetTol
);
190 static void MakeCol( ScHTMLColOffset
*, sal_uInt16
& nOffset
,
191 sal_uInt16
& nWidth
, sal_uInt16 nOffsetTol
,
192 sal_uInt16 nWidthTol
);
193 static void MakeColNoRef( ScHTMLColOffset
*, sal_uInt16 nOffset
,
194 sal_uInt16 nWidth
, sal_uInt16 nOffsetTol
,
195 sal_uInt16 nWidthTol
);
196 static void ModifyOffset( ScHTMLColOffset
*, sal_uInt16
& nOldOffset
,
197 sal_uInt16
& nNewOffset
, sal_uInt16 nOffsetTol
);
198 void Colonize( ScEEParseEntry
* );
199 sal_uInt16
GetWidth( const ScEEParseEntry
* );
203 sal_uInt16
GetWidthPixel( const HTMLOption
& );
204 bool IsAtBeginningOfText( const HtmlImportInfo
* );
206 void TableOn( HtmlImportInfo
* );
207 void ColOn( HtmlImportInfo
* );
208 void TableRowOn( const HtmlImportInfo
* );
209 void TableRowOff( const HtmlImportInfo
* );
210 void TableDataOn( HtmlImportInfo
* );
211 void TableDataOff( const HtmlImportInfo
* );
212 void TableOff( const HtmlImportInfo
* );
213 void Image( HtmlImportInfo
* );
214 void AnchorOn( HtmlImportInfo
* );
215 void FontOn( HtmlImportInfo
* );
216 void SpanOn(HtmlImportInfo
* pInfo
);
217 /// Handles the various data-sheets-* attributes on <td> and <span>.
218 void HandleDataSheetsAttributes(const HTMLOptions
& rOptions
);
221 ScHTMLLayoutParser( EditEngine
*, OUString aBaseURL
, const Size
& aPageSize
, ScDocument
* );
222 virtual ~ScHTMLLayoutParser() override
;
223 virtual ErrCode
Read( SvStream
&, const OUString
& rBaseURL
) override
;
224 virtual const ScHTMLTable
* GetGlobalTable() const override
;
227 // HTML DATA QUERY PARSER
229 /** Declares the orientation in or for a table: column or row. */
enum ScHTMLOrient
{
    tdCol = 0, ///< Column orientation.
    tdRow = 1  ///< Row orientation.
};
232 /** Type for a unique identifier for each table. */
233 typedef sal_uInt16 ScHTMLTableId
;
234 /** Identifier of the "global table" (the entire HTML document). */
235 const ScHTMLTableId SC_HTML_GLOBAL_TABLE
= 0;
236 /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. */
237 const ScHTMLTableId SC_HTML_NO_TABLE
= 0;
239 /** A 2D cell position in an HTML table. */
245 explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
246 explicit ScHTMLPos( SCCOL nCol
, SCROW nRow
) :
247 mnCol( nCol
), mnRow( nRow
) {}
248 explicit ScHTMLPos( const ScAddress
& rAddr
) { Set( rAddr
); }
250 SCCOLROW
Get( ScHTMLOrient eOrient
) const
251 { return (eOrient
== tdCol
) ? mnCol
: mnRow
; }
252 void Set( SCCOL nCol
, SCROW nRow
)
253 { mnCol
= nCol
; mnRow
= nRow
; }
254 void Set( const ScAddress
& rAddr
)
255 { Set( rAddr
.Col(), rAddr
.Row() ); }
256 ScAddress
MakeAddr() const
257 { return ScAddress( mnCol
, mnRow
, 0 ); }
260 inline bool operator<( const ScHTMLPos
& rPos1
, const ScHTMLPos
& rPos2
)
262 return (rPos1
.mnRow
< rPos2
.mnRow
) || ((rPos1
.mnRow
== rPos2
.mnRow
) && (rPos1
.mnCol
< rPos2
.mnCol
));
265 /** A 2D cell size in an HTML table. */
271 explicit ScHTMLSize( SCCOL nCols
, SCROW nRows
) :
272 mnCols( nCols
), mnRows( nRows
) {}
273 void Set( SCCOL nCols
, SCROW nRows
)
274 { mnCols
= nCols
; mnRows
= nRows
; }
277 /** A single entry containing a line of text or representing a table. */
278 struct ScHTMLEntry
: public ScEEParseEntry
281 explicit ScHTMLEntry(
282 const SfxItemSet
& rItemSet
,
283 ScHTMLTableId nTableId
= SC_HTML_NO_TABLE
);
285 /** Returns true, if the selection of the entry is empty. */
286 bool IsEmpty() const { return !aSel
.HasRange(); }
287 /** Returns true, if the entry has any content to be imported. */
288 bool HasContents() const;
289 /** Returns true, if the entry represents a table. */
290 bool IsTable() const { return nTab
!= SC_HTML_NO_TABLE
; }
291 /** Returns true, if the entry represents a table. */
292 ScHTMLTableId
GetTableId() const { return nTab
; }
294 /** Sets or clears the import always state. */
295 void SetImportAlways() { mbImportAlways
= true; }
296 /** Sets start point of the entry selection to the start of the import info object. */
297 void AdjustStart( const HtmlImportInfo
& rInfo
);
298 /** Sets end point of the entry selection to the end of the import info object. */
299 void AdjustEnd( const HtmlImportInfo
& rInfo
);
300 /** Deletes leading and trailing empty paragraphs from the entry. */
301 void Strip( const EditEngine
& rEditEngine
);
303 /** Returns read/write access to the item set of this entry. */
304 SfxItemSet
& GetItemSet() { return aItemSet
; }
305 /** Returns read-only access to the item set of this entry. */
306 const SfxItemSet
& GetItemSet() const { return aItemSet
; }
309 bool mbImportAlways
; /// true = Always import this entry.
312 /** This struct handles creation of unique table identifiers. */
313 struct ScHTMLTableAutoId
315 const ScHTMLTableId mnTableId
; /// The created unique table identifier.
316 ScHTMLTableId
& mrnUnusedId
; /// Reference to global unused identifier variable.
318 /** The constructor assigns an unused identifier to member mnTableId. */
319 explicit ScHTMLTableAutoId( ScHTMLTableId
& rnUnusedId
);
322 class ScHTMLTableMap
;
324 /** Stores data for one table in an HTML document.
326 This class does the main work for importing an HTML document. It manages
327 the correct insertion of parse entries into the correct cells and the
328 creation of nested tables. Recalculation of resulting document size and
329 position is done recursively in all nested tables.
334 /** Creates a new HTML table without content.
335 @descr Internally handles a current cell position. This position is
336 invalid until first calls of RowOn() and DataOn().
337 @param rParentTable Reference to the parent table that owns this table.
338 @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
339 explicit ScHTMLTable(
340 ScHTMLTable
& rParentTable
,
341 const HtmlImportInfo
& rInfo
,
343 const ScDocument
& rDoc
);
345 virtual ~ScHTMLTable();
347 /** Returns the name of the table, specified in the TABLE tag. */
348 const OUString
& GetTableName() const { return maTableName
; }
349 /** Returns the caption of the table, specified in the <caption> tag. */
350 const OUString
& GetTableCaption() const { return maCaption
; }
351 /** Returns the unique identifier of the table. */
352 ScHTMLTableId
GetTableId() const { return maTableId
.mnTableId
; }
353 /** Returns the cell spanning of the specified cell. */
354 ScHTMLSize
GetSpan( const ScHTMLPos
& rCellPos
) const;
356 /** Searches in all nested tables for the specified table.
357 @param nTableId Unique identifier of the table. */
358 ScHTMLTable
* FindNestedTable( ScHTMLTableId nTableId
) const;
360 /** Puts the item into the item set of the current entry. */
361 void PutItem( const SfxPoolItem
& rItem
);
362 /** Inserts a text portion into current entry. */
363 void PutText( const HtmlImportInfo
& rInfo
);
364 /** Inserts a new line, if in preformatted text, else does nothing. */
365 void InsertPara( const HtmlImportInfo
& rInfo
);
367 /** Inserts a line break (<br> tag).
368 @descr Inserts the current entry regardless if it is empty. */
370 /** Inserts a heading line (<p> and <h*> tags). */
372 /** Processes a hyperlink (<a> tag). */
375 /** Starts a *new* table nested in this table (<table> tag).
376 @return Pointer to the new table. */
377 ScHTMLTable
* TableOn( const HtmlImportInfo
& rInfo
);
378 /** Closes *this* table (</table> tag).
379 @return Pointer to the parent table. */
380 ScHTMLTable
* TableOff( const HtmlImportInfo
& rInfo
);
381 /** Processes the caption of the table (<caption> tag). */
383 /** Processes the caption of the table (</caption> tag). */
385 /** Starts a *new* table based on preformatted text (<pre> tag).
386 @return Pointer to the new table. */
387 ScHTMLTable
* PreOn( const HtmlImportInfo
& rInfo
);
388 /** Closes *this* table based on preformatted text (</pre> tag).
389 @return Pointer to the parent table. */
390 ScHTMLTable
* PreOff( const HtmlImportInfo
& rInfo
);
392 /** Starts next row (<tr> tag).
393 @descr Cell address is invalid until first call of DataOn(). */
394 void RowOn( const HtmlImportInfo
& rInfo
);
/** Closes the current row (</tr> tag).
396 @descr Cell address is invalid until call of RowOn() and DataOn(). */
397 void RowOff( const HtmlImportInfo
& rInfo
);
398 /** Starts the next cell (<td> or <th> tag). */
399 void DataOn( const HtmlImportInfo
& rInfo
);
400 /** Closes the current cell (</td> or </th> tag).
401 @descr Cell address is invalid until next call of DataOn(). */
402 void DataOff( const HtmlImportInfo
& rInfo
);
404 /** Starts the body of the HTML document (<body> tag). */
405 void BodyOn( const HtmlImportInfo
& rInfo
);
406 /** Closes the body of the HTML document (</body> tag). */
407 void BodyOff( const HtmlImportInfo
& rInfo
);
409 /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
@descr Used to close this table object regardless of the opening tag type.
411 @return Pointer to the parent table, or this, if no parent found. */
412 ScHTMLTable
* CloseTable( const HtmlImportInfo
& rInfo
);
414 /** Returns the resulting document row/column count of the specified HTML row/column. */
415 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellPos
) const;
416 /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
417 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellBegin
, SCCOLROW nCellEnd
) const;
418 /** Returns the total document row/column count in the specified direction. */
419 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
) const;
420 /** Returns the total document row/column count of the specified HTML cell. */
421 ScHTMLSize
GetDocSize( const ScHTMLPos
& rCellPos
) const;
423 /** Returns the resulting Calc position of the top left edge of the table. */
424 const ScHTMLPos
& GetDocPos() const { return maDocBasePos
; }
425 /** Calculates the resulting Calc position of the specified HTML column/row. */
426 SCCOLROW
GetDocPos( ScHTMLOrient eOrient
, SCCOLROW nCellPos
) const;
427 /** Calculates the resulting Calc position of the specified HTML cell. */
428 ScHTMLPos
GetDocPos( const ScHTMLPos
& rCellPos
) const;
430 /** Calculates the current Calc document area of this table. */
431 void GetDocRange( ScRange
& rRange
) const;
433 /** Applies border formatting to the passed document. */
434 void ApplyCellBorders( ScDocument
* pDoc
, const ScAddress
& rFirstPos
) const;
436 SvNumberFormatter
* GetFormatTable();
439 /** Creates a new HTML table without parent.
440 @descr This constructor is used to create the "global table". */
441 explicit ScHTMLTable(
443 EditEngine
& rEditEngine
,
444 std::vector
<std::shared_ptr
<ScEEParseEntry
>>& rEEParseList
,
445 ScHTMLTableId
& rnUnusedId
, ScHTMLParser
* pParser
,
446 const ScDocument
& rDoc
);
448 /** Fills all empty cells in this and nested tables with dummy parse entries. */
449 void FillEmptyCells();
450 /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
451 void RecalcDocSize();
452 /** Recalculates the position of all cell entries and nested tables.
453 @param rBasePos The origin of the table in the Calc document. */
454 void RecalcDocPos( const ScHTMLPos
& rBasePos
);
457 typedef ::std::unique_ptr
< ScHTMLTableMap
> ScHTMLTableMapPtr
;
458 typedef ::std::vector
< SCCOLROW
> ScSizeVec
;
459 typedef ::std::vector
< ScHTMLEntry
* > ScHTMLEntryVector
;
460 typedef ::std::unique_ptr
< ScHTMLEntry
> ScHTMLEntryPtr
;
462 /** Returns true, if the current cell does not contain an entry yet. */
463 bool IsEmptyCell() const;
464 /** Returns the item set from cell, row, or table, depending on current state. */
465 const SfxItemSet
& GetCurrItemSet() const;
467 /** Returns true, if import info represents a space character. */
468 static bool IsSpaceCharInfo( const HtmlImportInfo
& rInfo
);
470 /** Creates and returns a new empty flying entry at position (0,0). */
471 ScHTMLEntryPtr
CreateEntry() const;
472 /** Creates a new flying entry.
473 @param rInfo Contains the initial edit engine selection for the entry. */
474 void CreateNewEntry( const HtmlImportInfo
& rInfo
);
476 /** Inserts an empty line in front of the next entry. */
477 void InsertLeadingEmptyLine();
479 /** Pushes the passed entry into the list of the current cell. */
480 void ImplPushEntryToVector( ScHTMLEntryVector
& rEntryVector
, ScHTMLEntryPtr
& rxEntry
);
481 /** Tries to insert the entry into the current cell.
482 @descr If insertion is not possible (i.e., currently no cell open), the
483 entry will be inserted into the parent table.
484 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
485 bool PushEntry( ScHTMLEntryPtr
& rxEntry
);
486 /** Puts the current entry into the entry list, if it is not empty.
487 @param rInfo The import info struct containing the end position of the current entry.
488 @param bLastInCell true = If cell is still empty, put this entry always.
@return true = Entry has been pushed into the current cell; false = Entry dropped. */
490 bool PushEntry( const HtmlImportInfo
& rInfo
, bool bLastInCell
= false );
491 /** Pushes a new entry into current cell which references a nested table.*/
492 void PushTableEntry( ScHTMLTableId nTableId
);
494 /** Tries to find a table from the table container.
495 @descr Assumes that the table is located in the current container or
496 that the passed table identifier is 0.
497 @param nTableId Unique identifier of the table or 0. */
498 ScHTMLTable
* GetExistingTable( ScHTMLTableId nTableId
) const;
499 /** Inserts a nested table in the current cell at the specified position.
500 @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
501 ScHTMLTable
* InsertNestedTable( const HtmlImportInfo
& rInfo
, bool bPreFormText
);
503 /** Inserts a new cell in an unused position, starting from current cell position. */
504 void InsertNewCell( const ScHTMLSize
& rSpanSize
);
506 /** Set internal states for a new table row. */
508 /** Set internal states for leaving a table row. */
510 /** Set internal states for entering a new table cell. */
511 void ImplDataOn( const ScHTMLSize
& rSpanSize
);
512 /** Set internal states for leaving a table cell. */
515 /** Inserts additional formatting options from import info into the item set. */
516 static void ProcessFormatOptions( SfxItemSet
& rItemSet
, const HtmlImportInfo
& rInfo
);
518 /** Updates the document column/row size of the specified column or row.
519 @descr Only increases the present count, never decreases. */
520 void SetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellPos
, SCCOLROW nSize
);
521 /** Calculates and sets the resulting size the cell needs in the document.
522 @descr Reduces the needed size in merged cells.
523 @param nCellPos The first column/row position of the (merged) cell.
524 @param nCellSpan The cell spanning in the specified orientation.
525 @param nRealDocSize The raw document size of all entries of the cell. */
526 void CalcNeededDocSize(
527 ScHTMLOrient eOrient
, SCCOLROW nCellPos
,
528 SCCOLROW nCellSpan
, SCCOLROW nRealDocSize
);
531 ScHTMLTable
* mpParentTable
; /// Pointer to parent table.
532 ScHTMLTableMapPtr mxNestedTables
; /// Table of nested HTML tables.
533 OUString maTableName
; /// Table name from <table id> option.
534 OUString maCaption
; /// Caption name of the table from <caption> </caption>
535 OUStringBuffer maCaptionBuffer
; /// Caption buffer of the table from <caption> </caption>
536 ScHTMLTableAutoId maTableId
; /// Unique identifier of this table.
537 SfxItemSet maTableItemSet
; /// Items for the entire table.
538 std::optional
<SfxItemSet
> moRowItemSet
; /// Items for the current table row.
539 std::optional
<SfxItemSet
> moDataItemSet
; /// Items for the current cell.
540 ScRangeList maHMergedCells
; /// List of all horizontally merged cells.
541 ScRangeList maVMergedCells
; /// List of all vertically merged cells.
542 ScRangeList maUsedCells
; /// List of all used cells.
543 EditEngine
& mrEditEngine
; /// Edit engine (from ScEEParser).
544 std::vector
<std::shared_ptr
<ScEEParseEntry
>>& mrEEParseList
; /// List that owns the parse entries (from ScEEParser).
545 std::map
< ScHTMLPos
, ScHTMLEntryVector
> maEntryMap
; /// List of entries for each cell.
546 ScHTMLEntryVector
* mpCurrEntryVector
; /// Current entry vector from map for faster access.
547 ScHTMLEntryPtr mxCurrEntry
; /// Working entry, not yet inserted in a list.
548 ScSizeVec maCumSizes
[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
549 ScHTMLSize maSize
; /// Size of the table.
550 ScHTMLPos maCurrCell
; /// Address of current cell to fill.
551 ScHTMLPos maDocBasePos
; /// Resulting base address in a Calc document.
552 ScHTMLParser
* mpParser
;
553 const ScDocument
& mrDoc
;
554 bool mbBorderOn
:1; /// true = Table borders on.
555 bool mbPreFormText
:1; /// true = Table from preformatted text (<pre> tag).
556 bool mbRowOn
:1; /// true = Inside of <tr> </tr>.
557 bool mbDataOn
:1; /// true = Inside of <td> </td> or <th> </th>.
558 bool mbPushEmptyLine
:1; /// true = Insert empty line before current entry.
559 bool mbCaptionOn
:1; /// true = Inside of <caption> </caption>
562 /** The "global table" representing the entire HTML document. */
563 class ScHTMLGlobalTable
: public ScHTMLTable
566 explicit ScHTMLGlobalTable(
568 EditEngine
& rEditEngine
,
569 std::vector
<std::shared_ptr
<ScEEParseEntry
>>& rEEParseList
,
570 ScHTMLTableId
& rnUnusedId
, ScHTMLParser
* pParser
,
571 const ScDocument
& rDoc
);
573 virtual ~ScHTMLGlobalTable() override
;
575 /** Recalculates sizes and resulting positions of all document entries. */
579 /** The HTML parser for data queries. Focuses on data import, not on layout.
581 Builds the table structure correctly, ignores extended formatting like
582 pictures or column widths.
584 Used during file load / import into Calc.
586 class ScHTMLQueryParser
: public ScHTMLParser
589 explicit ScHTMLQueryParser( EditEngine
* pEditEngine
, ScDocument
* pDoc
);
590 virtual ~ScHTMLQueryParser() override
;
592 virtual ErrCode
Read( SvStream
& rStrm
, const OUString
& rBaseURL
) override
;
594 /** Returns the "global table" which contains the entire HTML document. */
595 virtual const ScHTMLTable
* GetGlobalTable() const override
;
598 /** Handles all possible tags in the HTML document. */
599 void ProcessToken( const HtmlImportInfo
& rInfo
);
600 /** Inserts a text portion into current entry. */
601 void InsertText( const HtmlImportInfo
& rInfo
);
602 /** Processes the <font> tag. */
603 void FontOn( const HtmlImportInfo
& rInfo
);
605 /** Processes the <meta> tag. */
606 void MetaOn( const HtmlImportInfo
& rInfo
);
607 /** Opens the title of the HTML document (<title> tag). */
609 /** Closes the title of the HTML document (</title> tag). */
610 void TitleOff( const HtmlImportInfo
& rInfo
);
612 /** Opens a new table at the current position. */
613 void TableOn( const HtmlImportInfo
& rInfo
);
614 /** Closes the current table. */
615 void TableOff( const HtmlImportInfo
& rInfo
);
616 /** Opens a new table based on preformatted text. */
617 void PreOn( const HtmlImportInfo
& rInfo
);
618 /** Closes the current preformatted text table. */
619 void PreOff( const HtmlImportInfo
& rInfo
);
/** Closes the current table, regardless of the opening tag. */
622 void CloseTable( const HtmlImportInfo
& rInfo
);
624 void ParseStyle(std::u16string_view rStrm
);
626 DECL_LINK( HTMLImportHdl
, HtmlImportInfo
&, void );
629 typedef ::std::unique_ptr
< ScHTMLGlobalTable
> ScHTMLGlobalTablePtr
;
631 OUStringBuffer maTitle
; /// The title of the document.
632 ScHTMLGlobalTablePtr mxGlobTable
; /// Contains the entire imported document.
633 ScHTMLTable
* mpCurrTable
; /// Pointer to current table (performance).
634 ScHTMLTableId mnUnusedId
; /// First unused table identifier.
635 bool mbTitleOn
; /// true = Inside of <title> </title>.
638 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */