1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_SC_SOURCE_FILTER_INC_HTMLPARS_HXX
21 #define INCLUDED_SC_SOURCE_FILTER_INC_HTMLPARS_HXX
27 #include <unordered_map>
29 #include <o3tl/sorted_vector.hxx>
30 #include <boost/ptr_container/ptr_map.hpp>
32 #include "rangelst.hxx"
33 #include "eeparser.hxx"
35 const sal_uInt32 SC_HTML_FONTSIZES
= 7; // same as export, HTML options
37 // Pixel tolerance for SeekOffset and related.
38 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL
= 1; // single table
39 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE
= 10; // nested
41 // BASE class for HTML parser classes
46 * Collection of HTML style data parsed from the content of <style>
51 typedef std::unordered_map
<OUString
, OUString
, OUStringHash
> PropsType
;
52 typedef ::boost::ptr_map
<OUString
, PropsType
> NamePropsType
;
53 typedef ::boost::ptr_map
<OUString
, NamePropsType
> ElemsType
;
55 NamePropsType maGlobalProps
; /// global properties (for a given class for all elements)
56 NamePropsType maElemGlobalProps
; /// element global properties (no class specified)
57 ElemsType maElemProps
; /// element to class to properties (both element and class are given)
58 const OUString maEmpty
; /// just a persistent empty string.
62 void add(const char* pElemName
, size_t nElemName
, const char* pClassName
, size_t nClassName
,
63 const OUString
& aProp
, const OUString
& aValue
);
66 * Find best-matching property value for given element and class names.
68 const OUString
& getPropertyValue(
69 const OUString
& rElem
, const OUString
& rClass
, const OUString
& rPropName
) const;
72 static void insertProp(
73 NamePropsType
& rProps
, const OUString
& aName
,
74 const OUString
& aProp
, const OUString
& aValue
);
77 /** Base class for HTML parser classes. */
78 class ScHTMLParser
: public ScEEParser
80 ScHTMLStyles maStyles
;
82 sal_uInt32 maFontHeights
[ SC_HTML_FONTSIZES
];
83 ScDocument
* mpDoc
; /// The destination document.
86 explicit ScHTMLParser( EditEngine
* pEditEngine
, ScDocument
* pDoc
);
87 virtual ~ScHTMLParser();
89 virtual sal_uLong
Read( SvStream
& rStrm
, const OUString
& rBaseURL
) SAL_OVERRIDE
= 0;
91 ScHTMLStyles
& GetStyles() { return maStyles
;}
92 ScDocument
& GetDoc() { return *mpDoc
;}
94 /** Returns the "global table" which contains the entire HTML document. */
95 virtual const ScHTMLTable
* GetGlobalTable() const = 0;
98 typedef o3tl::sorted_vector
<sal_uLong
> ScHTMLColOffset
;
100 struct ScHTMLTableStackEntry
102 ScRangeListRef xLockedList
;
103 ScEEParseEntry
* pCellEntry
;
104 ScHTMLColOffset
* pLocalColOffset
;
105 sal_uLong nFirstTableCell
;
111 sal_uInt16 nTableWidth
;
112 sal_uInt16 nColOffset
;
113 sal_uInt16 nColOffsetStart
;
115 ScHTMLTableStackEntry( ScEEParseEntry
* pE
,
116 const ScRangeListRef
& rL
, ScHTMLColOffset
* pTO
,
118 SCCOL nCol
, SCROW nRow
,
119 SCCOL nStart
, SCCOL nMax
, sal_uInt16 nTab
,
120 sal_uInt16 nTW
, sal_uInt16 nCO
, sal_uInt16 nCOS
,
122 : xLockedList( rL
), pCellEntry( pE
),
123 pLocalColOffset( pTO
),
124 nFirstTableCell( nFTC
),
125 nColCnt( nCol
), nRowCnt( nRow
),
126 nColCntStart( nStart
), nMaxCol( nMax
),
127 nTable( nTab
), nTableWidth( nTW
),
128 nColOffset( nCO
), nColOffsetStart( nCOS
),
131 ~ScHTMLTableStackEntry() {}
133 typedef ::std::stack
< ScHTMLTableStackEntry
* > ScHTMLTableStack
;
135 struct ScHTMLAdjustStackEntry
140 ScHTMLAdjustStackEntry( SCCOL nLCol
, SCROW nNRow
,
142 : nLastCol( nLCol
), nNextRow( nNRow
),
146 typedef ::std::stack
< ScHTMLAdjustStackEntry
* > ScHTMLAdjustStack
;
152 // TODO these need better names
153 typedef ::std::map
<SCROW
, SCROW
> InnerMap
;
154 typedef ::std::map
<sal_uInt16
, InnerMap
*> OuterMap
;
156 class ScHTMLLayoutParser
: public ScHTMLParser
161 ScHTMLTableStack aTableStack
;
163 ScRangeListRef xLockedList
; // per table
165 ScHTMLColOffset
* pColOffset
;
166 ScHTMLColOffset
* pLocalColOffset
; // per table
167 sal_uLong nFirstTableCell
; // per table
170 sal_uInt16 nMaxTable
;
171 SCCOL nColCntStart
; // first column per table
172 SCCOL nMaxCol
; // per table
173 sal_uInt16 nTableWidth
; // per table
174 sal_uInt16 nColOffset
; // current, in pixels
175 sal_uInt16 nColOffsetStart
; // start value per table, in pixels
176 sal_uInt16 nOffsetTolerance
; // for use with SeekOffset and related
177 bool bTabInTabCell
:1;
178 bool bFirstRow
:1; // per table, whether in first row
182 DECL_LINK( HTMLImportHdl
, ImportInfo
* );
183 void NewActEntry( ScEEParseEntry
* );
184 static void EntryEnd( ScEEParseEntry
*, const ESelection
& );
185 void ProcToken( ImportInfo
* );
186 void CloseEntry( ImportInfo
* );
187 void NextRow( ImportInfo
* );
188 void SkipLocked( ScEEParseEntry
*, bool bJoin
= true );
189 static bool SeekOffset( ScHTMLColOffset
*, sal_uInt16 nOffset
,
190 SCCOL
* pCol
, sal_uInt16 nOffsetTol
);
191 static void MakeCol( ScHTMLColOffset
*, sal_uInt16
& nOffset
,
192 sal_uInt16
& nWidth
, sal_uInt16 nOffsetTol
,
193 sal_uInt16 nWidthTol
);
194 static void MakeColNoRef( ScHTMLColOffset
*, sal_uInt16 nOffset
,
195 sal_uInt16 nWidth
, sal_uInt16 nOffsetTol
,
196 sal_uInt16 nWidthTol
);
197 static void ModifyOffset( ScHTMLColOffset
*, sal_uInt16
& nOldOffset
,
198 sal_uInt16
& nNewOffset
, sal_uInt16 nOffsetTol
);
199 void Colonize( ScEEParseEntry
* );
200 sal_uInt16
GetWidth( ScEEParseEntry
* );
204 sal_uInt16
GetWidthPixel( const HTMLOption
& );
205 bool IsAtBeginningOfText( ImportInfo
* );
207 void TableOn( ImportInfo
* );
208 void ColOn( ImportInfo
* );
209 void TableRowOn( ImportInfo
* );
210 void TableRowOff( ImportInfo
* );
211 void TableDataOn( ImportInfo
* );
212 void TableDataOff( ImportInfo
* );
213 void TableOff( ImportInfo
* );
214 void Image( ImportInfo
* );
215 void AnchorOn( ImportInfo
* );
216 void FontOn( ImportInfo
* );
219 ScHTMLLayoutParser( EditEngine
*, const OUString
& rBaseURL
, const Size
& aPageSize
, ScDocument
* );
220 virtual ~ScHTMLLayoutParser();
221 virtual sal_uLong
Read( SvStream
&, const OUString
& rBaseURL
) SAL_OVERRIDE
;
222 virtual const ScHTMLTable
* GetGlobalTable() const SAL_OVERRIDE
;
225 // HTML DATA QUERY PARSER
227 /** Declares the orientation in or for a table: column or row. */
228 enum ScHTMLOrient
{ tdCol
= 0 , tdRow
= 1 };
230 /** Type for a unique identifier for each table. */
231 typedef sal_uInt16 ScHTMLTableId
;
232 /** Identifier of the "global table" (the entire HTML document). */
233 const ScHTMLTableId SC_HTML_GLOBAL_TABLE
= 0;
234 /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. */
235 const ScHTMLTableId SC_HTML_NO_TABLE
= 0;
237 /** A 2D cell position in an HTML table. */
243 inline explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
244 inline explicit ScHTMLPos( SCCOL nCol
, SCROW nRow
) :
245 mnCol( nCol
), mnRow( nRow
) {}
246 inline explicit ScHTMLPos( const ScAddress
& rAddr
) { Set( rAddr
); }
248 inline SCCOLROW
Get( ScHTMLOrient eOrient
) const
249 { return (eOrient
== tdCol
) ? mnCol
: mnRow
; }
250 inline void Set( SCCOL nCol
, SCROW nRow
)
251 { mnCol
= nCol
; mnRow
= nRow
; }
252 inline void Set( const ScAddress
& rAddr
)
253 { Set( rAddr
.Col(), rAddr
.Row() ); }
254 inline void Move( SCsCOL nColDiff
, SCsROW nRowDiff
)
255 { mnCol
= mnCol
+ nColDiff
; mnRow
= mnRow
+ nRowDiff
; }
256 inline ScAddress
MakeAddr() const
257 { return ScAddress( mnCol
, mnRow
, 0 ); }
260 inline bool operator==( const ScHTMLPos
& rPos1
, const ScHTMLPos
& rPos2
)
262 return (rPos1
.mnRow
== rPos2
.mnRow
) && (rPos1
.mnCol
== rPos2
.mnCol
);
265 inline bool operator<( const ScHTMLPos
& rPos1
, const ScHTMLPos
& rPos2
)
267 return (rPos1
.mnRow
< rPos2
.mnRow
) || ((rPos1
.mnRow
== rPos2
.mnRow
) && (rPos1
.mnCol
< rPos2
.mnCol
));
270 /** A 2D cell size in an HTML table. */
276 inline explicit ScHTMLSize() : mnCols( 0 ), mnRows( 0 ) {}
277 inline explicit ScHTMLSize( SCCOL nCols
, SCROW nRows
) :
278 mnCols( nCols
), mnRows( nRows
) {}
280 inline SCCOLROW
Get( ScHTMLOrient eOrient
) const
281 { return (eOrient
== tdCol
) ? mnCols
: mnRows
; }
282 inline void Set( SCCOL nCols
, SCROW nRows
)
283 { mnCols
= nCols
; mnRows
= nRows
; }
284 inline void Expand( SCsCOL nColDiff
, SCsROW nRowDiff
)
285 { mnCols
= mnCols
+ nColDiff
; mnRows
= mnRows
+ nRowDiff
; }
288 inline bool operator==( const ScHTMLSize
& rSize1
, const ScHTMLSize
& rSize2
)
290 return (rSize1
.mnRows
== rSize2
.mnRows
) && (rSize1
.mnCols
== rSize2
.mnCols
);
293 /** A single entry containing a line of text or representing a table. */
294 struct ScHTMLEntry
: public ScEEParseEntry
297 explicit ScHTMLEntry(
298 const SfxItemSet
& rItemSet
,
299 ScHTMLTableId nTableId
= SC_HTML_NO_TABLE
);
301 /** Returns true, if the selection of the entry is empty. */
302 inline bool IsEmpty() const { return !aSel
.HasRange(); }
303 /** Returns true, if the entry has any content to be imported. */
304 bool HasContents() const;
305 /** Returns true, if the entry represents a table. */
306 inline bool IsTable() const { return nTab
!= SC_HTML_NO_TABLE
; }
307 /** Returns the table identifier of this entry (the value of nTab). */
308 inline ScHTMLTableId
GetTableId() const { return nTab
; }
310 /** Sets or clears the import always state. */
311 inline void SetImportAlways( bool bSet
= true ) { mbImportAlways
= bSet
; }
312 /** Sets start point of the entry selection to the start of the import info object. */
313 void AdjustStart( const ImportInfo
& rInfo
);
314 /** Sets end point of the entry selection to the end of the import info object. */
315 void AdjustEnd( const ImportInfo
& rInfo
);
316 /** Deletes leading and trailing empty paragraphs from the entry. */
317 void Strip( const EditEngine
& rEditEngine
);
319 /** Returns read/write access to the item set of this entry. */
320 inline SfxItemSet
& GetItemSet() { return aItemSet
; }
321 /** Returns read-only access to the item set of this entry. */
322 inline const SfxItemSet
& GetItemSet() const { return aItemSet
; }
325 bool mbImportAlways
; /// true = Always import this entry.
328 /** This struct handles creation of unique table identifiers. */
329 struct ScHTMLTableAutoId
331 const ScHTMLTableId mnTableId
; /// The created unique table identifier.
332 ScHTMLTableId
& mrnUnusedId
; /// Reference to global unused identifier variable.
334 /** The constructor assigns an unused identifier to member mnTableId. */
335 explicit ScHTMLTableAutoId( ScHTMLTableId
& rnUnusedId
);
338 class ScHTMLTableMap
;
340 /** Stores data for one table in an HTML document.
342 This class does the main work for importing an HTML document. It manages
343 the correct insertion of parse entries into the correct cells and the
344 creation of nested tables. Recalculation of resulting document size and
345 position is done recursively in all nested tables.
350 /** Creates a new HTML table without content.
351 @descr Internally handles a current cell position. This position is
352 invalid until first calls of RowOn() and DataOn().
353 @param rParentTable Reference to the parent table that owns this table.
354 @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
355 explicit ScHTMLTable(
356 ScHTMLTable
& rParentTable
,
357 const ImportInfo
& rInfo
,
360 virtual ~ScHTMLTable();
362 /** Returns the name of the table, specified in the TABLE tag. */
363 inline const OUString
& GetTableName() const { return maTableName
; }
364 /** Returns the unique identifier of the table. */
365 inline ScHTMLTableId
GetTableId() const { return maTableId
.mnTableId
; }
366 /** Returns the table size. */
367 inline const ScHTMLSize
& GetSize() const { return maSize
; }
368 /** Returns the cell spanning of the specified cell. */
369 ScHTMLSize
GetSpan( const ScHTMLPos
& rCellPos
) const;
371 /** Searches in all nested tables for the specified table.
372 @param nTableId Unique identifier of the table. */
373 ScHTMLTable
* FindNestedTable( ScHTMLTableId nTableId
) const;
375 /** Puts the item into the item set of the current entry. */
376 void PutItem( const SfxPoolItem
& rItem
);
377 /** Inserts a text portion into current entry. */
378 void PutText( const ImportInfo
& rInfo
);
379 /** Inserts a new line, if in preformatted text, else does nothing. */
380 void InsertPara( const ImportInfo
& rInfo
);
382 /** Inserts a line break (<br> tag).
383 @descr Inserts the current entry regardless of whether it is empty. */
385 /** Inserts a heading line (<p> and <h*> tags). */
387 /** Processes a hyperlink (<a> tag). */
390 /** Starts a *new* table nested in this table (<table> tag).
391 @return Pointer to the new table. */
392 ScHTMLTable
* TableOn( const ImportInfo
& rInfo
);
393 /** Closes *this* table (</table> tag).
394 @return Pointer to the parent table. */
395 ScHTMLTable
* TableOff( const ImportInfo
& rInfo
);
396 /** Starts a *new* table based on preformatted text (<pre> tag).
397 @return Pointer to the new table. */
398 ScHTMLTable
* PreOn( const ImportInfo
& rInfo
);
399 /** Closes *this* table based on preformatted text (</pre> tag).
400 @return Pointer to the parent table. */
401 ScHTMLTable
* PreOff( const ImportInfo
& rInfo
);
403 /** Starts next row (<tr> tag).
404 @descr Cell address is invalid until first call of DataOn(). */
405 void RowOn( const ImportInfo
& rInfo
);
406 /** Closes the current row (</tr> tag).
407 @descr Cell address is invalid until call of RowOn() and DataOn(). */
408 void RowOff( const ImportInfo
& rInfo
);
409 /** Starts the next cell (<td> or <th> tag). */
410 void DataOn( const ImportInfo
& rInfo
);
411 /** Closes the current cell (</td> or </th> tag).
412 @descr Cell address is invalid until next call of DataOn(). */
413 void DataOff( const ImportInfo
& rInfo
);
415 /** Starts the body of the HTML document (<body> tag). */
416 void BodyOn( const ImportInfo
& rInfo
);
417 /** Closes the body of the HTML document (</body> tag). */
418 void BodyOff( const ImportInfo
& rInfo
);
420 /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
421 @descr Used to close this table object regardless of the opening tag type.
422 @return Pointer to the parent table, or this, if no parent found. */
423 ScHTMLTable
* CloseTable( const ImportInfo
& rInfo
);
425 /** Returns the resulting document row/column count of the specified HTML row/column. */
426 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellPos
) const;
427 /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
428 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellBegin
, SCCOLROW nCellEnd
) const;
429 /** Returns the total document row/column count in the specified direction. */
430 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
) const;
431 /** Returns the total document row/column count of the specified HTML cell. */
432 ScHTMLSize
GetDocSize( const ScHTMLPos
& rCellPos
) const;
434 /** Returns the resulting Calc position of the top left edge of the table. */
435 inline const ScHTMLPos
& GetDocPos() const { return maDocBasePos
; }
436 /** Calculates the resulting Calc position of the specified HTML column/row. */
437 SCCOLROW
GetDocPos( ScHTMLOrient eOrient
, SCCOLROW nCellPos
= 0 ) const;
438 /** Calculates the resulting Calc position of the specified HTML cell. */
439 ScHTMLPos
GetDocPos( const ScHTMLPos
& rCellPos
) const;
441 /** Calculates the current Calc document area of this table. */
442 void GetDocRange( ScRange
& rRange
) const;
444 /** Applies border formatting to the passed document. */
445 void ApplyCellBorders( ScDocument
* pDoc
, const ScAddress
& rFirstPos
) const;
447 SvNumberFormatter
* GetFormatTable();
450 /** Creates a new HTML table without parent.
451 @descr This constructor is used to create the "global table". */
452 explicit ScHTMLTable(
454 EditEngine
& rEditEngine
,
455 ::std::vector
< ScEEParseEntry
* >& rEEParseList
,
456 ScHTMLTableId
& rnUnusedId
, ScHTMLParser
* pParser
);
458 /** Fills all empty cells in this and nested tables with dummy parse entries. */
459 void FillEmptyCells();
460 /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
461 void RecalcDocSize();
462 /** Recalculates the position of all cell entries and nested tables.
463 @param rBasePos The origin of the table in the Calc document. */
464 void RecalcDocPos( const ScHTMLPos
& rBasePos
);
467 typedef ::std::unique_ptr
< ScHTMLTableMap
> ScHTMLTableMapPtr
;
468 typedef ::std::unique_ptr
< SfxItemSet
> SfxItemSetPtr
;
469 typedef ::std::vector
< SCCOLROW
> ScSizeVec
;
470 typedef ::std::list
< ScHTMLEntry
* > ScHTMLEntryList
;
471 typedef ::std::map
< ScHTMLPos
, ScHTMLEntryList
> ScHTMLEntryMap
;
472 typedef ::std::unique_ptr
< ScHTMLEntry
> ScHTMLEntryPtr
;
474 /** Returns true, if the current cell does not contain an entry yet. */
475 bool IsEmptyCell() const;
476 /** Returns the item set from cell, row, or table, depending on current state. */
477 const SfxItemSet
& GetCurrItemSet() const;
479 /** Returns true, if import info represents a space character. */
480 static bool IsSpaceCharInfo( const ImportInfo
& rInfo
);
482 /** Creates and returns a new empty flying entry at position (0,0). */
483 ScHTMLEntryPtr
CreateEntry() const;
484 /** Creates a new flying entry.
485 @param rInfo Contains the initial edit engine selection for the entry. */
486 void CreateNewEntry( const ImportInfo
& rInfo
);
488 /** Inserts an empty line in front of the next entry. */
489 void InsertLeadingEmptyLine();
491 /** Pushes the passed entry into the list of the current cell. */
492 void ImplPushEntryToList( ScHTMLEntryList
& rEntryList
, ScHTMLEntryPtr
& rxEntry
);
493 /** Tries to insert the entry into the current cell.
494 @descr If insertion is not possible (i.e., currently no cell open), the
495 entry will be inserted into the parent table.
496 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
497 bool PushEntry( ScHTMLEntryPtr
& rxEntry
);
498 /** Puts the current entry into the entry list, if it is not empty.
499 @param rInfo The import info struct containing the end position of the current entry.
500 @param bLastInCell true = If cell is still empty, put this entry always.
501 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
502 bool PushEntry( const ImportInfo
& rInfo
, bool bLastInCell
= false );
503 /** Pushes a new entry into current cell which references a nested table.
504 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
505 bool PushTableEntry( ScHTMLTableId nTableId
);
507 /** Tries to find a table from the table container.
508 @descr Assumes that the table is located in the current container or
509 that the passed table identifier is 0.
510 @param nTableId Unique identifier of the table or 0. */
511 ScHTMLTable
* GetExistingTable( ScHTMLTableId nTableId
) const;
512 /** Inserts a nested table in the current cell at the specified position.
513 @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
514 ScHTMLTable
* InsertNestedTable( const ImportInfo
& rInfo
, bool bPreFormText
);
516 /** Inserts a new cell in an unused position, starting from current cell position. */
517 void InsertNewCell( const ScHTMLSize
& rSpanSize
);
519 /** Set internal states for a new table row. */
521 /** Set internal states for leaving a table row. */
523 /** Set internal states for entering a new table cell. */
524 void ImplDataOn( const ScHTMLSize
& rSpanSize
);
525 /** Set internal states for leaving a table cell. */
528 /** Inserts additional formatting options from import info into the item set. */
529 static void ProcessFormatOptions( SfxItemSet
& rItemSet
, const ImportInfo
& rInfo
);
531 /** Updates the document column/row size of the specified column or row.
532 @descr Only increases the present count, never decreases. */
533 void SetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellPos
, SCCOLROW nSize
);
534 /** Calculates and sets the resulting size the cell needs in the document.
535 @descr Reduces the needed size in merged cells.
536 @param nCellPos The first column/row position of the (merged) cell.
537 @param nCellSpan The cell spanning in the specified orientation.
538 @param nRealDocSize The raw document size of all entries of the cell. */
539 void CalcNeededDocSize(
540 ScHTMLOrient eOrient
, SCCOLROW nCellPos
,
541 SCCOLROW nCellSpan
, SCCOLROW nRealDocSize
);
544 ScHTMLTable
* mpParentTable
; /// Pointer to parent table.
545 ScHTMLTableMapPtr mxNestedTables
; /// Table of nested HTML tables.
546 OUString maTableName
; /// Table name from <table id> option.
547 ScHTMLTableAutoId maTableId
; /// Unique identifier of this table.
548 SfxItemSet maTableItemSet
; /// Items for the entire table.
549 SfxItemSetPtr mxRowItemSet
; /// Items for the current table row.
550 SfxItemSetPtr mxDataItemSet
; /// Items for the current cell.
551 ScRangeList maHMergedCells
; /// List of all horizontally merged cells.
552 ScRangeList maVMergedCells
; /// List of all vertically merged cells.
553 ScRangeList maUsedCells
; /// List of all used cells.
554 EditEngine
& mrEditEngine
; /// Edit engine (from ScEEParser).
555 ::std::vector
< ScEEParseEntry
* >& mrEEParseList
; /// List that owns the parse entries (from ScEEParser).
556 ScHTMLEntryMap maEntryMap
; /// List of entries for each cell.
557 ScHTMLEntryList
* mpCurrEntryList
; /// Current entry list from map for faster access.
558 ScHTMLEntryPtr mxCurrEntry
; /// Working entry, not yet inserted in a list.
559 ScSizeVec maCumSizes
[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
560 ScHTMLSize maSize
; /// Size of the table.
561 ScHTMLPos maCurrCell
; /// Address of current cell to fill.
562 ScHTMLPos maDocBasePos
; /// Resulting base address in a Calc document.
563 ScHTMLParser
* mpParser
;
564 bool mbBorderOn
:1; /// true = Table borders on.
565 bool mbPreFormText
:1; /// true = Table from preformatted text (<pre> tag).
566 bool mbRowOn
:1; /// true = Inside of <tr> </tr>.
567 bool mbDataOn
:1; /// true = Inside of <td> </td> or <th> </th>.
568 bool mbPushEmptyLine
:1; /// true = Insert empty line before current entry.
571 /** The "global table" representing the entire HTML document. */
572 class ScHTMLGlobalTable
: public ScHTMLTable
575 explicit ScHTMLGlobalTable(
577 EditEngine
& rEditEngine
,
578 ::std::vector
< ScEEParseEntry
* >& rEEParseList
,
579 ScHTMLTableId
& rnUnusedId
, ScHTMLParser
* pParser
);
581 virtual ~ScHTMLGlobalTable();
583 /** Recalculates sizes and resulting positions of all document entries. */
587 /** The HTML parser for data queries. Focuses on data import, not on layout.
589 Builds the table structure correctly, ignores extended formatting like
590 pictures or column widths.
592 class ScHTMLQueryParser
: public ScHTMLParser
595 explicit ScHTMLQueryParser( EditEngine
* pEditEngine
, ScDocument
* pDoc
);
596 virtual ~ScHTMLQueryParser();
598 virtual sal_uLong
Read( SvStream
& rStrm
, const OUString
& rBaseURL
) SAL_OVERRIDE
;
600 /** Returns the "global table" which contains the entire HTML document. */
601 virtual const ScHTMLTable
* GetGlobalTable() const SAL_OVERRIDE
;
604 /** Handles all possible tags in the HTML document. */
605 void ProcessToken( const ImportInfo
& rInfo
);
606 /** Inserts a text portion into current entry. */
607 void InsertText( const ImportInfo
& rInfo
);
608 /** Processes the <font> tag. */
609 void FontOn( const ImportInfo
& rInfo
);
611 /** Processes the <meta> tag. */
612 void MetaOn( const ImportInfo
& rInfo
);
613 /** Opens the title of the HTML document (<title> tag). */
614 void TitleOn( const ImportInfo
& rInfo
);
615 /** Closes the title of the HTML document (</title> tag). */
616 void TitleOff( const ImportInfo
& rInfo
);
618 /** Opens a new table at the current position. */
619 void TableOn( const ImportInfo
& rInfo
);
620 /** Closes the current table. */
621 void TableOff( const ImportInfo
& rInfo
);
622 /** Opens a new table based on preformatted text. */
623 void PreOn( const ImportInfo
& rInfo
);
624 /** Closes the current preformatted text table. */
625 void PreOff( const ImportInfo
& rInfo
);
627 /** Closes the current table, regardless of the opening tag. */
628 void CloseTable( const ImportInfo
& rInfo
);
630 void ParseStyle(const OUString
& rStrm
);
632 DECL_LINK( HTMLImportHdl
, const ImportInfo
* );
635 typedef ::std::unique_ptr
< ScHTMLGlobalTable
> ScHTMLGlobalTablePtr
;
637 OUStringBuffer maTitle
; /// The title of the document.
638 ScHTMLGlobalTablePtr mxGlobTable
; /// Contains the entire imported document.
639 ScHTMLTable
* mpCurrTable
; /// Pointer to current table (performance).
640 ScHTMLTableId mnUnusedId
; /// First unused table identifier.
641 bool mbTitleOn
; /// true = Inside of <title> </title>.
646 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */