1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef SC_HTMLPARS_HXX
21 #define SC_HTMLPARS_HXX
28 #include <o3tl/sorted_vector.hxx>
29 #include <boost/ptr_container/ptr_map.hpp>
30 #include <boost/unordered_map.hpp>
32 #include "rangelst.hxx"
33 #include "eeparser.hxx"
// Number of HTML font size slots; also the length of ScHTMLParser::maFontHeights.
35 const sal_uInt32 SC_HTML_FONTSIZES
= 7; // same as in the export, HTML options
37 // Pixel tolerance for SeekOffset and related.
38 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL
= 1; // tolerance for a single table
39 const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE
= 10; // tolerance for nested tables
41 // ============================================================================
42 // BASE class for HTML parser classes
43 // ============================================================================
48 * Collection of HTML style data parsed from the content of <style>
// Hash map from OUString to OUString (hashed with OUStringHash).
// NOTE(review): presumably property name -> property value; confirm against insertProp().
53 typedef ::boost::unordered_map
<OUString
, OUString
, OUStringHash
> PropsType
;
// Pointer map (boost::ptr_map owns its mapped objects) from an OUString key to a PropsType.
54 typedef ::boost::ptr_map
<OUString
, PropsType
> NamePropsType
;
// Pointer map from an OUString key to a NamePropsType, i.e. a two-level lookup.
55 typedef ::boost::ptr_map
<OUString
, NamePropsType
> ElemsType
;
57 NamePropsType maGlobalProps
; /// global properties (for a given class for all elements)
58 NamePropsType maElemGlobalProps
; /// element global properties (no class specified)
59 ElemsType maElemProps
; /// element to class to properties (both element and class are given)
60 const OUString maEmpty
; /// just a persistent empty string.
// Adds one property/value pair (aProp, aValue) to the style collection.
// The element name and the class name are each passed as a character buffer
// plus an explicit length (pElemName/nElemName, pClassName/nClassName).
// NOTE(review): which of the three property maps receives the pair for empty
// element or class names is decided in the definition, not in this header -- confirm there.
64 void add(const char* pElemName
, size_t nElemName
, const char* pClassName
, size_t nClassName
,
65 const OUString
& aProp
, const OUString
& aValue
);
68 * Find best-matching property value for given element and class names.
// Looks up rPropName for the element rElem and class rClass; const member, so
// the style collection is not modified. The result is returned by const reference.
// NOTE(review): presumably maEmpty is what is returned when nothing matches -- confirm in the definition.
70 const OUString
& getPropertyValue(
71 const OUString
& rElem
, const OUString
& rClass
, const OUString
& rPropName
) const;
// Static helper working on the map passed as rProps rather than on a member.
// Takes the key aName and one property/value pair (aProp, aValue).
// NOTE(review): presumably stores aProp=aValue in the PropsType found under aName,
// creating that entry when it does not exist yet -- confirm in the definition.
74 static void insertProp(
75 NamePropsType
& rProps
, const OUString
& aName
,
76 const OUString
& aProp
, const OUString
& aValue
);
79 /** Base class for HTML parser classes. */
// Abstract: Read() and GetGlobalTable() are pure virtual and must be provided by
// the derived parsers (ScHTMLLayoutParser and ScHTMLQueryParser in this header).
80 class ScHTMLParser
: public ScEEParser
82 ScHTMLStyles maStyles
; /// Style data collection; exposed through GetStyles().
84 sal_uInt32 maFontHeights
[ SC_HTML_FONTSIZES
]; /// One font height per HTML font size slot (SC_HTML_FONTSIZES entries).
85 ScDocument
* mpDoc
; /// The destination document.
// Constructs the parser from an edit engine and the destination document.
88 explicit ScHTMLParser( EditEngine
* pEditEngine
, ScDocument
* pDoc
);
89 virtual ~ScHTMLParser();
// Reads HTML from rStrm; rBaseURL is passed along by the caller.
// NOTE(review): the meaning of the sal_uLong result (presumably an error code) is not visible here.
91 virtual sal_uLong
Read( SvStream
& rStrm
, const OUString
& rBaseURL
) = 0;
// Returns a non-const reference to a ScHTMLStyles object (presumably maStyles).
93 ScHTMLStyles
& GetStyles();
96 /** Returns the "global table" which contains the entire HTML document. */
97 virtual const ScHTMLTable
* GetGlobalTable() const = 0;
101 // ============================================================================
// Sorted vector (o3tl::sorted_vector) of sal_uLong values, used by the layout
// parser as the container of column offsets.
// NOTE(review): the offsets appear to be pixel values, judging by the comments on
// ScHTMLLayoutParser::nColOffset -- confirm.
103 typedef o3tl::sorted_vector
<sal_uLong
> ScHTMLColOffset
;
// Plain record holding a set of per-table values whose names mirror the per-table
// members of ScHTMLLayoutParser (xLockedList, pLocalColOffset, nFirstTableCell, ...).
// NOTE(review): presumably a snapshot of the parser state that is pushed onto
// ScHTMLLayoutParser::aTableStack while a nested table is processed -- confirm in the implementation.
105 struct ScHTMLTableStackEntry
107 ScRangeListRef xLockedList
;
108 ScEEParseEntry
* pCellEntry
;
109 ScHTMLColOffset
* pLocalColOffset
;
110 sal_uLong nFirstTableCell
;
116 sal_uInt16 nTableWidth
;
117 sal_uInt16 nColOffset
;
118 sal_uInt16 nColOffsetStart
;
// The constructor only copies its arguments into the members of the same meaning
// (see the initializer list below); pointers are stored as passed, not duplicated.
120 ScHTMLTableStackEntry( ScEEParseEntry
* pE
,
121 const ScRangeListRef
& rL
, ScHTMLColOffset
* pTO
,
123 SCCOL nCol
, SCROW nRow
,
124 SCCOL nStart
, SCCOL nMax
, sal_uInt16 nTab
,
125 sal_uInt16 nTW
, sal_uInt16 nCO
, sal_uInt16 nCOS
,
127 : xLockedList( rL
), pCellEntry( pE
),
128 pLocalColOffset( pTO
),
129 nFirstTableCell( nFTC
),
130 nColCnt( nCol
), nRowCnt( nRow
),
131 nColCntStart( nStart
), nMaxCol( nMax
),
132 nTable( nTab
), nTableWidth( nTW
),
133 nColOffset( nCO
), nColOffsetStart( nCOS
),
// Empty destructor: pCellEntry and pLocalColOffset are not deleted here.
136 ~ScHTMLTableStackEntry() {}
// Stack of raw ScHTMLTableStackEntry pointers. The stack stores pointers only,
// so popping an element does not delete the entry it points to.
138 typedef ::std::stack
< ScHTMLTableStackEntry
* > ScHTMLTableStack
;
// Plain record for the adjust stack (ScHTMLAdjustStack).
// The visible part of the constructor stores nLCol in nLastCol and nNRow in nNextRow.
// NOTE(review): further parameters and members are not visible in this view.
140 struct ScHTMLAdjustStackEntry
145 ScHTMLAdjustStackEntry( SCCOL nLCol
, SCROW nNRow
,
147 : nLastCol( nLCol
), nNextRow( nNRow
),
// Stack of raw ScHTMLAdjustStackEntry pointers; as with any container of raw
// pointers, the pointed-to entries are not deleted by the stack itself.
151 typedef ::std::stack
< ScHTMLAdjustStackEntry
* > ScHTMLAdjustStack
;
154 // ============================================================================
160 // TODO these need better names
// Ordered map from SCROW to SCROW.
161 typedef ::std::map
<SCROW
, SCROW
> InnerMap
;
// Ordered map from a sal_uInt16 key to a raw InnerMap pointer; the map does not
// free the InnerMap objects it points to.
// NOTE(review): what the sal_uInt16 key stands for (presumably a table number) is not visible here.
162 typedef ::std::map
<sal_uInt16
, InnerMap
*> OuterMap
;
// HTML parser derived from ScHTMLParser that keeps track of column offsets and
// table widths (see the nColOffset / nTableWidth members and the static
// column-offset helpers below). Implements Read() and GetGlobalTable().
164 class ScHTMLLayoutParser
: public ScHTMLParser
169 ScHTMLTableStack aTableStack
;
171 ScRangeListRef xLockedList
; // per table
173 ScHTMLColOffset
* pColOffset
;
174 ScHTMLColOffset
* pLocalColOffset
; // per table
175 sal_uLong nFirstTableCell
; // per table
178 sal_uInt16 nMaxTable
;
179 SCCOL nColCntStart
; // first column, per table
180 SCCOL nMaxCol
; // per table
181 sal_uInt16 nTableWidth
; // per table
182 sal_uInt16 nColOffset
; // current offset, in pixels
183 sal_uInt16 nColOffsetStart
; // start value per table, in pixels
184 sal_uInt16 nOffsetTolerance
; // for use with SeekOffset and related
185 bool bTabInTabCell
:1;
186 bool bFirstRow
:1; // per table: whether in the first row
// Link handler declaration; receives an ImportInfo pointer.
190 DECL_LINK( HTMLImportHdl
, ImportInfo
* );
191 void NewActEntry( ScEEParseEntry
* );
192 void EntryEnd( ScEEParseEntry
*, const ESelection
& );
193 void ProcToken( ImportInfo
* );
194 void CloseEntry( ImportInfo
* );
195 void NextRow( ImportInfo
* );
196 void SkipLocked( ScEEParseEntry
*, bool bJoin
= true );
// Static helpers operating on a column offset vector. Each takes the vector, an
// offset (and for the MakeCol variants a width) and the tolerance value(s) to apply.
// SeekOffset reports a column through pCol and returns a bool.
// NOTE(review): presumably true means an offset within nOffsetTol was found -- confirm.
197 static bool SeekOffset( ScHTMLColOffset
*, sal_uInt16 nOffset
,
198 SCCOL
* pCol
, sal_uInt16 nOffsetTol
);
// MakeCol takes nOffset and nWidth by reference, so it can modify the caller's values.
199 static void MakeCol( ScHTMLColOffset
*, sal_uInt16
& nOffset
,
200 sal_uInt16
& nWidth
, sal_uInt16 nOffsetTol
,
201 sal_uInt16 nWidthTol
);
// MakeColNoRef takes nOffset and nWidth by value, leaving the caller's values untouched.
202 static void MakeColNoRef( ScHTMLColOffset
*, sal_uInt16 nOffset
,
203 sal_uInt16 nWidth
, sal_uInt16 nOffsetTol
,
204 sal_uInt16 nWidthTol
);
// ModifyOffset takes both the old and the new offset by reference.
205 static void ModifyOffset( ScHTMLColOffset
*, sal_uInt16
& nOldOffset
,
206 sal_uInt16
& nNewOffset
, sal_uInt16 nOffsetTol
);
207 void Colonize( ScEEParseEntry
* );
208 sal_uInt16
GetWidth( ScEEParseEntry
* );
212 sal_uInt16
GetWidthPixel( const HTMLOption
& );
213 bool IsAtBeginningOfText( ImportInfo
* );
// Handlers named after HTML constructs; each receives the ImportInfo of the
// token being processed.
// NOTE(review): presumably dispatched from ProcToken() -- confirm in the implementation.
215 void TableOn( ImportInfo
* );
216 void ColOn( ImportInfo
* );
217 void TableRowOn( ImportInfo
* );
218 void TableRowOff( ImportInfo
* );
219 void TableDataOn( ImportInfo
* );
220 void TableDataOff( ImportInfo
* );
221 void TableOff( ImportInfo
* );
222 void Image( ImportInfo
* );
223 void AnchorOn( ImportInfo
* );
224 void FontOn( ImportInfo
* );
// Constructs the parser from an edit engine, the base URL, a page size and a document pointer.
227 ScHTMLLayoutParser( EditEngine
*, const OUString
& rBaseURL
, const Size
& aPageSize
, ScDocument
* );
228 virtual ~ScHTMLLayoutParser();
// Implementation of ScHTMLParser::Read().
229 virtual sal_uLong
Read( SvStream
&, const OUString
& rBaseURL
);
// Implementation of ScHTMLParser::GetGlobalTable().
230 virtual const ScHTMLTable
* GetGlobalTable() const;
235 // ============================================================================
236 // HTML DATA QUERY PARSER
237 // ============================================================================
239 /** Declares the orientation in or for a table: column or row. */
// tdCol selects the column component and tdRow the row component in the
// Get( ScHTMLOrient ) accessors of ScHTMLPos and ScHTMLSize.
240 enum ScHTMLOrient
{ tdCol
= 0 , tdRow
= 1 };
242 /** Type for a unique identifier for each table. */
243 typedef sal_uInt16 ScHTMLTableId
;
244 /** Identifier of the "global table" (the entire HTML document). */
245 const ScHTMLTableId SC_HTML_GLOBAL_TABLE
= 0;
246 /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. */
// Note: this constant has the same value (0) as SC_HTML_GLOBAL_TABLE, so
// ScHTMLEntry::IsTable() returns false for an entry whose nTab is the global table identifier.
247 const ScHTMLTableId SC_HTML_NO_TABLE
= 0;
249 // ============================================================================
251 /** A 2D cell position in an HTML table. */
// Default constructor: column 0, row 0.
257 inline explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
// Constructs the position from an explicit column and row.
258 inline explicit ScHTMLPos( SCCOL nCol
, SCROW nRow
) :
259 mnCol( nCol
), mnRow( nRow
) {}
// Constructs the position from the column and row of rAddr by delegating to Set( rAddr ).
260 inline explicit ScHTMLPos( const ScAddress
& rAddr
) { Set( rAddr
); }
// Returns the column (mnCol) when eOrient is tdCol, otherwise the row (mnRow).
262 inline SCCOLROW
Get( ScHTMLOrient eOrient
) const
263 { return (eOrient
== tdCol
) ? mnCol
: mnRow
; }
// Overwrites both components with the passed column and row.
264 inline void Set( SCCOL nCol
, SCROW nRow
)
265 { mnCol
= nCol
; mnRow
= nRow
; }
// Overwrites both components with rAddr.Col() and rAddr.Row(); other parts of rAddr are not used.
266 inline void Set( const ScAddress
& rAddr
)
267 { Set( rAddr
.Col(), rAddr
.Row() ); }
// Shifts the position by adding nColDiff to the column and nRowDiff to the row.
// The differences are signed types (SCsCOL/SCsROW); no range check is done here.
268 inline void Move( SCsCOL nColDiff
, SCsROW nRowDiff
)
269 { mnCol
= mnCol
+ nColDiff
; mnRow
= mnRow
+ nRowDiff
; }
// Builds a ScAddress from the stored column and row. The third constructor
// argument is always 0.
// NOTE(review): the third argument is presumably the sheet index -- confirm against ScAddress.
270 inline ScAddress
MakeAddr() const
271 { return ScAddress( mnCol
, mnRow
, 0 ); }
// Two positions are equal when both their rows and their columns are equal.
274 inline bool operator==( const ScHTMLPos
& rPos1
, const ScHTMLPos
& rPos2
)
276 return (rPos1
.mnRow
== rPos2
.mnRow
) && (rPos1
.mnCol
== rPos2
.mnCol
);
// Orders positions by row first and, for equal rows, by column.
279 inline bool operator<( const ScHTMLPos
& rPos1
, const ScHTMLPos
& rPos2
)
281 return (rPos1
.mnRow
< rPos2
.mnRow
) || ((rPos1
.mnRow
== rPos2
.mnRow
) && (rPos1
.mnCol
< rPos2
.mnCol
));
284 // ----------------------------------------------------------------------------
286 /** A 2D cell size in an HTML table. */
// Default constructor: 0 columns, 0 rows.
292 inline explicit ScHTMLSize() : mnCols( 0 ), mnRows( 0 ) {}
// Constructs the size from an explicit column count and row count.
293 inline explicit ScHTMLSize( SCCOL nCols
, SCROW nRows
) :
294 mnCols( nCols
), mnRows( nRows
) {}
// Returns the column count (mnCols) when eOrient is tdCol, otherwise the row count (mnRows).
296 inline SCCOLROW
Get( ScHTMLOrient eOrient
) const
297 { return (eOrient
== tdCol
) ? mnCols
: mnRows
; }
// Overwrites both components with the passed column count and row count.
298 inline void Set( SCCOL nCols
, SCROW nRows
)
299 { mnCols
= nCols
; mnRows
= nRows
; }
// Changes the size by adding nColDiff to the column count and nRowDiff to the row count.
// The differences are signed types, so negative values reduce the size; no range check is done here.
300 inline void Expand( SCsCOL nColDiff
, SCsROW nRowDiff
)
301 { mnCols
= mnCols
+ nColDiff
; mnRows
= mnRows
+ nRowDiff
; }
// Two sizes are equal when both their row counts and their column counts are equal.
304 inline bool operator==( const ScHTMLSize
& rSize1
, const ScHTMLSize
& rSize2
)
306 return (rSize1
.mnRows
== rSize2
.mnRows
) && (rSize1
.mnCols
== rSize2
.mnCols
);
309 // ============================================================================
311 /** A single entry containing a line of text or representing a table. */
// Extends ScEEParseEntry with a table identifier and an "import always" flag.
// The members aSel, nTab and aItemSet used by the inline functions below are
// not declared in this struct.
// NOTE(review): presumably they are inherited from ScEEParseEntry -- confirm.
312 struct ScHTMLEntry
: public ScEEParseEntry
// Constructs the entry from an item set; nTableId defaults to SC_HTML_NO_TABLE,
// i.e. an entry that does not represent a table.
315 explicit ScHTMLEntry(
316 const SfxItemSet
& rItemSet
,
317 ScHTMLTableId nTableId
= SC_HTML_NO_TABLE
);
319 /** Returns true, if the selection of the entry is empty. */
320 inline bool IsEmpty() const { return !aSel
.HasRange(); }
321 /** Returns true, if the entry has any content to be imported. */
322 bool HasContents() const;
323 /** Returns true, if the entry represents a table. */
324 inline bool IsTable() const { return nTab
!= SC_HTML_NO_TABLE
; }
325 /** Returns the table identifier stored in the entry (nTab). */
326 inline ScHTMLTableId
GetTableId() const { return nTab
; }
328 /** Sets or clears the import always state. */
329 inline void SetImportAlways( bool bSet
= true ) { mbImportAlways
= bSet
; }
330 /** Sets start point of the entry selection to the start of the import info object. */
331 void AdjustStart( const ImportInfo
& rInfo
);
332 /** Sets end point of the entry selection to the end of the import info object. */
333 void AdjustEnd( const ImportInfo
& rInfo
);
334 /** Deletes leading and trailing empty paragraphs from the entry. */
335 void Strip( const EditEngine
& rEditEngine
);
337 /** Returns read/write access to the item set of this entry. */
338 inline SfxItemSet
& GetItemSet() { return aItemSet
; }
339 /** Returns read-only access to the item set of this entry. */
340 inline const SfxItemSet
& GetItemSet() const { return aItemSet
; }
343 bool mbImportAlways
; /// true = Always import this entry.
346 // ============================================================================
348 /** This struct handles creation of unique table identifiers. */
// Holds a constant identifier plus a reference to the counter it was taken from.
// Because mrnUnusedId is a reference member, the referenced variable must
// outlive every ScHTMLTableAutoId object bound to it.
349 struct ScHTMLTableAutoId
351 const ScHTMLTableId mnTableId
; /// The created unique table identifier.
352 ScHTMLTableId
& mrnUnusedId
; /// Reference to global unused identifier variable.
354 /** The constructor assigns an unused identifier to member mnTableId. */
// NOTE(review): whether rnUnusedId is incremented as part of the assignment is
// decided in the definition, which is not part of this header -- confirm.
355 explicit ScHTMLTableAutoId( ScHTMLTableId
& rnUnusedId
);
358 // ----------------------------------------------------------------------------
// Forward declaration only; the class is not defined in the visible part of this
// header. It is used below as the pointee of ScHTMLTable::ScHTMLTableMapPtr.
360 class ScHTMLTableMap
;
362 /** Stores data for one table in an HTML document.
364 This class does the main work for importing an HTML document. It manages
365 the correct insertion of parse entries into the correct cells and the
366 creation of nested tables. Recalculation of resulting document size and
367 position is done recursively in all nested tables.
372 /** Creates a new HTML table without content.
373 @descr Internally handles a current cell position. This position is
374 invalid until first calls of RowOn() and DataOn().
375 @param rParentTable Reference to the parent table that owns this table.
376 @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
377 explicit ScHTMLTable(
378 ScHTMLTable
& rParentTable
,
379 const ImportInfo
& rInfo
,
382 virtual ~ScHTMLTable();
384 /** Returns the name of the table, specified in the TABLE tag. */
385 inline const OUString
& GetTableName() const { return maTableName
; }
386 /** Returns the unique identifier of the table. */
387 inline ScHTMLTableId
GetTableId() const { return maTableId
.mnTableId
; }
388 /** Returns the table size. */
389 inline const ScHTMLSize
& GetSize() const { return maSize
; }
390 /** Returns the cell spanning of the specified cell. */
391 ScHTMLSize
GetSpan( const ScHTMLPos
& rCellPos
) const;
393 /** Searches in all nested tables for the specified table.
394 @param nTableId Unique identifier of the table. */
395 ScHTMLTable
* FindNestedTable( ScHTMLTableId nTableId
) const;
397 /** Puts the item into the item set of the current entry. */
398 void PutItem( const SfxPoolItem
& rItem
);
399 /** Inserts a text portion into current entry. */
400 void PutText( const ImportInfo
& rInfo
);
401 /** Inserts a new line, if in preformatted text, else does nothing. */
402 void InsertPara( const ImportInfo
& rInfo
);
404 /** Inserts a line break (<br> tag).
405 @descr Inserts the current entry regardless if it is empty. */
407 /** Inserts a heading line (<p> and <h*> tags). */
409 /** Processes a hyperlink (<a> tag). */
412 /** Starts a *new* table nested in this table (<table> tag).
413 @return Pointer to the new table. */
414 ScHTMLTable
* TableOn( const ImportInfo
& rInfo
);
415 /** Closes *this* table (</table> tag).
416 @return Pointer to the parent table. */
417 ScHTMLTable
* TableOff( const ImportInfo
& rInfo
);
418 /** Starts a *new* table based on preformatted text (<pre> tag).
419 @return Pointer to the new table. */
420 ScHTMLTable
* PreOn( const ImportInfo
& rInfo
);
421 /** Closes *this* table based on preformatted text (</pre> tag).
422 @return Pointer to the parent table. */
423 ScHTMLTable
* PreOff( const ImportInfo
& rInfo
);
425 /** Starts next row (<tr> tag).
426 @descr Cell address is invalid until first call of DataOn(). */
427 void RowOn( const ImportInfo
& rInfo
);
428 /** Closes the current row (</tr> tag).
429 @descr Cell address is invalid until call of RowOn() and DataOn(). */
430 void RowOff( const ImportInfo
& rInfo
);
431 /** Starts the next cell (<td> or <th> tag). */
432 void DataOn( const ImportInfo
& rInfo
);
433 /** Closes the current cell (</td> or </th> tag).
434 @descr Cell address is invalid until next call of DataOn(). */
435 void DataOff( const ImportInfo
& rInfo
);
437 /** Starts the body of the HTML document (<body> tag). */
438 void BodyOn( const ImportInfo
& rInfo
);
439 /** Closes the body of the HTML document (</body> tag). */
440 void BodyOff( const ImportInfo
& rInfo
);
442 /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
443 @descr Used to close this table object regardless of the opening tag type.
444 @return Pointer to the parent table, or this, if no parent found. */
445 ScHTMLTable
* CloseTable( const ImportInfo
& rInfo
);
447 /** Returns the resulting document row/column count of the specified HTML row/column. */
448 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellPos
) const;
449 /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
450 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellBegin
, SCCOLROW nCellEnd
) const;
451 /** Returns the total document row/column count in the specified direction. */
452 SCCOLROW
GetDocSize( ScHTMLOrient eOrient
) const;
453 /** Returns the total document row/column count of the specified HTML cell. */
454 ScHTMLSize
GetDocSize( const ScHTMLPos
& rCellPos
) const;
456 /** Returns the resulting Calc position of the top left edge of the table. */
457 inline const ScHTMLPos
& GetDocPos() const { return maDocBasePos
; }
458 /** Calculates the resulting Calc position of the specified HTML column/row. */
459 SCCOLROW
GetDocPos( ScHTMLOrient eOrient
, SCCOLROW nCellPos
= 0 ) const;
460 /** Calculates the resulting Calc position of the specified HTML cell. */
461 ScHTMLPos
GetDocPos( const ScHTMLPos
& rCellPos
) const;
463 /** Calculates the current Calc document area of this table. */
464 void GetDocRange( ScRange
& rRange
) const;
466 /** Applies border formatting to the passed document. */
467 void ApplyCellBorders( ScDocument
* pDoc
, const ScAddress
& rFirstPos
) const;
// Returns a pointer to an SvNumberFormatter. Non-const member function.
// NOTE(review): where the formatter comes from (presumably the destination
// document reached through mpParser) and whether the result can be null is not
// visible in this header -- confirm in the definition.
469 SvNumberFormatter
* GetFormatTable();
472 /** Creates a new HTML table without parent.
473 @descr This constructor is used to create the "global table". */
474 explicit ScHTMLTable(
476 EditEngine
& rEditEngine
,
477 ::std::vector
< ScEEParseEntry
* >& rEEParseList
,
478 ScHTMLTableId
& rnUnusedId
, ScHTMLParser
* pParser
);
480 /** Fills all empty cells in this and nested tables with dummy parse entries. */
481 void FillEmptyCells();
482 /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
483 void RecalcDocSize();
484 /** Recalculates the position of all cell entries and nested tables.
485 @param rBasePos The origin of the table in the Calc document. */
486 void RecalcDocPos( const ScHTMLPos
& rBasePos
);
// Owning pointer to the container of nested tables (type of mxNestedTables).
// std::auto_ptr transfers ownership when it is copied.
489 typedef ::std::auto_ptr
< ScHTMLTableMap
> ScHTMLTableMapPtr
;
// Owning pointer to an item set (type of mxRowItemSet and mxDataItemSet).
490 typedef ::std::auto_ptr
< SfxItemSet
> SfxItemSetPtr
;
// Vector of SCCOLROW values (element type of the maCumSizes array).
491 typedef ::std::vector
< SCCOLROW
> ScSizeVec
;
// List of raw ScHTMLEntry pointers; the list itself does not delete the entries.
492 typedef ::std::list
< ScHTMLEntry
* > ScHTMLEntryList
;
// Maps a cell position to the list of entries of that cell; keys are ordered by
// operator<( ScHTMLPos, ScHTMLPos ), i.e. by row and then by column.
493 typedef ::std::map
< ScHTMLPos
, ScHTMLEntryList
> ScHTMLEntryMap
;
// Owning pointer to a single entry (type of mxCurrEntry, return type of CreateEntry()).
494 typedef ::std::auto_ptr
< ScHTMLEntry
> ScHTMLEntryPtr
;
496 /** Returns true, if the current cell does not contain an entry yet. */
497 bool IsEmptyCell() const;
498 /** Returns the item set from cell, row, or table, depending on current state. */
499 const SfxItemSet
& GetCurrItemSet() const;
501 /** Returns true, if import info represents a space character. */
502 static bool IsSpaceCharInfo( const ImportInfo
& rInfo
);
504 /** Creates and returns a new empty flying entry at position (0,0). */
505 ScHTMLEntryPtr
CreateEntry() const;
506 /** Creates a new flying entry.
507 @param rInfo Contains the initial edit engine selection for the entry. */
508 void CreateNewEntry( const ImportInfo
& rInfo
);
510 /** Inserts an empty line in front of the next entry. */
511 void InsertLeadingEmptyLine();
513 /** Pushes the passed entry into the list of the current cell. */
514 void ImplPushEntryToList( ScHTMLEntryList
& rEntryList
, ScHTMLEntryPtr
& rxEntry
);
515 /** Tries to insert the entry into the current cell.
516 @descr If insertion is not possible (i.e., currently no cell open), the
517 entry will be inserted into the parent table.
518 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
519 bool PushEntry( ScHTMLEntryPtr
& rxEntry
);
520 /** Puts the current entry into the entry list, if it is not empty.
521 @param rInfo The import info struct containing the end position of the current entry.
522 @param bLastInCell true = If cell is still empty, put this entry always.
523 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
524 bool PushEntry( const ImportInfo
& rInfo
, bool bLastInCell
= false );
525 /** Pushes a new entry into current cell which references a nested table.
526 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
527 bool PushTableEntry( ScHTMLTableId nTableId
);
529 /** Tries to find a table from the table container.
530 @descr Assumes that the table is located in the current container or
531 that the passed table identifier is 0.
532 @param nTableId Unique identifier of the table or 0. */
533 ScHTMLTable
* GetExistingTable( ScHTMLTableId nTableId
) const;
534 /** Inserts a nested table in the current cell at the specified position.
535 @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
536 ScHTMLTable
* InsertNestedTable( const ImportInfo
& rInfo
, bool bPreFormText
);
538 /** Inserts a new cell in an unused position, starting from current cell position. */
539 void InsertNewCell( const ScHTMLSize
& rSpanSize
);
541 /** Set internal states for a new table row. */
543 /** Set internal states for leaving a table row. */
545 /** Set internal states for entering a new table cell. */
546 void ImplDataOn( const ScHTMLSize
& rSpanSize
);
547 /** Set internal states for leaving a table cell. */
550 /** Inserts additional formatting options from import info into the item set. */
551 void ProcessFormatOptions( SfxItemSet
& rItemSet
, const ImportInfo
& rInfo
);
553 /** Updates the document column/row size of the specified column or row.
554 @descr Only increases the present count, never decreases. */
555 void SetDocSize( ScHTMLOrient eOrient
, SCCOLROW nCellPos
, SCCOLROW nSize
);
556 /** Calculates and sets the resulting size the cell needs in the document.
557 @descr Reduces the needed size in merged cells.
558 @param nCellPos The first column/row position of the (merged) cell.
559 @param nCellSpan The cell spanning in the specified orientation.
560 @param nRealDocSize The raw document size of all entries of the cell. */
561 void CalcNeededDocSize(
562 ScHTMLOrient eOrient
, SCCOLROW nCellPos
,
563 SCCOLROW nCellSpan
, SCCOLROW nRealDocSize
);
566 ScHTMLTable
* mpParentTable
; /// Pointer to parent table.
567 ScHTMLTableMapPtr mxNestedTables
; /// Table of nested HTML tables.
568 OUString maTableName
; /// Table name from <table id> option.
569 ScHTMLTableAutoId maTableId
; /// Unique identifier of this table.
570 SfxItemSet maTableItemSet
; /// Items for the entire table.
571 SfxItemSetPtr mxRowItemSet
; /// Items for the current table row.
572 SfxItemSetPtr mxDataItemSet
; /// Items for the current cell.
573 ScRangeList maHMergedCells
; /// List of all horizontally merged cells.
574 ScRangeList maVMergedCells
; /// List of all vertically merged cells.
575 ScRangeList maUsedCells
; /// List of all used cells.
576 EditEngine
& mrEditEngine
; /// Edit engine (from ScEEParser).
577 ::std::vector
< ScEEParseEntry
* >& mrEEParseList
; /// List that owns the parse entries (from ScEEParser).
578 ScHTMLEntryMap maEntryMap
; /// List of entries for each cell.
579 ScHTMLEntryList
* mpCurrEntryList
; /// Current entry list from map for faster access.
580 ScHTMLEntryPtr mxCurrEntry
; /// Working entry, not yet inserted in a list.
581 ScSizeVec maCumSizes
[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
582 ScHTMLSize maSize
; /// Size of the table.
583 ScHTMLPos maCurrCell
; /// Address of current cell to fill.
584 ScHTMLPos maDocBasePos
; /// Resulting base address in a Calc document.
585 ScHTMLParser
* mpParser
; /// Raw pointer to an HTML parser. NOTE(review): presumably the pParser constructor argument -- confirm.
586 bool mbBorderOn
:1; /// true = Table borders on.
587 bool mbPreFormText
:1; /// true = Table from preformatted text (<pre> tag).
588 bool mbRowOn
:1; /// true = Inside of <tr> </tr>.
589 bool mbDataOn
:1; /// true = Inside of <td> </td> or <th> </th>.
590 bool mbPushEmptyLine
:1; /// true = Insert empty line before current entry.
593 // ----------------------------------------------------------------------------
595 /** The "global table" representing the entire HTML document. */
// Derived from ScHTMLTable; adds its own constructor and virtual destructor.
596 class ScHTMLGlobalTable
: public ScHTMLTable
// The parameters visible here (rEditEngine, rEEParseList, rnUnusedId, pParser)
// match those of the ScHTMLTable constructor that creates a table without parent.
// NOTE(review): presumably they are forwarded to that constructor unchanged -- confirm in the definition.
599 explicit ScHTMLGlobalTable(
601 EditEngine
& rEditEngine
,
602 ::std::vector
< ScEEParseEntry
* >& rEEParseList
,
603 ScHTMLTableId
& rnUnusedId
, ScHTMLParser
* pParser
);
605 virtual ~ScHTMLGlobalTable();
607 /** Recalculates sizes and resulting positions of all document entries. */
611 // ============================================================================
613 /** The HTML parser for data queries. Focuses on data import, not on layout.
615 Builds the table structure correctly, ignores extended formatting like
616 pictures or column widths.
// Concrete ScHTMLParser: implements Read() and GetGlobalTable() and owns the
// global table through mxGlobTable.
618 class ScHTMLQueryParser
: public ScHTMLParser
// Constructs the parser from an edit engine and the destination document.
621 explicit ScHTMLQueryParser( EditEngine
* pEditEngine
, ScDocument
* pDoc
);
622 virtual ~ScHTMLQueryParser();
// Implementation of ScHTMLParser::Read().
624 virtual sal_uLong
Read( SvStream
& rStrm
, const OUString
& rBaseURL
);
626 /** Returns the "global table" which contains the entire HTML document. */
627 virtual const ScHTMLTable
* GetGlobalTable() const;
630 /** Handles all possible tags in the HTML document. */
631 void ProcessToken( const ImportInfo
& rInfo
);
632 /** Inserts a text portion into current entry. */
633 void InsertText( const ImportInfo
& rInfo
);
634 /** Processes the <font> tag. */
635 void FontOn( const ImportInfo
& rInfo
);
637 /** Processes the <meta> tag. */
638 void MetaOn( const ImportInfo
& rInfo
);
639 /** Opens the title of the HTML document (<title> tag). */
640 void TitleOn( const ImportInfo
& rInfo
);
641 /** Closes the title of the HTML document (</title> tag). */
642 void TitleOff( const ImportInfo
& rInfo
);
644 /** Opens a new table at the current position. */
645 void TableOn( const ImportInfo
& rInfo
);
646 /** Closes the current table. */
647 void TableOff( const ImportInfo
& rInfo
);
648 /** Opens a new table based on preformatted text. */
649 void PreOn( const ImportInfo
& rInfo
);
650 /** Closes the current preformatted text table. */
651 void PreOff( const ImportInfo
& rInfo
);
653 /** Closes the current table, regardless of the opening tag. */
654 void CloseTable( const ImportInfo
& rInfo
);
// Takes style text as a string.
// NOTE(review): presumably parses the content of a <style> element into the
// ScHTMLStyles object of the base class -- confirm in the definition.
656 void ParseStyle(const OUString
& rStrm
);
// Link handler declaration; receives a const ImportInfo pointer.
658 DECL_LINK( HTMLImportHdl
, const ImportInfo
* );
// Owning pointer type for the global table; std::auto_ptr transfers ownership when copied.
661 typedef ::std::auto_ptr
< ScHTMLGlobalTable
> ScHTMLGlobalTablePtr
;
663 OUStringBuffer maTitle
; /// The title of the document.
664 ScHTMLGlobalTablePtr mxGlobTable
; /// Contains the entire imported document.
665 ScHTMLTable
* mpCurrTable
; /// Pointer to current table (performance).
666 ScHTMLTableId mnUnusedId
; /// First unused table identifier.
667 bool mbTitleOn
; /// true = Inside of <title> </title>.
671 // ============================================================================
675 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */