merged tag ooo/OOO330_m14
[LibreOffice.git] / sc / source / filter / inc / htmlpars.hxx
blobf053a1088ff120916ce59f3a4f757cf17a4b8d94
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 #ifndef SC_HTMLPARS_HXX
29 #define SC_HTMLPARS_HXX
31 #include <tools/stack.hxx>
33 #include <memory>
34 #include <vector>
35 #include <list>
36 #include <map>
38 #include "rangelst.hxx"
39 #include "eeparser.hxx"
41 const sal_uInt32 SC_HTML_FONTSIZES = 7; // wie Export, HTML-Options
43 // Pixel tolerance for SeekOffset and related.
44 const USHORT SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table
45 const USHORT SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested
47 // ============================================================================
48 // BASE class for HTML parser classes
49 // ============================================================================
51 class ScHTMLTable;
53 /** Base class for HTML parser classes. */
54 class ScHTMLParser : public ScEEParser
56 protected:
57 sal_uInt32 maFontHeights[ SC_HTML_FONTSIZES ];
58 ScDocument* mpDoc; /// The destination document.
60 public:
61 explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
62 virtual ~ScHTMLParser();
64 virtual ULONG Read( SvStream& rStrm, const String& rBaseURL ) = 0;
66 /** Returns the "global table" which contains the entire HTML document. */
67 virtual const ScHTMLTable* GetGlobalTable() const = 0;
71 // ============================================================================
73 SV_DECL_VARARR_SORT( ScHTMLColOffset, ULONG, 16, 4)
75 struct ScHTMLTableStackEntry
77 ScRangeListRef xLockedList;
78 ScEEParseEntry* pCellEntry;
79 ScHTMLColOffset* pLocalColOffset;
80 ULONG nFirstTableCell;
81 SCCOL nColCnt;
82 SCROW nRowCnt;
83 SCCOL nColCntStart;
84 SCCOL nMaxCol;
85 USHORT nTable;
86 USHORT nTableWidth;
87 USHORT nColOffset;
88 USHORT nColOffsetStart;
89 BOOL bFirstRow;
90 ScHTMLTableStackEntry( ScEEParseEntry* pE,
91 const ScRangeListRef& rL, ScHTMLColOffset* pTO,
92 ULONG nFTC,
93 SCCOL nCol, SCROW nRow,
94 SCCOL nStart, SCCOL nMax, USHORT nTab,
95 USHORT nTW, USHORT nCO, USHORT nCOS,
96 BOOL bFR )
97 : xLockedList( rL ), pCellEntry( pE ),
98 pLocalColOffset( pTO ),
99 nFirstTableCell( nFTC ),
100 nColCnt( nCol ), nRowCnt( nRow ),
101 nColCntStart( nStart ), nMaxCol( nMax ),
102 nTable( nTab ), nTableWidth( nTW ),
103 nColOffset( nCO ), nColOffsetStart( nCOS ),
104 bFirstRow( bFR )
106 ~ScHTMLTableStackEntry() {}
108 DECLARE_STACK( ScHTMLTableStack, ScHTMLTableStackEntry* )
110 struct ScHTMLAdjustStackEntry
112 SCCOL nLastCol;
113 SCROW nNextRow;
114 SCROW nCurRow;
115 ScHTMLAdjustStackEntry( SCCOL nLCol, SCROW nNRow,
116 SCROW nCRow )
117 : nLastCol( nLCol ), nNextRow( nNRow ),
118 nCurRow( nCRow )
121 DECLARE_STACK( ScHTMLAdjustStack, ScHTMLAdjustStackEntry* )
124 // ============================================================================
126 class EditEngine;
127 class ScDocument;
128 class HTMLOption;
130 class ScHTMLLayoutParser : public ScHTMLParser
132 private:
133 Size aPageSize;
134 String aBaseURL;
135 ScHTMLTableStack aTableStack;
136 String aString;
137 ScRangeListRef xLockedList; // je Table
138 Table* pTables;
139 ScHTMLColOffset* pColOffset;
140 ScHTMLColOffset* pLocalColOffset; // je Table
141 ULONG nFirstTableCell; // je Table
142 short nTableLevel;
143 USHORT nTable;
144 USHORT nMaxTable;
145 SCCOL nColCntStart; // erste Col je Table
146 SCCOL nMaxCol; // je Table
147 USHORT nTableWidth; // je Table
148 USHORT nColOffset; // aktuell, Pixel
149 USHORT nColOffsetStart; // Startwert je Table, in Pixel
150 USHORT nMetaCnt; // fuer ParseMetaOptions
151 USHORT nOffsetTolerance; // for use with SeekOffset and related
152 BOOL bCalcWidthHeight; // TRUE: calculate real column width
153 // FALSE: 1 html-col = 1 sc-col
154 BOOL bTabInTabCell;
155 BOOL bFirstRow; // je Table, ob in erster Zeile
156 BOOL bInCell;
157 BOOL bInTitle;
159 DECL_LINK( HTMLImportHdl, ImportInfo* );
160 void NewActEntry( ScEEParseEntry* );
161 void EntryEnd( ScEEParseEntry*, const ESelection& );
162 void ProcToken( ImportInfo* );
163 void CloseEntry( ImportInfo* );
164 void NextRow( ImportInfo* );
165 void SkipLocked( ScEEParseEntry*, BOOL bJoin = TRUE );
166 static BOOL SeekOffset( ScHTMLColOffset*, USHORT nOffset,
167 SCCOL* pCol, USHORT nOffsetTol );
168 static void MakeCol( ScHTMLColOffset*, USHORT& nOffset,
169 USHORT& nWidth, USHORT nOffsetTol,
170 USHORT nWidthTol );
171 static void MakeColNoRef( ScHTMLColOffset*, USHORT nOffset,
172 USHORT nWidth, USHORT nOffsetTol,
173 USHORT nWidthTol );
174 static void ModifyOffset( ScHTMLColOffset*, USHORT& nOldOffset,
175 USHORT& nNewOffset, USHORT nOffsetTol );
176 void Colonize( ScEEParseEntry* );
177 USHORT GetWidth( ScEEParseEntry* );
178 void SetWidths();
179 void Adjust();
181 USHORT GetWidthPixel( const HTMLOption* );
182 BOOL IsAtBeginningOfText( ImportInfo* );
184 void TableOn( ImportInfo* );
185 void ColOn( ImportInfo* );
186 void TableRowOn( ImportInfo* );
187 void TableRowOff( ImportInfo* );
188 void TableDataOn( ImportInfo* );
189 void TableDataOff( ImportInfo* );
190 void TableOff( ImportInfo* );
191 void Image( ImportInfo* );
192 void AnchorOn( ImportInfo* );
193 void FontOn( ImportInfo* );
195 public:
196 ScHTMLLayoutParser( EditEngine*, const String& rBaseURL, const Size& aPageSize, ScDocument* );
197 virtual ~ScHTMLLayoutParser();
198 virtual ULONG Read( SvStream&, const String& rBaseURL );
199 virtual const ScHTMLTable* GetGlobalTable() const;
204 // ============================================================================
205 // HTML DATA QUERY PARSER
206 // ============================================================================
208 /** Declares the orientation in or for a table: column or row. */
209 enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };
211 /** Type for a unique identifier for each table. */
212 typedef sal_uInt16 ScHTMLTableId;
213 /** Identifier of the "global table" (the entire HTML document). */
214 const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
215 /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. */
216 const ScHTMLTableId SC_HTML_NO_TABLE = 0;
218 // ============================================================================
220 /** A 2D cell position in an HTML table. */
221 struct ScHTMLPos
223 SCCOL mnCol;
224 SCROW mnRow;
226 inline explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
227 inline explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) :
228 mnCol( nCol ), mnRow( nRow ) {}
229 inline explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); }
231 inline SCCOLROW Get( ScHTMLOrient eOrient ) const
232 { return (eOrient == tdCol) ? mnCol : mnRow; }
233 inline void Set( SCCOL nCol, SCROW nRow )
234 { mnCol = nCol; mnRow = nRow; }
235 inline void Set( const ScAddress& rAddr )
236 { Set( rAddr.Col(), rAddr.Row() ); }
237 inline void Move( SCsCOL nColDiff, SCsROW nRowDiff )
238 { mnCol = mnCol + nColDiff; mnRow = mnRow + nRowDiff; }
239 inline ScAddress MakeAddr() const
240 { return ScAddress( mnCol, mnRow, 0 ); }
243 inline bool operator==( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
245 return (rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol == rPos2.mnCol);
248 inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
250 return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
253 // ----------------------------------------------------------------------------
255 /** A 2D cell size in an HTML table. */
256 struct ScHTMLSize
258 SCCOL mnCols;
259 SCROW mnRows;
261 inline explicit ScHTMLSize() : mnCols( 0 ), mnRows( 0 ) {}
262 inline explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) :
263 mnCols( nCols ), mnRows( nRows ) {}
265 inline SCCOLROW Get( ScHTMLOrient eOrient ) const
266 { return (eOrient == tdCol) ? mnCols : mnRows; }
267 inline void Set( SCCOL nCols, SCROW nRows )
268 { mnCols = nCols; mnRows = nRows; }
269 inline void Expand( SCsCOL nColDiff, SCsROW nRowDiff )
270 { mnCols = mnCols + nColDiff; mnRows = mnRows + nRowDiff; }
273 inline bool operator==( const ScHTMLSize& rSize1, const ScHTMLSize& rSize2 )
275 return (rSize1.mnRows == rSize2.mnRows) && (rSize1.mnCols == rSize2.mnCols);
278 // ============================================================================
280 /** A single entry containing a line of text or representing a table. */
281 struct ScHTMLEntry : public ScEEParseEntry
283 public:
284 explicit ScHTMLEntry(
285 const SfxItemSet& rItemSet,
286 ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
288 /** Returns true, if the selection of the entry is empty. */
289 inline bool IsEmpty() const { return !aSel.HasRange(); }
290 /** Returns true, if the entry has any content to be imported. */
291 bool HasContents() const;
292 /** Returns true, if the entry represents a table. */
293 inline bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
294 /** Returns true, if the entry represents a table. */
295 inline ScHTMLTableId GetTableId() const { return nTab; }
297 /** Sets or cleares the import always state. */
298 inline void SetImportAlways( bool bSet = true ) { mbImportAlways = bSet; }
299 /** Sets start point of the entry selection to the start of the import info object. */
300 void AdjustStart( const ImportInfo& rInfo );
301 /** Sets end point of the entry selection to the end of the import info object. */
302 void AdjustEnd( const ImportInfo& rInfo );
303 /** Deletes leading and trailing empty paragraphs from the entry. */
304 void Strip( const EditEngine& rEditEngine );
306 /** Returns read/write access to the item set of this entry. */
307 inline SfxItemSet& GetItemSet() { return aItemSet; }
308 /** Returns read-only access to the item set of this entry. */
309 inline const SfxItemSet& GetItemSet() const { return aItemSet; }
311 private:
312 bool mbImportAlways; /// true = Always import this entry.
315 // ============================================================================
317 /** This struct handles creation of unique table identifiers. */
318 struct ScHTMLTableAutoId
320 const ScHTMLTableId mnTableId; /// The created unique table identifier.
321 ScHTMLTableId& mrnUnusedId; /// Reference to global unused identifier variable.
323 /** The constructor assigns an unused identifier to member mnTableId. */
324 explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
327 // ----------------------------------------------------------------------------
329 class ScHTMLTableMap;
331 /** Stores data for one table in an HTML document.
333 This class does the main work for importing an HTML document. It manages
334 the correct insertion of parse entries into the correct cells and the
335 creation of nested tables. Recalculation of resulting document size and
336 position is done recursively in all nested tables.
338 class ScHTMLTable
340 public:
341 /** Creates a new HTML table without content.
342 @descr Internally handles a current cell position. This position is
343 invalid until first calls of RowOn() and DataOn().
344 @param rParentTable Reference to the parent table that owns this table.
345 @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
346 explicit ScHTMLTable(
347 ScHTMLTable& rParentTable,
348 const ImportInfo& rInfo,
349 bool bPreFormText );
351 virtual ~ScHTMLTable();
353 /** Returns the name of the table, specified in the TABLE tag. */
354 inline const String& GetTableName() const { return maTableName; }
355 /** Returns the unique identifier of the table. */
356 inline ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
357 /** Returns the table size. */
358 inline const ScHTMLSize& GetSize() const { return maSize; }
359 /** Returns the cell spanning of the specified cell. */
360 ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
362 /** Searches in all nested tables for the specified table.
363 @param nTableId Unique identifier of the table. */
364 ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
366 /** Puts the item into the item set of the current entry. */
367 void PutItem( const SfxPoolItem& rItem );
368 /** Inserts a text portion into current entry. */
369 void PutText( const ImportInfo& rInfo );
370 /** Inserts a new line, if in preformatted text, else does nothing. */
371 void InsertPara( const ImportInfo& rInfo );
373 /** Inserts a line break (<br> tag).
374 @descr Inserts the current entry regardless if it is empty. */
375 void BreakOn();
376 /** Inserts a heading line (<p> and <h*> tags). */
377 void HeadingOn();
378 /** Processes a hyperlink (<a> tag). */
379 void AnchorOn();
381 /** Starts a *new* table nested in this table (<table> tag).
382 @return Pointer to the new table. */
383 ScHTMLTable* TableOn( const ImportInfo& rInfo );
384 /** Closes *this* table (</table> tag).
385 @return Pointer to the parent table. */
386 ScHTMLTable* TableOff( const ImportInfo& rInfo );
387 /** Starts a *new* table based on preformatted text (<pre> tag).
388 @return Pointer to the new table. */
389 ScHTMLTable* PreOn( const ImportInfo& rInfo );
390 /** Closes *this* table based on preformatted text (</pre> tag).
391 @return Pointer to the parent table. */
392 ScHTMLTable* PreOff( const ImportInfo& rInfo );
394 /** Starts next row (<tr> tag).
395 @descr Cell address is invalid until first call of DataOn(). */
396 void RowOn( const ImportInfo& rInfo );
397 /** Closes the current row (<tr> tag).
398 @descr Cell address is invalid until call of RowOn() and DataOn(). */
399 void RowOff( const ImportInfo& rInfo );
400 /** Starts the next cell (<td> or <th> tag). */
401 void DataOn( const ImportInfo& rInfo );
402 /** Closes the current cell (</td> or </th> tag).
403 @descr Cell address is invalid until next call of DataOn(). */
404 void DataOff( const ImportInfo& rInfo );
406 /** Starts the body of the HTML document (<body> tag). */
407 void BodyOn( const ImportInfo& rInfo );
408 /** Closes the body of the HTML document (</body> tag). */
409 void BodyOff( const ImportInfo& rInfo );
411 /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
412 @descr Used to close this table object regardless on opening tag type.
413 @return Pointer to the parent table, or this, if no parent found. */
414 ScHTMLTable* CloseTable( const ImportInfo& rInfo );
416 /** Returns the resulting document row/column count of the specified HTML row/column. */
417 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
418 /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
419 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
420 /** Returns the total document row/column count in the specified direction. */
421 SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
422 /** Returns the total document row/column count of the specified HTML cell. */
423 ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
425 /** Returns the resulting Calc position of the top left edge of the table. */
426 inline const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
427 /** Calculates the resulting Calc position of the specified HTML column/row. */
428 SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos = 0 ) const;
429 /** Calculates the resulting Calc position of the specified HTML cell. */
430 ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
432 /** Calculates the current Calc document area of this table. */
433 void GetDocRange( ScRange& rRange ) const;
435 /** Applies border formatting to the passed document. */
436 void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
438 protected:
439 /** Creates a new HTML table without parent.
440 @descr This constructor is used to create the "global table". */
441 explicit ScHTMLTable(
442 SfxItemPool& rPool,
443 EditEngine& rEditEngine,
444 ScEEParseList& rEEParseList,
445 ScHTMLTableId& rnUnusedId );
447 /** Fills all empty cells in this and nested tables with dummy parse entries. */
448 void FillEmptyCells();
449 /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
450 void RecalcDocSize();
451 /** Recalculates the position of all cell entries and nested tables.
452 @param rBasePos The origin of the table in the Calc document. */
453 void RecalcDocPos( const ScHTMLPos& rBasePos );
455 private:
456 typedef ::std::auto_ptr< ScHTMLTableMap > ScHTMLTableMapPtr;
457 typedef ::std::auto_ptr< SfxItemSet > SfxItemSetPtr;
458 typedef ::std::vector< SCCOLROW > ScSizeVec;
459 typedef ::std::list< ScHTMLEntry* > ScHTMLEntryList;
460 typedef ::std::map< ScHTMLPos, ScHTMLEntryList > ScHTMLEntryMap;
461 typedef ::std::auto_ptr< ScHTMLEntry > ScHTMLEntryPtr;
463 /** Returns true, if the current cell does not contain an entry yet. */
464 bool IsEmptyCell() const;
465 /** Returns the item set from cell, row, or table, depending on current state. */
466 const SfxItemSet& GetCurrItemSet() const;
468 /** Returns true, if import info represents a space character. */
469 static bool IsSpaceCharInfo( const ImportInfo& rInfo );
471 /** Creates and returns a new empty flying entry at position (0,0). */
472 ScHTMLEntryPtr CreateEntry() const;
473 /** Creates a new flying entry.
474 @param rInfo Contains the initial edit engine selection for the entry. */
475 void CreateNewEntry( const ImportInfo& rInfo );
477 /** Inserts an empty line in front of the next entry. */
478 void InsertLeadingEmptyLine();
480 /** Pushes the passed entry into the list of the current cell. */
481 void ImplPushEntryToList( ScHTMLEntryList& rEntryList, ScHTMLEntryPtr& rxEntry );
482 /** Tries to insert the entry into the current cell.
483 @descr If insertion is not possible (i.e., currently no cell open), the
484 entry will be inserted into the parent table.
485 @return true = Entry as been pushed into the current cell; false = Entry dropped. */
486 bool PushEntry( ScHTMLEntryPtr& rxEntry );
487 /** Puts the current entry into the entry list, if it is not empty.
488 @param rInfo The import info struct containing the end position of the current entry.
489 @param bLastInCell true = If cell is still empty, put this entry always.
490 @return true = Entry as been pushed into the current cell; false = Entry dropped. */
491 bool PushEntry( const ImportInfo& rInfo, bool bLastInCell = false );
492 /** Pushes a new entry into current cell which references a nested table.
493 @return true = Entry as been pushed into the current cell; false = Entry dropped. */
494 bool PushTableEntry( ScHTMLTableId nTableId );
496 /** Tries to find a table from the table container.
497 @descr Assumes that the table is located in the current container or
498 that the passed table identifier is 0.
499 @param nTableId Unique identifier of the table or 0. */
500 ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
501 /** Inserts a nested table in the current cell at the specified position.
502 @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
503 ScHTMLTable* InsertNestedTable( const ImportInfo& rInfo, bool bPreFormText );
505 /** Inserts a new cell in an unused position, starting from current cell position. */
506 void InsertNewCell( const ScHTMLSize& rSpanSize );
508 /** Set internal states for a new table row. */
509 void ImplRowOn();
510 /** Set internal states for leaving a table row. */
511 void ImplRowOff();
512 /** Set internal states for entering a new table cell. */
513 void ImplDataOn( const ScHTMLSize& rSpanSize );
514 /** Set internal states for leaving a table cell. */
515 void ImplDataOff();
517 /** Inserts additional formatting options from import info into the item set. */
518 void ProcessFormatOptions( SfxItemSet& rItemSet, const ImportInfo& rInfo );
520 /** Updates the document column/row size of the specified column or row.
521 @descr Only increases the present count, never decreases. */
522 void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
523 /** Calculates and sets the resulting size the cell needs in the document.
524 @descr Reduces the needed size in merged cells.
525 @param nCellPos The first column/row position of the (merged) cell.
526 @param nCellSpan The cell spanning in the specified orientation.
527 @param nRealDocSize The raw document size of all entries of the cell. */
528 void CalcNeededDocSize(
529 ScHTMLOrient eOrient, SCCOLROW nCellPos,
530 SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
532 private:
533 ScHTMLTable* mpParentTable; /// Pointer to parent table.
534 ScHTMLTableMapPtr mxNestedTables; /// Table of nested HTML tables.
535 String maTableName; /// Table name from <table id> option.
536 ScHTMLTableAutoId maTableId; /// Unique identifier of this table.
537 SfxItemSet maTableItemSet; /// Items for the entire table.
538 SfxItemSetPtr mxRowItemSet; /// Items for the current table row.
539 SfxItemSetPtr mxDataItemSet; /// Items for the current cell.
540 ScRangeList maHMergedCells; /// List of all horizontally merged cells.
541 ScRangeList maVMergedCells; /// List of all vertically merged cells.
542 ScRangeList maUsedCells; /// List of all used cells.
543 EditEngine& mrEditEngine; /// Edit engine (from ScEEParser).
544 ScEEParseList& mrEEParseList; /// List that owns the parse entries (from ScEEParser).
545 ScHTMLEntryMap maEntryMap; /// List of entries for each cell.
546 ScHTMLEntryList* mpCurrEntryList; /// Current entry list from map for faster access.
547 ScHTMLEntryPtr mxCurrEntry; /// Working entry, not yet inserted in a list.
548 ScSizeVec maCumSizes[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
549 ScHTMLSize maSize; /// Size of the table.
550 ScHTMLPos maCurrCell; /// Address of current cell to fill.
551 ScHTMLPos maDocBasePos; /// Resulting base address in a Calc document.
552 bool mbBorderOn; /// true = Table borders on.
553 bool mbPreFormText; /// true = Table from preformatted text (<pre> tag).
554 bool mbRowOn; /// true = Inside of <tr> </tr>.
555 bool mbDataOn; /// true = Inside of <td> </td> or <th> </th>.
556 bool mbPushEmptyLine; /// true = Insert empty line before current entry.
559 // ----------------------------------------------------------------------------
561 /** The "global table" representing the entire HTML document. */
562 class ScHTMLGlobalTable : public ScHTMLTable
564 public:
565 explicit ScHTMLGlobalTable(
566 SfxItemPool& rPool,
567 EditEngine& rEditEngine,
568 ScEEParseList& rEEParseList,
569 ScHTMLTableId& rnUnusedId );
571 virtual ~ScHTMLGlobalTable();
573 /** Recalculates sizes and resulting positions of all document entries. */
574 void Recalc();
577 // ============================================================================
579 /** The HTML parser for data queries. Focuses on data import, not on layout.
581 Builds the table structure correctly, ignores extended formatting like
582 pictures or column widths.
584 class ScHTMLQueryParser : public ScHTMLParser
586 public:
587 explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
588 virtual ~ScHTMLQueryParser();
590 virtual ULONG Read( SvStream& rStrm, const String& rBaseURL );
592 /** Returns the "global table" which contains the entire HTML document. */
593 virtual const ScHTMLTable* GetGlobalTable() const;
595 private:
596 /** Handles all possible tags in the HTML document. */
597 void ProcessToken( const ImportInfo& rInfo );
598 /** Inserts a text portion into current entry. */
599 void InsertText( const ImportInfo& rInfo );
600 /** Processes the <font> tag. */
601 void FontOn( const ImportInfo& rInfo );
603 /** Processes the <meta> tag. */
604 void MetaOn( const ImportInfo& rInfo );
605 /** Opens the title of the HTML document (<title> tag). */
606 void TitleOn( const ImportInfo& rInfo );
607 /** Closes the title of the HTML document (</title> tag). */
608 void TitleOff( const ImportInfo& rInfo );
610 /** Opens a new table at the current position. */
611 void TableOn( const ImportInfo& rInfo );
612 /** Closes the current table. */
613 void TableOff( const ImportInfo& rInfo );
614 /** Opens a new table based on preformatted text. */
615 void PreOn( const ImportInfo& rInfo );
616 /** Closes the current preformatted text table. */
617 void PreOff( const ImportInfo& rInfo );
619 /** Closes the current table, regardless on opening tag. */
620 void CloseTable( const ImportInfo& rInfo );
622 DECL_LINK( HTMLImportHdl, const ImportInfo* );
624 private:
625 typedef ::std::auto_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr;
627 String maTitle; /// The title of the document.
628 ScHTMLGlobalTablePtr mxGlobTable; /// Contains the entire imported document.
629 ScHTMLTable* mpCurrTable; /// Pointer to current table (performance).
630 ScHTMLTableId mnUnusedId; /// First unused table identifier.
631 bool mbTitleOn; /// true = Inside of <title> </title>.
635 // ============================================================================
637 #endif