update dev300-m58
[ooovba.git] / sc / source / filter / inc / htmlpars.hxx
blob47e5a9419d97db50776dfb9dc8fadcd2c2a84e2f
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: htmlpars.hxx,v $
10 * $Revision: 1.16.32.1 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #ifndef SC_HTMLPARS_HXX
32 #define SC_HTMLPARS_HXX
34 #include <tools/stack.hxx>
36 #include <memory>
37 #include <vector>
38 #include <list>
39 #include <map>
41 #include "rangelst.hxx"
42 #include "eeparser.hxx"
44 const sal_uInt32 SC_HTML_FONTSIZES = 7; // number of HTML font size steps; same as in export, HTML options
46 // Pixel tolerance for SeekOffset and related.
47 const USHORT SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table
48 const USHORT SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested tables
50 // ============================================================================
51 // BASE class for HTML parser classes
52 // ============================================================================
// Forward declaration; the class is defined further below in this header.
54 class ScHTMLTable;
56 /** Base class for HTML parser classes. Abstract: Read() and GetGlobalTable() are pure virtual. */
57 class ScHTMLParser : public ScEEParser
59 protected:
60 sal_uInt32 maFontHeights[ SC_HTML_FONTSIZES ]; /// One font height per HTML font size step (unit not visible here).
61 ScDocument* mpDoc; /// The destination document.
63 public:
64 explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
65 virtual ~ScHTMLParser();
/** Reads the HTML document from the passed stream; implemented by derived classes.
    NOTE(review): the ULONG result is presumably an error code -- confirm in the implementations. */
67 virtual ULONG Read( SvStream& rStrm, const String& rBaseURL ) = 0;
69 /** Returns the "global table" which contains the entire HTML document. */
70 virtual const ScHTMLTable* GetGlobalTable() const = 0;
74 // ============================================================================
// Declares the sorted array type ScHTMLColOffset with ULONG elements.
// NOTE(review): the arguments 16 and 4 are presumably initial size and growth step -- confirm against the macro definition.
76 SV_DECL_VARARR_SORT( ScHTMLColOffset, ULONG, 16, 4)
/** Plain record of table-related parser state.
    The members carry the same names as the table-related members of ScHTMLLayoutParser.
    NOTE(review): presumably an entry is pushed onto ScHTMLTableStack when a nested
    table is entered and popped when it is left -- confirm in the implementation file.
    The constructor only copies its arguments into the members. */
78 struct ScHTMLTableStackEntry
80 ScRangeListRef xLockedList;
81 ScEEParseEntry* pCellEntry;
82 ScHTMLColOffset* pLocalColOffset;
83 ULONG nFirstTableCell;
84 SCCOL nColCnt;
85 SCROW nRowCnt;
86 SCCOL nColCntStart;
87 SCCOL nMaxCol;
88 USHORT nTable;
89 USHORT nTableWidth;
90 USHORT nColOffset;
91 USHORT nColOffsetStart;
92 BOOL bFirstRow;
// Note: the parameter order (pE before rL) differs from the member order
// (xLockedList before pCellEntry); the initializer list follows the member order.
93 ScHTMLTableStackEntry( ScEEParseEntry* pE,
94 const ScRangeListRef& rL, ScHTMLColOffset* pTO,
95 ULONG nFTC,
96 SCCOL nCol, SCROW nRow,
97 SCCOL nStart, SCCOL nMax, USHORT nTab,
98 USHORT nTW, USHORT nCO, USHORT nCOS,
99 BOOL bFR )
100 : xLockedList( rL ), pCellEntry( pE ),
101 pLocalColOffset( pTO ),
102 nFirstTableCell( nFTC ),
103 nColCnt( nCol ), nRowCnt( nRow ),
104 nColCntStart( nStart ), nMaxCol( nMax ),
105 nTable( nTab ), nTableWidth( nTW ),
106 nColOffset( nCO ), nColOffsetStart( nCOS ),
107 bFirstRow( bFR )
// Empty destructor: the raw pointer members are not deleted here.
109 ~ScHTMLTableStackEntry() {}
// Declares the stack type ScHTMLTableStack holding ScHTMLTableStackEntry pointers
// (macro presumably provided by <tools/stack.hxx>).
111 DECLARE_STACK( ScHTMLTableStack, ScHTMLTableStackEntry* )
/** Plain record of a last column, a next row and a current row.
    NOTE(review): presumably used by ScHTMLLayoutParser::Adjust() -- confirm in the implementation file.
    The constructor only copies its arguments into the members. */
113 struct ScHTMLAdjustStackEntry
115 SCCOL nLastCol;
116 SCROW nNextRow;
117 SCROW nCurRow;
118 ScHTMLAdjustStackEntry( SCCOL nLCol, SCROW nNRow,
119 SCROW nCRow )
120 : nLastCol( nLCol ), nNextRow( nNRow ),
121 nCurRow( nCRow )
// Declares the stack type ScHTMLAdjustStack holding ScHTMLAdjustStackEntry pointers.
124 DECLARE_STACK( ScHTMLAdjustStack, ScHTMLAdjustStackEntry* )
127 // ============================================================================
// Forward declarations of types used only by pointer or reference below.
129 class EditEngine;
130 class ScDocument;
131 class HTMLOption;
/** HTML parser derived from ScHTMLParser that keeps track of column offsets and
    table widths in pixels (see nColOffset, nTableWidth, SeekOffset(), MakeCol()).
    NOTE(review): judging by the names this is the layout-oriented counterpart of
    ScHTMLQueryParser -- confirm in the implementation file. */
133 class ScHTMLLayoutParser : public ScHTMLParser
135 private:
136 Size aPageSize;
137 String aBaseURL;
138 ScHTMLTableStack aTableStack;
139 String aString;
140 ScRangeListRef xLockedList; // per table
141 Table* pTables;
142 ScHTMLColOffset* pColOffset;
143 ScHTMLColOffset* pLocalColOffset; // per table
144 ULONG nFirstTableCell; // per table
145 short nTableLevel;
146 USHORT nTable;
147 USHORT nMaxTable;
148 SCCOL nColCntStart; // first column per table
149 SCCOL nMaxCol; // per table
150 USHORT nTableWidth; // per table
151 USHORT nColOffset; // current, in pixels
152 USHORT nColOffsetStart; // start value per table, in pixels
153 USHORT nMetaCnt; // for ParseMetaOptions
154 USHORT nOffsetTolerance; // for use with SeekOffset and related
155 BOOL bCalcWidthHeight; // TRUE: calculate real column width
156 // FALSE: 1 html-col = 1 sc-col
157 BOOL bTabInTabCell;
158 BOOL bFirstRow; // per table, whether in the first row
159 BOOL bInCell;
160 BOOL bInTitle;
162 DECL_LINK( HTMLImportHdl, ImportInfo* );
163 void NewActEntry( ScEEParseEntry* );
164 void EntryEnd( ScEEParseEntry*, const ESelection& );
165 void ProcToken( ImportInfo* );
166 void CloseEntry( ImportInfo* );
167 void NextRow( ImportInfo* );
168 void SkipLocked( ScEEParseEntry*, BOOL bJoin = TRUE );
// Static helpers operating on a column offset array; offsets, widths and tolerances are USHORT values.
// MakeCol() and ModifyOffset() take their offset/width arguments by reference, MakeColNoRef() by value.
169 static BOOL SeekOffset( ScHTMLColOffset*, USHORT nOffset,
170 SCCOL* pCol, USHORT nOffsetTol );
171 static void MakeCol( ScHTMLColOffset*, USHORT& nOffset,
172 USHORT& nWidth, USHORT nOffsetTol,
173 USHORT nWidthTol );
174 static void MakeColNoRef( ScHTMLColOffset*, USHORT nOffset,
175 USHORT nWidth, USHORT nOffsetTol,
176 USHORT nWidthTol );
177 static void ModifyOffset( ScHTMLColOffset*, USHORT& nOldOffset,
178 USHORT& nNewOffset, USHORT nOffsetTol );
179 void Colonize( ScEEParseEntry* );
180 USHORT GetWidth( ScEEParseEntry* );
181 void SetWidths();
182 void Adjust();
184 USHORT GetWidthPixel( const HTMLOption* );
185 BOOL IsAtBeginningOfText( ImportInfo* );
// Handlers named after HTML elements (table, col, tr, td, img, a, font).
187 void TableOn( ImportInfo* );
188 void ColOn( ImportInfo* );
189 void TableRowOn( ImportInfo* );
190 void TableRowOff( ImportInfo* );
191 void TableDataOn( ImportInfo* );
192 void TableDataOff( ImportInfo* );
193 void TableOff( ImportInfo* );
194 void Image( ImportInfo* );
195 void AnchorOn( ImportInfo* );
196 void FontOn( ImportInfo* );
198 public:
199 ScHTMLLayoutParser( EditEngine*, const String& rBaseURL, const Size& aPageSize, ScDocument* );
200 virtual ~ScHTMLLayoutParser();
// Implementations of the pure virtual functions declared in ScHTMLParser.
201 virtual ULONG Read( SvStream&, const String& rBaseURL );
202 virtual const ScHTMLTable* GetGlobalTable() const;
207 // ============================================================================
208 // HTML DATA QUERY PARSER
209 // ============================================================================
211 /** Declares the orientation in or for a table: column or row.
    The values 0 and 1 match the two elements of ScHTMLTable::maCumSizes. */
212 enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };
214 /** Type for a unique identifier for each table. */
215 typedef sal_uInt16 ScHTMLTableId;
216 /** Identifier of the "global table" (the entire HTML document). */
217 const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
218 /** Used as table index for normal (non-table) entries in ScHTMLEntry structs.
    Note: this has the same value (0) as SC_HTML_GLOBAL_TABLE, so ScHTMLEntry::IsTable()
    returns false for an entry that carries the global table identifier. */
219 const ScHTMLTableId SC_HTML_NO_TABLE = 0;
221 // ============================================================================
223 /** A 2D cell position in an HTML table. */
224 struct ScHTMLPos
226 SCCOL mnCol;
227 SCROW mnRow;
/** Default constructor sets the position to (0,0). */
229 inline explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
230 inline explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) :
231 mnCol( nCol ), mnRow( nRow ) {}
/** Takes column and row from the passed address. */
232 inline explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); }
/** Returns the column for tdCol, otherwise the row. */
234 inline SCCOLROW Get( ScHTMLOrient eOrient ) const
235 { return (eOrient == tdCol) ? mnCol : mnRow; }
236 inline void Set( SCCOL nCol, SCROW nRow )
237 { mnCol = nCol; mnRow = nRow; }
238 inline void Set( const ScAddress& rAddr )
239 { Set( rAddr.Col(), rAddr.Row() ); }
/** Shifts the position by the passed signed column and row differences. */
240 inline void Move( SCsCOL nColDiff, SCsROW nRowDiff )
241 { mnCol = mnCol + nColDiff; mnRow = mnRow + nRowDiff; }
/** Builds an address from column and row; the third argument is always 0
    (presumably the sheet index -- confirm against ScAddress). */
242 inline ScAddress MakeAddr() const
243 { return ScAddress( mnCol, mnRow, 0 ); }
/** Two positions are equal if both row and column are equal. */
246 inline bool operator==( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
248 return (rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol == rPos2.mnCol);
/** Orders positions by row first, then by column. ScHTMLPos is the key type of
    the ::std::map typedef ScHTMLTable::ScHTMLEntryMap, which relies on this ordering. */
251 inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
253 return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
256 // ----------------------------------------------------------------------------
258 /** A 2D cell size in an HTML table. */
259 struct ScHTMLSize
261 SCCOL mnCols;
262 SCROW mnRows;
/** Default constructor sets the size to 0 columns and 0 rows. */
264 inline explicit ScHTMLSize() : mnCols( 0 ), mnRows( 0 ) {}
265 inline explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) :
266 mnCols( nCols ), mnRows( nRows ) {}
/** Returns the column count for tdCol, otherwise the row count. */
268 inline SCCOLROW Get( ScHTMLOrient eOrient ) const
269 { return (eOrient == tdCol) ? mnCols : mnRows; }
270 inline void Set( SCCOL nCols, SCROW nRows )
271 { mnCols = nCols; mnRows = nRows; }
/** Adds the passed signed differences to the column and row counts. */
272 inline void Expand( SCsCOL nColDiff, SCsROW nRowDiff )
273 { mnCols = mnCols + nColDiff; mnRows = mnRows + nRowDiff; }
/** Two sizes are equal if both row count and column count are equal. */
276 inline bool operator==( const ScHTMLSize& rSize1, const ScHTMLSize& rSize2 )
278 return (rSize1.mnRows == rSize2.mnRows) && (rSize1.mnCols == rSize2.mnCols);
281 // ============================================================================
283 /** A single entry containing a line of text or representing a table.
    The members aSel, nTab and aItemSet used below are not declared in this struct;
    presumably they are inherited from ScEEParseEntry -- confirm in eeparser.hxx. */
284 struct ScHTMLEntry : public ScEEParseEntry
286 public:
287 explicit ScHTMLEntry(
288 const SfxItemSet& rItemSet,
289 ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
291 /** Returns true, if the selection of the entry is empty. */
292 inline bool IsEmpty() const { return !aSel.HasRange(); }
293 /** Returns true, if the entry has any content to be imported. */
294 bool HasContents() const;
295 /** Returns true, if the entry represents a table (nTab differs from SC_HTML_NO_TABLE). */
296 inline bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
297 /** Returns the table identifier stored in this entry (nTab). */
298 inline ScHTMLTableId GetTableId() const { return nTab; }
300 /** Sets or clears the import always state. */
301 inline void SetImportAlways( bool bSet = true ) { mbImportAlways = bSet; }
302 /** Sets start point of the entry selection to the start of the import info object. */
303 void AdjustStart( const ImportInfo& rInfo );
304 /** Sets end point of the entry selection to the end of the import info object. */
305 void AdjustEnd( const ImportInfo& rInfo );
306 /** Deletes leading and trailing empty paragraphs from the entry. */
307 void Strip( const EditEngine& rEditEngine );
309 /** Returns read/write access to the item set of this entry. */
310 inline SfxItemSet& GetItemSet() { return aItemSet; }
311 /** Returns read-only access to the item set of this entry. */
312 inline const SfxItemSet& GetItemSet() const { return aItemSet; }
314 private:
315 bool mbImportAlways; /// true = Always import this entry.
318 // ============================================================================
320 /** This struct handles creation of unique table identifiers.
    Because it has a const member and a reference member, objects of this struct
    cannot be copy-assigned. */
321 struct ScHTMLTableAutoId
323 const ScHTMLTableId mnTableId; /// The created unique table identifier.
324 ScHTMLTableId& mrnUnusedId; /// Reference to global unused identifier variable.
326 /** The constructor assigns an unused identifier to member mnTableId.
    NOTE(review): presumably it also advances the referenced unused identifier -- confirm in the implementation. */
327 explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
330 // ----------------------------------------------------------------------------
332 class ScHTMLTableMap;
334 /** Stores data for one table in an HTML document.
336 This class does the main work for importing an HTML document. It manages
337 the correct insertion of parse entries into the correct cells and the
338 creation of nested tables. Recalculation of resulting document size and
339 position is done recursively in all nested tables.
341 class ScHTMLTable
343 public:
344 /** Creates a new HTML table without content.
345 @descr Internally handles a current cell position. This position is
346 invalid until first calls of RowOn() and DataOn().
347 @param rParentTable Reference to the parent table that owns this table.
348 @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
349 explicit ScHTMLTable(
350 ScHTMLTable& rParentTable,
351 const ImportInfo& rInfo,
352 bool bPreFormText );
354 virtual ~ScHTMLTable();
356 /** Returns the name of the table, specified in the TABLE tag. */
357 inline const String& GetTableName() const { return maTableName; }
358 /** Returns the unique identifier of the table. */
359 inline ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
360 /** Returns the table size. */
361 inline const ScHTMLSize& GetSize() const { return maSize; }
362 /** Returns the cell spanning of the specified cell. */
363 ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
365 /** Searches in all nested tables for the specified table.
366 @param nTableId Unique identifier of the table. */
367 ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
369 /** Puts the item into the item set of the current entry. */
370 void PutItem( const SfxPoolItem& rItem );
371 /** Inserts a text portion into current entry. */
372 void PutText( const ImportInfo& rInfo );
373 /** Inserts a new line, if in preformatted text, else does nothing. */
374 void InsertPara( const ImportInfo& rInfo );
376 /** Inserts a line break (<br> tag).
377 @descr Inserts the current entry regardless if it is empty. */
378 void BreakOn();
379 /** Inserts a heading line (<p> and <h*> tags). */
380 void HeadingOn();
381 /** Processes a hyperlink (<a> tag). */
382 void AnchorOn();
384 /** Starts a *new* table nested in this table (<table> tag).
385 @return Pointer to the new table. */
386 ScHTMLTable* TableOn( const ImportInfo& rInfo );
387 /** Closes *this* table (</table> tag).
388 @return Pointer to the parent table. */
389 ScHTMLTable* TableOff( const ImportInfo& rInfo );
390 /** Starts a *new* table based on preformatted text (<pre> tag).
391 @return Pointer to the new table. */
392 ScHTMLTable* PreOn( const ImportInfo& rInfo );
393 /** Closes *this* table based on preformatted text (</pre> tag).
394 @return Pointer to the parent table. */
395 ScHTMLTable* PreOff( const ImportInfo& rInfo );
397 /** Starts next row (<tr> tag).
398 @descr Cell address is invalid until first call of DataOn(). */
399 void RowOn( const ImportInfo& rInfo );
400 /** Closes the current row (</tr> tag).
401 @descr Cell address is invalid until call of RowOn() and DataOn(). */
402 void RowOff( const ImportInfo& rInfo );
403 /** Starts the next cell (<td> or <th> tag). */
404 void DataOn( const ImportInfo& rInfo );
405 /** Closes the current cell (</td> or </th> tag).
406 @descr Cell address is invalid until next call of DataOn(). */
407 void DataOff( const ImportInfo& rInfo );
409 /** Starts the body of the HTML document (<body> tag). */
410 void BodyOn( const ImportInfo& rInfo );
411 /** Closes the body of the HTML document (</body> tag). */
412 void BodyOff( const ImportInfo& rInfo );
414 /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
415 @descr Used to close this table object regardless of the opening tag type.
416 @return Pointer to the parent table, or this, if no parent found. */
417 ScHTMLTable* CloseTable( const ImportInfo& rInfo );
419 /** Returns the resulting document row/column count of the specified HTML row/column. */
420 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
421 /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
422 SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
423 /** Returns the total document row/column count in the specified direction. */
424 SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
425 /** Returns the total document row/column count of the specified HTML cell. */
426 ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
428 /** Returns the resulting Calc position of the top left edge of the table. */
429 inline const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
430 /** Calculates the resulting Calc position of the specified HTML column/row. */
431 SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos = 0 ) const;
432 /** Calculates the resulting Calc position of the specified HTML cell. */
433 ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
435 /** Calculates the current Calc document area of this table. */
436 void GetDocRange( ScRange& rRange ) const;
438 /** Applies border formatting to the passed document. */
439 void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
441 protected:
442 /** Creates a new HTML table without parent.
443 @descr This constructor is used to create the "global table". */
444 explicit ScHTMLTable(
445 SfxItemPool& rPool,
446 EditEngine& rEditEngine,
447 ScEEParseList& rEEParseList,
448 ScHTMLTableId& rnUnusedId );
450 /** Fills all empty cells in this and nested tables with dummy parse entries. */
451 void FillEmptyCells();
452 /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
453 void RecalcDocSize();
454 /** Recalculates the position of all cell entries and nested tables.
455 @param rBasePos The origin of the table in the Calc document. */
456 void RecalcDocPos( const ScHTMLPos& rBasePos );
458 private:
// NOTE(review): ::std::auto_ptr was deprecated in C++11 and removed in C++17; these
// typedefs will need std::unique_ptr when the code base moves to a newer standard.
459 typedef ::std::auto_ptr< ScHTMLTableMap > ScHTMLTableMapPtr;
460 typedef ::std::auto_ptr< SfxItemSet > SfxItemSetPtr;
461 typedef ::std::vector< SCCOLROW > ScSizeVec;
462 typedef ::std::list< ScHTMLEntry* > ScHTMLEntryList;
// Map keyed by cell position; ordering comes from operator<( ScHTMLPos, ScHTMLPos ).
463 typedef ::std::map< ScHTMLPos, ScHTMLEntryList > ScHTMLEntryMap;
464 typedef ::std::auto_ptr< ScHTMLEntry > ScHTMLEntryPtr;
466 /** Returns true, if the current cell does not contain an entry yet. */
467 bool IsEmptyCell() const;
468 /** Returns the item set from cell, row, or table, depending on current state. */
469 const SfxItemSet& GetCurrItemSet() const;
471 /** Returns true, if import info represents a space character. */
472 static bool IsSpaceCharInfo( const ImportInfo& rInfo );
474 /** Creates and returns a new empty flying entry at position (0,0). */
475 ScHTMLEntryPtr CreateEntry() const;
476 /** Creates a new flying entry.
477 @param rInfo Contains the initial edit engine selection for the entry. */
478 void CreateNewEntry( const ImportInfo& rInfo );
480 /** Inserts an empty line in front of the next entry. */
481 void InsertLeadingEmptyLine();
483 /** Pushes the passed entry into the list of the current cell. */
484 void ImplPushEntryToList( ScHTMLEntryList& rEntryList, ScHTMLEntryPtr& rxEntry );
485 /** Tries to insert the entry into the current cell.
486 @descr If insertion is not possible (i.e., currently no cell open), the
487 entry will be inserted into the parent table.
488 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
489 bool PushEntry( ScHTMLEntryPtr& rxEntry );
490 /** Puts the current entry into the entry list, if it is not empty.
491 @param rInfo The import info struct containing the end position of the current entry.
492 @param bLastInCell true = If cell is still empty, put this entry always.
493 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
494 bool PushEntry( const ImportInfo& rInfo, bool bLastInCell = false );
495 /** Pushes a new entry into current cell which references a nested table.
496 @return true = Entry has been pushed into the current cell; false = Entry dropped. */
497 bool PushTableEntry( ScHTMLTableId nTableId );
499 /** Tries to find a table from the table container.
500 @descr Assumes that the table is located in the current container or
501 that the passed table identifier is 0.
502 @param nTableId Unique identifier of the table or 0. */
503 ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
504 /** Inserts a nested table in the current cell at the specified position.
505 @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
506 ScHTMLTable* InsertNestedTable( const ImportInfo& rInfo, bool bPreFormText );
508 /** Inserts a new cell in an unused position, starting from current cell position. */
509 void InsertNewCell( const ScHTMLSize& rSpanSize );
511 /** Set internal states for a new table row. */
512 void ImplRowOn();
513 /** Set internal states for leaving a table row. */
514 void ImplRowOff();
515 /** Set internal states for entering a new table cell. */
516 void ImplDataOn( const ScHTMLSize& rSpanSize );
517 /** Set internal states for leaving a table cell. */
518 void ImplDataOff();
520 /** Inserts additional formatting options from import info into the item set. */
521 void ProcessFormatOptions( SfxItemSet& rItemSet, const ImportInfo& rInfo );
523 /** Updates the document column/row size of the specified column or row.
524 @descr Only increases the present count, never decreases. */
525 void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
526 /** Calculates and sets the resulting size the cell needs in the document.
527 @descr Reduces the needed size in merged cells.
528 @param nCellPos The first column/row position of the (merged) cell.
529 @param nCellSpan The cell spanning in the specified orientation.
530 @param nRealDocSize The raw document size of all entries of the cell. */
531 void CalcNeededDocSize(
532 ScHTMLOrient eOrient, SCCOLROW nCellPos,
533 SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
535 private:
536 ScHTMLTable* mpParentTable; /// Pointer to parent table.
537 ScHTMLTableMapPtr mxNestedTables; /// Table of nested HTML tables.
538 String maTableName; /// Table name from <table id> option.
539 ScHTMLTableAutoId maTableId; /// Unique identifier of this table.
540 SfxItemSet maTableItemSet; /// Items for the entire table.
541 SfxItemSetPtr mxRowItemSet; /// Items for the current table row.
542 SfxItemSetPtr mxDataItemSet; /// Items for the current cell.
543 ScRangeList maHMergedCells; /// List of all horizontally merged cells.
544 ScRangeList maVMergedCells; /// List of all vertically merged cells.
545 ScRangeList maUsedCells; /// List of all used cells.
546 EditEngine& mrEditEngine; /// Edit engine (from ScEEParser).
547 ScEEParseList& mrEEParseList; /// List that owns the parse entries (from ScEEParser).
548 ScHTMLEntryMap maEntryMap; /// List of entries for each cell.
549 ScHTMLEntryList* mpCurrEntryList; /// Current entry list from map for faster access.
550 ScHTMLEntryPtr mxCurrEntry; /// Working entry, not yet inserted in a list.
551 ScSizeVec maCumSizes[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
552 ScHTMLSize maSize; /// Size of the table.
553 ScHTMLPos maCurrCell; /// Address of current cell to fill.
554 ScHTMLPos maDocBasePos; /// Resulting base address in a Calc document.
555 bool mbBorderOn; /// true = Table borders on.
556 bool mbPreFormText; /// true = Table from preformatted text (<pre> tag).
557 bool mbRowOn; /// true = Inside of <tr> </tr>.
558 bool mbDataOn; /// true = Inside of <td> </td> or <th> </th>.
559 bool mbPushEmptyLine; /// true = Insert empty line before current entry.
562 // ----------------------------------------------------------------------------
564 /** The "global table" representing the entire HTML document.
    Its constructor takes the same four arguments as the protected, parent-less
    constructor of ScHTMLTable. */
565 class ScHTMLGlobalTable : public ScHTMLTable
567 public:
568 explicit ScHTMLGlobalTable(
569 SfxItemPool& rPool,
570 EditEngine& rEditEngine,
571 ScEEParseList& rEEParseList,
572 ScHTMLTableId& rnUnusedId );
574 virtual ~ScHTMLGlobalTable();
576 /** Recalculates sizes and resulting positions of all document entries. */
577 void Recalc();
580 // ============================================================================
582 /** The HTML parser for data queries. Focuses on data import, not on layout.
584 Builds the table structure correctly, ignores extended formatting like
585 pictures or column widths.
587 class ScHTMLQueryParser : public ScHTMLParser
589 public:
590 explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
591 virtual ~ScHTMLQueryParser();
593 virtual ULONG Read( SvStream& rStrm, const String& rBaseURL );
595 /** Returns the "global table" which contains the entire HTML document. */
596 virtual const ScHTMLTable* GetGlobalTable() const;
598 private:
599 /** Handles all possible tags in the HTML document. */
600 void ProcessToken( const ImportInfo& rInfo );
601 /** Inserts a text portion into current entry. */
602 void InsertText( const ImportInfo& rInfo );
603 /** Processes the <font> tag. */
604 void FontOn( const ImportInfo& rInfo );
606 /** Processes the <meta> tag. */
607 void MetaOn( const ImportInfo& rInfo );
608 /** Opens the title of the HTML document (<title> tag). */
609 void TitleOn( const ImportInfo& rInfo );
610 /** Closes the title of the HTML document (</title> tag). */
611 void TitleOff( const ImportInfo& rInfo );
613 /** Opens a new table at the current position. */
614 void TableOn( const ImportInfo& rInfo );
615 /** Closes the current table. */
616 void TableOff( const ImportInfo& rInfo );
617 /** Opens a new table based on preformatted text. */
618 void PreOn( const ImportInfo& rInfo );
619 /** Closes the current preformatted text table. */
620 void PreOff( const ImportInfo& rInfo );
622 /** Closes the current table, regardless of the opening tag. */
623 void CloseTable( const ImportInfo& rInfo );
/** Link handler declaration; unlike ScHTMLLayoutParser::HTMLImportHdl it takes a const ImportInfo pointer. */
625 DECL_LINK( HTMLImportHdl, const ImportInfo* );
627 private:
// NOTE(review): ::std::auto_ptr was deprecated in C++11 and removed in C++17.
628 typedef ::std::auto_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr;
630 String maTitle; /// The title of the document.
631 ScHTMLGlobalTablePtr mxGlobTable; /// Contains the entire imported document.
632 ScHTMLTable* mpCurrTable; /// Pointer to current table (performance).
633 ScHTMLTableId mnUnusedId; /// First unused table identifier.
634 bool mbTitleOn; /// true = Inside of <title> </title>.
638 // ============================================================================
640 #endif