Update ooo320-m1
[ooovba.git] / dbaccess / source / ui / misc / HtmlReader.cxx
blob1bf5f4d9d09604e387cd0105438c85f01f34afe8
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: HtmlReader.cxx,v $
10 * $Revision: 1.34 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_dbaccess.hxx"
33 #include "HtmlReader.hxx"
34 #include <connectivity/dbconversion.hxx>
35 #include <connectivity/dbtools.hxx>
36 #include <tools/tenccvt.hxx>
37 #include <comphelper/extract.hxx>
38 #include "dbu_misc.hrc"
39 #include "dbustrings.hrc"
40 #include <sfx2/sfxhtml.hxx>
41 #include <tools/debug.hxx>
42 #include <tools/tenccvt.hxx>
43 #include "moduledbu.hxx"
44 #include <com/sun/star/sdbcx/XDataDescriptorFactory.hpp>
45 #include <com/sun/star/sdbcx/XColumnsSupplier.hpp>
46 #include <com/sun/star/sdbcx/XAppend.hpp>
47 #include <com/sun/star/sdbc/DataType.hpp>
48 #include <com/sun/star/sdbc/ColumnValue.hpp>
49 #include <com/sun/star/awt/FontDescriptor.hpp>
50 #include <com/sun/star/awt/FontWeight.hpp>
51 #include <com/sun/star/awt/FontStrikeout.hpp>
52 #include <com/sun/star/awt/FontSlant.hpp>
53 #include <com/sun/star/awt/FontUnderline.hpp>
54 #include <com/sun/star/util/NumberFormat.hpp>
55 #include <com/sun/star/util/XNumberFormatTypes.hpp>
56 #include <svtools/htmltokn.h>
57 #include <svtools/htmlkywd.hxx>
58 #include <tools/color.hxx>
59 #include "WCopyTable.hxx"
60 #include "WExtendPages.hxx"
61 #include "WNameMatch.hxx"
62 #include "WColumnSelect.hxx"
63 #include "QEnumTypes.hxx"
64 #include "WCPage.hxx"
65 #include <tools/inetmime.hxx>
66 #include <svtools/inettype.hxx>
67 #include <rtl/tencinfo.h>
68 #include "UITools.hxx"
69 #include <vcl/svapp.hxx>
70 #include <rtl/logfile.hxx>
72 using namespace dbaui;
73 using namespace ::com::sun::star::uno;
74 using namespace ::com::sun::star::beans;
75 using namespace ::com::sun::star::container;
76 using namespace ::com::sun::star::sdbc;
77 using namespace ::com::sun::star::sdbcx;
78 using namespace ::com::sun::star::awt;
// Bound applied to the value of the HTML <font size=...> option in
// OHTMLReader::TableFontOn.
80 #define DBAUI_HTML_FONTSIZES 8 // same as for the export, HTML options
// Numeric identifiers for the kinds of HTML <meta> entries.
// NOTE(review): none of the HTML_META_* macros is referenced in the visible
// part of this file - confirm whether they are still needed.
81 #define HTML_META_NONE 0
82 #define HTML_META_AUTHOR 1
83 #define HTML_META_DESCRIPTION 2
84 #define HTML_META_KEYWORDS 3
85 #define HTML_META_REFRESH 4
86 #define HTML_META_CLASSIFICATION 5
87 #define HTML_META_CREATED 6
88 #define HTML_META_CHANGEDBY 7
89 #define HTML_META_CHANGED 8
90 #define HTML_META_GENERATOR 9
91 #define HTML_META_SDFOOTNOTE 10
92 #define HTML_META_SDENDNOTE 11
93 #define HTML_META_CONTENT_TYPE 12
95 // ==========================================================================
96 DBG_NAME(OHTMLReader)
97 // ==========================================================================
98 // OHTMLReader
99 // ==========================================================================
100 OHTMLReader::OHTMLReader(SvStream& rIn,const SharedConnection& _rxConnection,
101 const Reference< ::com::sun::star::util::XNumberFormatter >& _rxNumberF,
102 const ::com::sun::star::uno::Reference< ::com::sun::star::lang::XMultiServiceFactory >& _rM,
103 const TColumnVector* pList,
104 const OTypeInfoMap* _pInfoMap)
105 :HTMLParser(rIn)
106 ,ODatabaseExport( _rxConnection, _rxNumberF, _rM, pList, _pInfoMap, rIn )
107 ,m_nTableCount(0)
108 ,m_nColumnWidth(87)
109 ,m_bMetaOptions(sal_False)
110 ,m_bSDNum(sal_False)
112 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::OHTMLReader" );
113 DBG_CTOR(OHTMLReader,NULL);
114 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
115 // If the file starts with a BOM, switch to UCS2.
116 SetSwitchToUCS2( TRUE );
118 // ---------------------------------------------------------------------------
119 OHTMLReader::OHTMLReader(SvStream& rIn,
120 sal_Int32 nRows,
121 const TPositions &_rColumnPositions,
122 const Reference< ::com::sun::star::util::XNumberFormatter >& _rxNumberF,
123 const ::com::sun::star::uno::Reference< ::com::sun::star::lang::XMultiServiceFactory >& _rM,
124 const TColumnVector* pList,
125 const OTypeInfoMap* _pInfoMap,
126 sal_Bool _bAutoIncrementEnabled)
127 :HTMLParser(rIn)
128 ,ODatabaseExport( nRows, _rColumnPositions, _rxNumberF, _rM, pList, _pInfoMap, _bAutoIncrementEnabled, rIn )
129 ,m_nTableCount(0)
130 ,m_nColumnWidth(87)
131 ,m_bMetaOptions(sal_False)
132 ,m_bSDNum(sal_False)
134 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::OHTMLReader" );
135 DBG_CTOR(OHTMLReader,NULL);
136 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
137 // If the file starts with a BOM, switch to UCS2.
138 SetSwitchToUCS2( TRUE );
140 // ---------------------------------------------------------------------------
141 OHTMLReader::~OHTMLReader()
143 DBG_DTOR(OHTMLReader,NULL);
145 // ---------------------------------------------------------------------------
146 SvParserState OHTMLReader::CallParser()
148 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::CallParser" );
149 DBG_CHKTHIS(OHTMLReader,NULL);
150 rInput.Seek(STREAM_SEEK_TO_BEGIN);
151 rInput.ResetError();
152 SvParserState eParseState = HTMLParser::CallParser();
153 SetColumnTypes(m_pColumnList,m_pInfoMap);
154 return m_bFoundTable ? eParseState : SVPAR_ERROR;
156 // -----------------------------------------------------------------------------
157 void OHTMLReader::NextToken( int nToken )
159 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::NextToken" );
160 DBG_CHKTHIS(OHTMLReader,NULL);
161 if(m_bError || !m_nRows) // falls Fehler oder keine Rows mehr zur "Uberpr"ufung dann gleich zur"uck
162 return;
163 if ( nToken == HTML_META )
164 setTextEncoding();
166 if(m_xConnection.is()) // gibt an welcher CTOR gerufen wurde und damit, ob eine Tabelle erstellt werden soll
168 switch(nToken)
170 case HTML_TABLE_ON:
171 ++m_nTableCount;
172 { // es kann auch TD oder TH sein, wenn es vorher kein TABLE gab
173 const HTMLOptions* pHtmlOptions = GetOptions();
174 sal_Int16 nArrLen = pHtmlOptions->Count();
175 for ( sal_Int16 i = 0; i < nArrLen; i++ )
177 const HTMLOption* pOption = (*pHtmlOptions)[i];
178 switch( pOption->GetToken() )
180 case HTML_O_WIDTH:
181 { // Prozent: von Dokumentbreite bzw. aeusserer Zelle
182 m_nColumnWidth = GetWidthPixel( pOption );
184 break;
188 case HTML_THEAD_ON:
189 case HTML_TBODY_ON:
191 sal_uInt32 nTell = rInput.Tell(); // verändert vielleicht die Position des Streams
192 if ( !m_xTable.is() )
193 {// erste Zeile als Header verwenden
194 m_bError = !CreateTable(nToken);
195 if ( m_bAppendFirstLine )
196 rInput.Seek(nTell);
199 break;
200 case HTML_TABLE_OFF:
201 if(!--m_nTableCount)
203 m_xTable = NULL;
205 break;
206 case HTML_TABLEROW_ON:
207 if ( m_pUpdateHelper.get() )
211 m_pUpdateHelper->moveToInsertRow(); // sonst neue Zeile anh"angen
213 catch(SQLException& e)
214 // UpdateFehlerbehandlung
216 showErrorDialog(e);
219 else
220 m_bError = sal_True;
221 break;
222 case HTML_TEXTTOKEN:
223 case HTML_SINGLECHAR:
224 if ( m_bInTbl ) //&& !m_bSDNum ) // wichtig, da wir sonst auch die Namen der Fonts bekommen
225 m_sTextToken += aToken;
226 break;
227 case HTML_PARABREAK_OFF:
228 m_sCurrent += m_sTextToken;
229 break;
230 case HTML_PARABREAK_ON:
231 m_sTextToken.Erase();
232 break;
233 case HTML_TABLEDATA_ON:
234 fetchOptions();
235 break;
236 case HTML_TABLEDATA_OFF:
238 if ( m_sCurrent.Len() )
239 m_sTextToken = m_sCurrent;
242 insertValueIntoColumn();
244 catch(SQLException& e)
245 // UpdateFehlerbehandlung
247 showErrorDialog(e);
249 m_sCurrent.Erase();
250 m_nColumnPos++;
251 eraseTokens();
252 m_bSDNum = m_bInTbl = sal_False;
254 break;
255 case HTML_TABLEROW_OFF:
256 if ( !m_pUpdateHelper.get() )
258 m_bError = sal_True;
259 break;
263 m_nRowCount++;
264 if (m_bIsAutoIncrement) // if bSetAutoIncrement then I have to set the autoincrement
265 m_pUpdateHelper->updateInt(1,m_nRowCount);
266 m_pUpdateHelper->insertRow();
268 catch(SQLException& e)
269 //////////////////////////////////////////////////////////////////////
270 // UpdateFehlerbehandlung
272 showErrorDialog(e);
274 m_nColumnPos = 0;
275 break;
278 else // Zweig nur f"ur Typpr"ufung g"ultig
280 switch(nToken)
282 case HTML_THEAD_ON:
283 case HTML_TBODY_ON:
284 // Der Spalten Kopf z"ahlt nicht mit
285 if(m_bHead)
289 while(GetNextToken() != HTML_TABLEROW_OFF);
290 m_bHead = sal_False;
292 break;
293 case HTML_TABLEDATA_ON:
294 case HTML_TABLEHEADER_ON:
295 fetchOptions();
296 break;
297 case HTML_TEXTTOKEN:
298 case HTML_SINGLECHAR:
299 if ( m_bInTbl ) // && !m_bSDNum ) // wichtig, da wir sonst auch die Namen der Fonts bekommen
300 m_sTextToken += aToken;
301 break;
302 case HTML_PARABREAK_OFF:
303 m_sCurrent += m_sTextToken;
304 break;
305 case HTML_PARABREAK_ON:
306 m_sTextToken.Erase();
307 break;
308 case HTML_TABLEDATA_OFF:
309 if ( m_sCurrent.Len() )
310 m_sTextToken = m_sCurrent;
311 adjustFormat();
312 m_nColumnPos++;
313 m_bSDNum = m_bInTbl = sal_False;
314 m_sCurrent.Erase();
315 break;
316 case HTML_TABLEROW_OFF:
317 if ( m_sCurrent.Len() )
318 m_sTextToken = m_sCurrent;
319 adjustFormat();
320 m_nColumnPos = 0;
321 m_nRows--;
322 m_sCurrent.Erase();
323 break;
327 // -----------------------------------------------------------------------------
328 void OHTMLReader::fetchOptions()
330 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::fetchOptions" );
331 m_bInTbl = TRUE;
332 const HTMLOptions* options = GetOptions();
333 sal_Int16 nArrLen = options->Count();
334 for ( sal_Int16 i = 0; i < nArrLen; i++ )
336 const HTMLOption* pOption = (*options)[i];
337 switch( pOption->GetToken() )
339 case HTML_O_SDVAL:
341 m_sValToken = pOption->GetString();
342 //m_sTextToken = pOption->GetString();
343 m_bSDNum = sal_True;
345 break;
346 case HTML_O_SDNUM:
347 m_sNumToken = pOption->GetString();
348 break;
352 //---------------------------------------------------------------------------------
353 void OHTMLReader::TableDataOn(SvxCellHorJustify& eVal,int nToken)
355 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::TableDataOn" );
356 DBG_CHKTHIS(OHTMLReader,NULL);
357 sal_Bool bHorJustifyCenterTH = (nToken == HTML_TABLEHEADER_ON);
358 const HTMLOptions* pHtmlOptions = GetOptions();
359 sal_Int16 nArrLen = pHtmlOptions->Count();
360 for ( sal_Int16 i = 0; i < nArrLen; i++ )
362 const HTMLOption* pOption = (*pHtmlOptions)[i];
363 switch( pOption->GetToken() )
365 case HTML_O_ALIGN:
367 bHorJustifyCenterTH = sal_False;
368 const String& rOptVal = pOption->GetString();
369 if (rOptVal.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_right ))
370 eVal = SVX_HOR_JUSTIFY_RIGHT;
371 else if (rOptVal.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_center ))
372 eVal = SVX_HOR_JUSTIFY_CENTER;
373 else if (rOptVal.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_left ))
374 eVal = SVX_HOR_JUSTIFY_LEFT;
375 else
376 eVal = SVX_HOR_JUSTIFY_STANDARD;
378 break;
379 case HTML_O_WIDTH:
380 m_nWidth = GetWidthPixel( pOption );
381 break;
386 //---------------------------------------------------------------------------------
387 void OHTMLReader::TableFontOn(FontDescriptor& _rFont,sal_Int32 &_rTextColor)
389 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::TableFontOn" );
390 DBG_CHKTHIS(OHTMLReader,NULL);
391 const HTMLOptions* pHtmlOptions = GetOptions();
392 sal_Int16 nArrLen = pHtmlOptions->Count();
393 for ( sal_Int16 i = 0; i < nArrLen; i++ )
395 const HTMLOption* pOption = (*pHtmlOptions)[i];
396 switch( pOption->GetToken() )
398 case HTML_O_COLOR:
400 Color aColor;
401 pOption->GetColor( aColor );
402 _rTextColor = aColor.GetRGBColor();
404 break;
405 case HTML_O_FACE :
407 const String& rFace = pOption->GetString();
408 String aFontName;
409 xub_StrLen nPos = 0;
410 while( nPos != STRING_NOTFOUND )
411 { // Fontliste, VCL: Semikolon als Separator, HTML: Komma
412 String aFName = rFace.GetToken( 0, ',', nPos );
413 aFName.EraseTrailingChars().EraseLeadingChars();
414 if( aFontName.Len() )
415 aFontName += ';';
416 aFontName += aFName;
418 if ( aFontName.Len() )
419 _rFont.Name = ::rtl::OUString(aFontName);
421 break;
422 case HTML_O_SIZE :
424 sal_Int16 nSize = (sal_Int16) pOption->GetNumber();
425 if ( nSize == 0 )
426 nSize = 1;
427 else if ( nSize < DBAUI_HTML_FONTSIZES )
428 nSize = DBAUI_HTML_FONTSIZES;
430 _rFont.Height = nSize;
432 break;
436 // ---------------------------------------------------------------------------
437 sal_Int16 OHTMLReader::GetWidthPixel( const HTMLOption* pOption )
439 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::GetWidthPixel" );
440 DBG_CHKTHIS(OHTMLReader,NULL);
441 const String& rOptVal = pOption->GetString();
442 if ( rOptVal.Search('%') != STRING_NOTFOUND )
443 { // Prozent
444 DBG_ASSERT( m_nColumnWidth, "WIDTH Option: m_nColumnWidth==0 und Width%" );
445 return (sal_Int16)((pOption->GetNumber() * m_nColumnWidth) / 100);
447 else
449 if ( rOptVal.Search('*') != STRING_NOTFOUND )
450 { // relativ zu was?!?
451 //2do: ColArray aller relativen Werte sammeln und dann MakeCol
452 return 0;
454 else
455 return (sal_Int16)pOption->GetNumber(); // Pixel
458 // ---------------------------------------------------------------------------
459 sal_Bool OHTMLReader::CreateTable(int nToken)
461 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::CreateTable" );
462 DBG_CHKTHIS(OHTMLReader,NULL);
463 String aTempName(ModuleRes(STR_TBL_TITLE));
464 aTempName = aTempName.GetToken(0,' ');
465 aTempName = String(::dbtools::createUniqueName(m_xTables,::rtl::OUString(aTempName )));
467 int nTmpToken2 = nToken;
468 sal_Bool bCaption = sal_False;
469 sal_Bool bTableHeader = sal_False;
470 String aColumnName;
471 SvxCellHorJustify eVal;
473 String aTableName;
474 FontDescriptor aFont = ::dbaui::CreateFontDescriptor(Application::GetSettings().GetStyleSettings().GetAppFont());
475 sal_Int32 nTextColor = 0;
478 switch(nTmpToken2)
480 case HTML_TEXTTOKEN:
481 case HTML_SINGLECHAR:
482 if(bTableHeader)
483 aColumnName += aToken;
484 if(bCaption)
485 aTableName += aToken;
486 break;
487 case HTML_PARABREAK_OFF:
488 m_sCurrent += aColumnName;
489 break;
490 case HTML_PARABREAK_ON:
491 m_sTextToken.Erase();
492 break;
493 case HTML_TABLEDATA_ON:
494 // m_bAppendFirstLine = true;
495 // run through
496 case HTML_TABLEHEADER_ON:
497 TableDataOn(eVal,nTmpToken2);
498 bTableHeader = TRUE;
499 break;
500 case HTML_TABLEDATA_OFF:
501 // m_bAppendFirstLine = true;
502 // run through
503 case HTML_TABLEHEADER_OFF:
505 aColumnName.EraseLeadingChars();
506 aColumnName.EraseTrailingChars();
507 if (!aColumnName.Len() || m_bAppendFirstLine )
508 aColumnName = String(ModuleRes(STR_COLUMN_NAME));
509 else if ( m_sCurrent.Len() )
510 aColumnName = m_sCurrent;
512 CreateDefaultColumn(aColumnName);
513 aColumnName.Erase();
514 m_sCurrent.Erase();
516 eVal = SVX_HOR_JUSTIFY_STANDARD;
517 bTableHeader = sal_False;
519 break;
521 case HTML_TITLE_ON:
522 case HTML_CAPTION_ON:
523 bCaption = TRUE;
524 break;
525 case HTML_TITLE_OFF:
526 case HTML_CAPTION_OFF:
527 aTableName.EraseLeadingChars();
528 aTableName.EraseTrailingChars();
529 if(!aTableName.Len())
530 aTableName = String(::dbtools::createUniqueName(m_xTables,::rtl::OUString(aTableName)));
531 else
532 aTableName = aTempName;
533 bCaption = sal_False;
534 break;
535 case HTML_FONT_ON:
536 TableFontOn(aFont,nTextColor);
537 break;
538 case HTML_BOLD_ON:
539 aFont.Weight = ::com::sun::star::awt::FontWeight::BOLD;
540 break;
541 case HTML_ITALIC_ON:
542 aFont.Slant = ::com::sun::star::awt::FontSlant_ITALIC;
543 break;
544 case HTML_UNDERLINE_ON:
545 aFont.Underline = ::com::sun::star::awt::FontUnderline::SINGLE;
546 break;
547 case HTML_STRIKE_ON:
548 aFont.Strikeout = ::com::sun::star::awt::FontStrikeout::SINGLE;
549 break;
552 while((nTmpToken2 = GetNextToken()) != HTML_TABLEROW_OFF);
554 if ( m_sCurrent.Len() )
555 aColumnName = m_sCurrent;
556 if(aColumnName.Len())
557 CreateDefaultColumn(aColumnName);
559 if ( m_vDestVector.empty() )
560 return sal_False;
562 if(!aTableName.Len())
563 aTableName = aTempName;
565 m_bInTbl = sal_False;
566 m_bFoundTable = sal_True;
568 if ( isCheckEnabled() )
569 return sal_True;
571 return !executeWizard(aTableName,makeAny(nTextColor),aFont) && m_xTable.is();
573 // -----------------------------------------------------------------------------
574 void OHTMLReader::setTextEncoding()
576 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::setTextEncoding" );
577 DBG_CHKTHIS(OHTMLReader,NULL);
578 m_bMetaOptions = sal_True;
579 ParseMetaOptions(NULL, NULL);
582 // -----------------------------------------------------------------------------
583 void OHTMLReader::release()
585 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::release" );
586 DBG_CHKTHIS(OHTMLReader,NULL);
587 ReleaseRef();
589 // -----------------------------------------------------------------------------
590 TypeSelectionPageFactory OHTMLReader::getTypeSelectionPageFactory()
592 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::getTypeSelectionPageFactory" );
593 DBG_CHKTHIS(OHTMLReader,NULL);
594 return &OWizHTMLExtend::Create;
596 // -----------------------------------------------------------------------------