Version 3.6.0.2, tag libreoffice-3.6.0.2
[LibreOffice.git] / dbaccess / source / ui / misc / HtmlReader.cxx
blob56648b980475d8520082f071a40bcc6b19f16507
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include "HtmlReader.hxx"
30 #include <connectivity/dbconversion.hxx>
31 #include <connectivity/dbtools.hxx>
32 #include <tools/tenccvt.hxx>
33 #include <comphelper/extract.hxx>
34 #include "dbu_misc.hrc"
35 #include "dbustrings.hrc"
36 #include <sfx2/sfxhtml.hxx>
37 #include <osl/diagnose.h>
38 #include "moduledbu.hxx"
39 #include <com/sun/star/sdbcx/XDataDescriptorFactory.hpp>
40 #include <com/sun/star/sdbcx/XColumnsSupplier.hpp>
41 #include <com/sun/star/sdbcx/XAppend.hpp>
42 #include <com/sun/star/sdbc/DataType.hpp>
43 #include <com/sun/star/sdbc/ColumnValue.hpp>
44 #include <com/sun/star/awt/FontDescriptor.hpp>
45 #include <com/sun/star/awt/FontWeight.hpp>
46 #include <com/sun/star/awt/FontStrikeout.hpp>
47 #include <com/sun/star/awt/FontSlant.hpp>
48 #include <com/sun/star/awt/FontUnderline.hpp>
49 #include <com/sun/star/util/NumberFormat.hpp>
50 #include <com/sun/star/util/XNumberFormatTypes.hpp>
51 #include <svtools/htmltokn.h>
52 #include <svtools/htmlkywd.hxx>
53 #include <tools/color.hxx>
54 #include "WCopyTable.hxx"
55 #include "WExtendPages.hxx"
56 #include "WNameMatch.hxx"
57 #include "WColumnSelect.hxx"
58 #include "QEnumTypes.hxx"
59 #include "WCPage.hxx"
60 #include <tools/inetmime.hxx>
61 #include <svl/inettype.hxx>
62 #include <rtl/tencinfo.h>
63 #include "UITools.hxx"
64 #include <vcl/svapp.hxx>
65 #include <rtl/logfile.hxx>
67 using namespace dbaui;
68 using namespace ::com::sun::star::uno;
69 using namespace ::com::sun::star::beans;
70 using namespace ::com::sun::star::container;
71 using namespace ::com::sun::star::sdbc;
72 using namespace ::com::sun::star::sdbcx;
73 using namespace ::com::sun::star::awt;
// Upper bound used for the HTML font SIZE option (same as export, HTML options).
#define DBAUI_HTML_FONTSIZES        8

// HTML_META_* identifiers; none of them is referenced by the code in this file.
#define HTML_META_NONE              0
#define HTML_META_AUTHOR            1
#define HTML_META_DESCRIPTION       2
#define HTML_META_KEYWORDS          3
#define HTML_META_REFRESH           4
#define HTML_META_CLASSIFICATION    5
#define HTML_META_CREATED           6
#define HTML_META_CHANGEDBY         7
#define HTML_META_CHANGED           8
#define HTML_META_GENERATOR         9
#define HTML_META_SDFOOTNOTE        10
#define HTML_META_SDENDNOTE         11
#define HTML_META_CONTENT_TYPE      12
90 // ==========================================================================
91 DBG_NAME(OHTMLReader)
92 // ==========================================================================
93 // OHTMLReader
94 // ==========================================================================
95 OHTMLReader::OHTMLReader(SvStream& rIn,const SharedConnection& _rxConnection,
96 const Reference< ::com::sun::star::util::XNumberFormatter >& _rxNumberF,
97 const ::com::sun::star::uno::Reference< ::com::sun::star::lang::XMultiServiceFactory >& _rM,
98 const TColumnVector* pList,
99 const OTypeInfoMap* _pInfoMap)
100 :HTMLParser(rIn)
101 ,ODatabaseExport( _rxConnection, _rxNumberF, _rM, pList, _pInfoMap, rIn )
102 ,m_nTableCount(0)
103 ,m_nColumnWidth(87)
104 ,m_bMetaOptions(sal_False)
105 ,m_bSDNum(sal_False)
107 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::OHTMLReader" );
108 DBG_CTOR(OHTMLReader,NULL);
109 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
110 // If the file starts with a BOM, switch to UCS2.
111 SetSwitchToUCS2( sal_True );
113 // ---------------------------------------------------------------------------
114 OHTMLReader::OHTMLReader(SvStream& rIn,
115 sal_Int32 nRows,
116 const TPositions &_rColumnPositions,
117 const Reference< ::com::sun::star::util::XNumberFormatter >& _rxNumberF,
118 const ::com::sun::star::uno::Reference< ::com::sun::star::lang::XMultiServiceFactory >& _rM,
119 const TColumnVector* pList,
120 const OTypeInfoMap* _pInfoMap,
121 sal_Bool _bAutoIncrementEnabled)
122 :HTMLParser(rIn)
123 ,ODatabaseExport( nRows, _rColumnPositions, _rxNumberF, _rM, pList, _pInfoMap, _bAutoIncrementEnabled, rIn )
124 ,m_nTableCount(0)
125 ,m_nColumnWidth(87)
126 ,m_bMetaOptions(sal_False)
127 ,m_bSDNum(sal_False)
129 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::OHTMLReader" );
130 DBG_CTOR(OHTMLReader,NULL);
131 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
132 // If the file starts with a BOM, switch to UCS2.
133 SetSwitchToUCS2( sal_True );
135 // ---------------------------------------------------------------------------
136 OHTMLReader::~OHTMLReader()
138 DBG_DTOR(OHTMLReader,NULL);
140 // ---------------------------------------------------------------------------
141 SvParserState OHTMLReader::CallParser()
143 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::CallParser" );
144 DBG_CHKTHIS(OHTMLReader,NULL);
145 rInput.Seek(STREAM_SEEK_TO_BEGIN);
146 rInput.ResetError();
147 SvParserState eParseState = HTMLParser::CallParser();
148 SetColumnTypes(m_pColumnList,m_pInfoMap);
149 return m_bFoundTable ? eParseState : SVPAR_ERROR;
151 // -----------------------------------------------------------------------------
152 void OHTMLReader::NextToken( int nToken )
154 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::NextToken" );
155 DBG_CHKTHIS(OHTMLReader,NULL);
156 if(m_bError || !m_nRows) // falls Fehler oder keine Rows mehr zur "Uberpr"ufung dann gleich zur"uck
157 return;
158 if ( nToken == HTML_META )
159 setTextEncoding();
161 if(m_xConnection.is()) // gibt an welcher CTOR gerufen wurde und damit, ob eine Tabelle erstellt werden soll
163 switch(nToken)
165 case HTML_TABLE_ON:
166 ++m_nTableCount;
167 { // es kann auch TD oder TH sein, wenn es vorher kein TABLE gab
168 const HTMLOptions& rHtmlOptions = GetOptions();
169 for (size_t i = 0, n = rHtmlOptions.size(); i < n; ++i)
171 const HTMLOption& rOption = rHtmlOptions[i];
172 switch( rOption.GetToken() )
174 case HTML_O_WIDTH:
175 { // Prozent: von Dokumentbreite bzw. aeusserer Zelle
176 m_nColumnWidth = GetWidthPixel( rOption );
178 break;
182 case HTML_THEAD_ON:
183 case HTML_TBODY_ON:
185 sal_uInt32 nTell = rInput.Tell(); // ver�ndert vielleicht die Position des Streams
186 if ( !m_xTable.is() )
187 {// erste Zeile als Header verwenden
188 m_bError = !CreateTable(nToken);
189 if ( m_bAppendFirstLine )
190 rInput.Seek(nTell);
193 break;
194 case HTML_TABLE_OFF:
195 if(!--m_nTableCount)
197 m_xTable = NULL;
199 break;
200 case HTML_TABLEROW_ON:
201 if ( m_pUpdateHelper.get() )
205 m_pUpdateHelper->moveToInsertRow(); // sonst neue Zeile anh"angen
207 catch(SQLException& e)
208 // UpdateFehlerbehandlung
210 showErrorDialog(e);
213 else
214 m_bError = sal_True;
215 break;
216 case HTML_TEXTTOKEN:
217 case HTML_SINGLECHAR:
218 if ( m_bInTbl ) //&& !m_bSDNum ) // wichtig, da wir sonst auch die Namen der Fonts bekommen
219 m_sTextToken += aToken;
220 break;
221 case HTML_PARABREAK_OFF:
222 m_sCurrent += m_sTextToken;
223 break;
224 case HTML_PARABREAK_ON:
225 m_sTextToken.Erase();
226 break;
227 case HTML_TABLEDATA_ON:
228 fetchOptions();
229 break;
230 case HTML_TABLEDATA_OFF:
232 if ( m_sCurrent.Len() )
233 m_sTextToken = m_sCurrent;
236 insertValueIntoColumn();
238 catch(SQLException& e)
239 // UpdateFehlerbehandlung
241 showErrorDialog(e);
243 m_sCurrent.Erase();
244 m_nColumnPos++;
245 eraseTokens();
246 m_bSDNum = m_bInTbl = sal_False;
248 break;
249 case HTML_TABLEROW_OFF:
250 if ( !m_pUpdateHelper.get() )
252 m_bError = sal_True;
253 break;
257 m_nRowCount++;
258 if (m_bIsAutoIncrement) // if bSetAutoIncrement then I have to set the autoincrement
259 m_pUpdateHelper->updateInt(1,m_nRowCount);
260 m_pUpdateHelper->insertRow();
262 catch(SQLException& e)
263 //////////////////////////////////////////////////////////////////////
264 // UpdateFehlerbehandlung
266 showErrorDialog(e);
268 m_nColumnPos = 0;
269 break;
272 else // Zweig nur f"ur Typpr"ufung g"ultig
274 switch(nToken)
276 case HTML_THEAD_ON:
277 case HTML_TBODY_ON:
278 // Der Spalten Kopf z"ahlt nicht mit
279 if(m_bHead)
283 while(GetNextToken() != HTML_TABLEROW_OFF);
284 m_bHead = sal_False;
286 break;
287 case HTML_TABLEDATA_ON:
288 case HTML_TABLEHEADER_ON:
289 fetchOptions();
290 break;
291 case HTML_TEXTTOKEN:
292 case HTML_SINGLECHAR:
293 if ( m_bInTbl ) // && !m_bSDNum ) // wichtig, da wir sonst auch die Namen der Fonts bekommen
294 m_sTextToken += aToken;
295 break;
296 case HTML_PARABREAK_OFF:
297 m_sCurrent += m_sTextToken;
298 break;
299 case HTML_PARABREAK_ON:
300 m_sTextToken.Erase();
301 break;
302 case HTML_TABLEDATA_OFF:
303 if ( m_sCurrent.Len() )
304 m_sTextToken = m_sCurrent;
305 adjustFormat();
306 m_nColumnPos++;
307 m_bSDNum = m_bInTbl = sal_False;
308 m_sCurrent.Erase();
309 break;
310 case HTML_TABLEROW_OFF:
311 if ( m_sCurrent.Len() )
312 m_sTextToken = m_sCurrent;
313 adjustFormat();
314 m_nColumnPos = 0;
315 m_nRows--;
316 m_sCurrent.Erase();
317 break;
321 // -----------------------------------------------------------------------------
322 void OHTMLReader::fetchOptions()
324 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::fetchOptions" );
325 m_bInTbl = sal_True;
326 const HTMLOptions& options = GetOptions();
327 for (size_t i = 0, n = options.size(); i < n; ++i)
329 const HTMLOption& rOption = options[i];
330 switch( rOption.GetToken() )
332 case HTML_O_SDVAL:
334 m_sValToken = rOption.GetString();
335 m_bSDNum = sal_True;
337 break;
338 case HTML_O_SDNUM:
339 m_sNumToken = rOption.GetString();
340 break;
344 //---------------------------------------------------------------------------------
345 void OHTMLReader::TableDataOn(SvxCellHorJustify& eVal)
347 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::TableDataOn" );
348 DBG_CHKTHIS(OHTMLReader,NULL);
349 const HTMLOptions& rHtmlOptions = GetOptions();
350 for (size_t i = 0, n = rHtmlOptions.size(); i < n; ++i)
352 const HTMLOption& rOption = rHtmlOptions[i];
353 switch( rOption.GetToken() )
355 case HTML_O_ALIGN:
357 const String& rOptVal = rOption.GetString();
358 if (rOptVal.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_right ))
359 eVal = SVX_HOR_JUSTIFY_RIGHT;
360 else if (rOptVal.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_center ))
361 eVal = SVX_HOR_JUSTIFY_CENTER;
362 else if (rOptVal.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_left ))
363 eVal = SVX_HOR_JUSTIFY_LEFT;
364 else
365 eVal = SVX_HOR_JUSTIFY_STANDARD;
367 break;
368 case HTML_O_WIDTH:
369 m_nWidth = GetWidthPixel( rOption );
370 break;
375 //---------------------------------------------------------------------------------
376 void OHTMLReader::TableFontOn(FontDescriptor& _rFont,sal_Int32 &_rTextColor)
378 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::TableFontOn" );
379 DBG_CHKTHIS(OHTMLReader,NULL);
380 const HTMLOptions& rHtmlOptions = GetOptions();
381 for (size_t i = 0, n = rHtmlOptions.size(); i < n; ++i)
383 const HTMLOption& rOption = rHtmlOptions[i];
384 switch( rOption.GetToken() )
386 case HTML_O_COLOR:
388 Color aColor;
389 rOption.GetColor( aColor );
390 _rTextColor = aColor.GetRGBColor();
392 break;
393 case HTML_O_FACE :
395 const String& rFace = rOption.GetString();
396 String aFontName;
397 xub_StrLen nPos = 0;
398 while( nPos != STRING_NOTFOUND )
399 { // Fontliste, VCL: Semikolon als Separator, HTML: Komma
400 String aFName = rFace.GetToken( 0, ',', nPos );
401 aFName.EraseTrailingChars().EraseLeadingChars();
402 if( aFontName.Len() )
403 aFontName += ';';
404 aFontName += aFName;
406 if ( aFontName.Len() )
407 _rFont.Name = ::rtl::OUString(aFontName);
409 break;
410 case HTML_O_SIZE :
412 sal_Int16 nSize = (sal_Int16) rOption.GetNumber();
413 if ( nSize == 0 )
414 nSize = 1;
415 else if ( nSize < DBAUI_HTML_FONTSIZES )
416 nSize = DBAUI_HTML_FONTSIZES;
418 _rFont.Height = nSize;
420 break;
424 // ---------------------------------------------------------------------------
425 sal_Int16 OHTMLReader::GetWidthPixel( const HTMLOption& rOption )
427 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::GetWidthPixel" );
428 DBG_CHKTHIS(OHTMLReader,NULL);
429 const String& rOptVal = rOption.GetString();
430 if ( rOptVal.Search('%') != STRING_NOTFOUND )
431 { // Prozent
432 OSL_ENSURE( m_nColumnWidth, "WIDTH Option: m_nColumnWidth==0 und Width%" );
433 return (sal_Int16)((rOption.GetNumber() * m_nColumnWidth) / 100);
435 else
437 if ( rOptVal.Search('*') != STRING_NOTFOUND )
438 { // relativ zu was?!?
439 //TODO: ColArray aller relativen Werte sammeln und dann MakeCol
440 return 0;
442 else
443 return (sal_Int16)rOption.GetNumber(); // Pixel
446 // ---------------------------------------------------------------------------
447 sal_Bool OHTMLReader::CreateTable(int nToken)
449 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::CreateTable" );
450 DBG_CHKTHIS(OHTMLReader,NULL);
451 String aTempName(ModuleRes(STR_TBL_TITLE));
452 aTempName = aTempName.GetToken(0,' ');
453 aTempName = String(::dbtools::createUniqueName(m_xTables,::rtl::OUString(aTempName )));
455 int nTmpToken2 = nToken;
456 sal_Bool bCaption = sal_False;
457 sal_Bool bTableHeader = sal_False;
458 String aColumnName;
459 SvxCellHorJustify eVal;
461 String aTableName;
462 FontDescriptor aFont = ::dbaui::CreateFontDescriptor(Application::GetSettings().GetStyleSettings().GetAppFont());
463 sal_Int32 nTextColor = 0;
466 switch(nTmpToken2)
468 case HTML_TEXTTOKEN:
469 case HTML_SINGLECHAR:
470 if(bTableHeader)
471 aColumnName += aToken;
472 if(bCaption)
473 aTableName += aToken;
474 break;
475 case HTML_PARABREAK_OFF:
476 m_sCurrent += aColumnName;
477 break;
478 case HTML_PARABREAK_ON:
479 m_sTextToken.Erase();
480 break;
481 case HTML_TABLEDATA_ON:
482 case HTML_TABLEHEADER_ON:
483 TableDataOn(eVal);
484 bTableHeader = sal_True;
485 break;
486 case HTML_TABLEDATA_OFF:
487 case HTML_TABLEHEADER_OFF:
489 aColumnName.EraseLeadingChars();
490 aColumnName.EraseTrailingChars();
491 if (!aColumnName.Len() || m_bAppendFirstLine )
492 aColumnName = String(ModuleRes(STR_COLUMN_NAME));
493 else if ( m_sCurrent.Len() )
494 aColumnName = m_sCurrent;
496 aColumnName.EraseLeadingChars();
497 aColumnName.EraseTrailingChars();
498 CreateDefaultColumn(aColumnName);
499 aColumnName.Erase();
500 m_sCurrent.Erase();
502 eVal = SVX_HOR_JUSTIFY_STANDARD;
503 bTableHeader = sal_False;
505 break;
507 case HTML_TITLE_ON:
508 case HTML_CAPTION_ON:
509 bCaption = sal_True;
510 break;
511 case HTML_TITLE_OFF:
512 case HTML_CAPTION_OFF:
513 aTableName.EraseLeadingChars();
514 aTableName.EraseTrailingChars();
515 if(!aTableName.Len())
516 aTableName = String(::dbtools::createUniqueName(m_xTables,::rtl::OUString(aTableName)));
517 else
518 aTableName = aTempName;
519 bCaption = sal_False;
520 break;
521 case HTML_FONT_ON:
522 TableFontOn(aFont,nTextColor);
523 break;
524 case HTML_BOLD_ON:
525 aFont.Weight = ::com::sun::star::awt::FontWeight::BOLD;
526 break;
527 case HTML_ITALIC_ON:
528 aFont.Slant = ::com::sun::star::awt::FontSlant_ITALIC;
529 break;
530 case HTML_UNDERLINE_ON:
531 aFont.Underline = ::com::sun::star::awt::FontUnderline::SINGLE;
532 break;
533 case HTML_STRIKE_ON:
534 aFont.Strikeout = ::com::sun::star::awt::FontStrikeout::SINGLE;
535 break;
538 while((nTmpToken2 = GetNextToken()) != HTML_TABLEROW_OFF);
540 if ( m_sCurrent.Len() )
541 aColumnName = m_sCurrent;
542 aColumnName.EraseLeadingChars();
543 aColumnName.EraseTrailingChars();
544 if(aColumnName.Len())
545 CreateDefaultColumn(aColumnName);
547 if ( m_vDestVector.empty() )
548 return sal_False;
550 if(!aTableName.Len())
551 aTableName = aTempName;
553 m_bInTbl = sal_False;
554 m_bFoundTable = sal_True;
556 if ( isCheckEnabled() )
557 return sal_True;
559 return !executeWizard(aTableName,makeAny(nTextColor),aFont) && m_xTable.is();
561 // -----------------------------------------------------------------------------
562 void OHTMLReader::setTextEncoding()
564 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::setTextEncoding" );
565 DBG_CHKTHIS(OHTMLReader,NULL);
566 m_bMetaOptions = sal_True;
567 ParseMetaOptions(NULL, NULL);
570 // -----------------------------------------------------------------------------
571 void OHTMLReader::release()
573 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::release" );
574 DBG_CHKTHIS(OHTMLReader,NULL);
575 ReleaseRef();
577 // -----------------------------------------------------------------------------
578 TypeSelectionPageFactory OHTMLReader::getTypeSelectionPageFactory()
580 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::getTypeSelectionPageFactory" );
581 DBG_CHKTHIS(OHTMLReader,NULL);
582 return &OWizHTMLExtend::Create;
584 // -----------------------------------------------------------------------------
586 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */