1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: HtmlReader.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_dbaccess.hxx"
33 #include "HtmlReader.hxx"
34 #include <connectivity/dbconversion.hxx>
35 #include <connectivity/dbtools.hxx>
36 #include <tools/tenccvt.hxx>
37 #include <comphelper/extract.hxx>
38 #include "dbu_misc.hrc"
39 #include "dbustrings.hrc"
40 #include <sfx2/sfxhtml.hxx>
41 #include <tools/debug.hxx>
42 #include <tools/tenccvt.hxx>
43 #include "moduledbu.hxx"
44 #include <com/sun/star/sdbcx/XDataDescriptorFactory.hpp>
45 #include <com/sun/star/sdbcx/XColumnsSupplier.hpp>
46 #include <com/sun/star/sdbcx/XAppend.hpp>
47 #include <com/sun/star/sdbc/DataType.hpp>
48 #include <com/sun/star/sdbc/ColumnValue.hpp>
49 #include <com/sun/star/awt/FontDescriptor.hpp>
50 #include <com/sun/star/awt/FontWeight.hpp>
51 #include <com/sun/star/awt/FontStrikeout.hpp>
52 #include <com/sun/star/awt/FontSlant.hpp>
53 #include <com/sun/star/awt/FontUnderline.hpp>
54 #include <com/sun/star/util/NumberFormat.hpp>
55 #include <com/sun/star/util/XNumberFormatTypes.hpp>
56 #include <svtools/htmltokn.h>
57 #include <svtools/htmlkywd.hxx>
58 #include <tools/color.hxx>
59 #include "WCopyTable.hxx"
60 #include "WExtendPages.hxx"
61 #include "WNameMatch.hxx"
62 #include "WColumnSelect.hxx"
63 #include "QEnumTypes.hxx"
65 #include <tools/inetmime.hxx>
66 #include <svtools/inettype.hxx>
67 #include <rtl/tencinfo.h>
68 #include "UITools.hxx"
69 #include <vcl/svapp.hxx>
70 #include <rtl/logfile.hxx>
72 using namespace dbaui
;
73 using namespace ::com::sun::star::uno
;
74 using namespace ::com::sun::star::beans
;
75 using namespace ::com::sun::star::container
;
76 using namespace ::com::sun::star::sdbc
;
77 using namespace ::com::sun::star::sdbcx
;
78 using namespace ::com::sun::star::awt
;
// Font-size limit used by OHTMLReader::TableFontOn when it evaluates the
// size option of a font tag.
80 #define DBAUI_HTML_FONTSIZES 8 // same as the export, HTML options
// Numeric codes named after the kinds of HTML META entries.
// NOTE(review): none of the HTML_META_* codes is referenced in the visible
// part of this file - confirm whether they are still needed.
81 #define HTML_META_NONE 0
82 #define HTML_META_AUTHOR 1
83 #define HTML_META_DESCRIPTION 2
84 #define HTML_META_KEYWORDS 3
85 #define HTML_META_REFRESH 4
86 #define HTML_META_CLASSIFICATION 5
87 #define HTML_META_CREATED 6
88 #define HTML_META_CHANGEDBY 7
89 #define HTML_META_CHANGED 8
90 #define HTML_META_GENERATOR 9
91 #define HTML_META_SDFOOTNOTE 10
92 #define HTML_META_SDENDNOTE 11
93 #define HTML_META_CONTENT_TYPE 12
95 // ==========================================================================
97 // ==========================================================================
99 // ==========================================================================
// Constructor overload that receives a database connection (_rxConnection).
// It forwards the connection, the number formatter, the service factory, the
// column list, the type-info map and the input stream to ODatabaseExport,
// initialises m_bMetaOptions to sal_False, selects the extended-compatibility
// variant of ISO-8859-1 as source encoding and enables the switch to UCS2.
// NOTE(review): the braces and part of the member-initialiser list are not
// present in this copy (embedded original lines 105, 107-108 and 110-111 are
// absent) - consult the complete file before changing anything here.
100 OHTMLReader::OHTMLReader(SvStream
& rIn
,const SharedConnection
& _rxConnection
,
101 const Reference
< ::com::sun::star::util::XNumberFormatter
>& _rxNumberF
,
102 const ::com::sun::star::uno::Reference
< ::com::sun::star::lang::XMultiServiceFactory
>& _rM
,
103 const TColumnVector
* pList
,
104 const OTypeInfoMap
* _pInfoMap
)
106 ,ODatabaseExport( _rxConnection
, _rxNumberF
, _rM
, pList
, _pInfoMap
, rIn
)
109 ,m_bMetaOptions(sal_False
)
112 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::OHTMLReader" );
113 DBG_CTOR(OHTMLReader
,NULL
);
// Default source encoding until something else is requested.
114 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1
) );
115 // If the file starts with a BOM, switch to UCS2.
116 SetSwitchToUCS2( TRUE
);
118 // ---------------------------------------------------------------------------
// Constructor overload that receives column positions (_rColumnPositions) and
// an auto-increment flag instead of a connection.
// It forwards nRows, the column positions, the number formatter, the service
// factory, the column list, the type-info map, the auto-increment flag and the
// input stream to ODatabaseExport, initialises m_bMetaOptions to sal_False,
// selects the extended-compatibility variant of ISO-8859-1 as source encoding
// and enables the switch to UCS2.
// NOTE(review): nRows is passed to ODatabaseExport below, but its parameter
// declaration (presumably original line 120) is absent from this copy, as are
// the braces and part of the member-initialiser list.
119 OHTMLReader::OHTMLReader(SvStream
& rIn
,
121 const TPositions
&_rColumnPositions
,
122 const Reference
< ::com::sun::star::util::XNumberFormatter
>& _rxNumberF
,
123 const ::com::sun::star::uno::Reference
< ::com::sun::star::lang::XMultiServiceFactory
>& _rM
,
124 const TColumnVector
* pList
,
125 const OTypeInfoMap
* _pInfoMap
,
126 sal_Bool _bAutoIncrementEnabled
)
128 ,ODatabaseExport( nRows
, _rColumnPositions
, _rxNumberF
, _rM
, pList
, _pInfoMap
, _bAutoIncrementEnabled
, rIn
)
131 ,m_bMetaOptions(sal_False
)
134 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::OHTMLReader" );
135 DBG_CTOR(OHTMLReader
,NULL
);
// Default source encoding until something else is requested.
136 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1
) );
137 // If the file starts with a BOM, switch to UCS2.
138 SetSwitchToUCS2( TRUE
);
140 // ---------------------------------------------------------------------------
// Destructor. The only visible statement is the DBG_DTOR debug bookkeeping
// that pairs with DBG_CTOR in the constructors.
141 OHTMLReader::~OHTMLReader()
143 DBG_DTOR(OHTMLReader
,NULL
);
145 // ---------------------------------------------------------------------------
// Runs the HTML parser over the input stream from its beginning.
// After HTMLParser::CallParser() has finished, SetColumnTypes is applied to
// m_pColumnList / m_pInfoMap.
// Returns the parser state reported by HTMLParser::CallParser() when a table
// was found (m_bFoundTable), otherwise SVPAR_ERROR.
// NOTE(review): original line 151 is absent from this copy.
146 SvParserState
OHTMLReader::CallParser()
148 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::CallParser" );
149 DBG_CHKTHIS(OHTMLReader
,NULL
);
// Rewind the stream before parsing.
150 rInput
.Seek(STREAM_SEEK_TO_BEGIN
);
152 SvParserState eParseState
= HTMLParser::CallParser();
153 SetColumnTypes(m_pColumnList
,m_pInfoMap
);
// A document without any table counts as a parse error.
154 return m_bFoundTable
? eParseState
: SVPAR_ERROR
;
156 // -----------------------------------------------------------------------------
// Processes a single HTML token (nToken).
// Visible structure:
//  - a check on m_bError / m_nRows (per the original comment: return at once),
//  - a check for HTML_META,
//  - a branch on m_xConnection.is(): the first group of case labels creates
//    the table on demand (CreateTable) and drives m_pUpdateHelper
//    (moveToInsertRow, insertValueIntoColumn, updateInt, insertRow); the
//    second group is, per the original comment, only for type checking.
// Text of a cell is collected in m_sTextToken and m_sCurrent.
// NOTE(review): many lines (braces, break statements, several case labels and
// the bodies of the catch handlers) are absent from this copy, so the exact
// control flow cannot be determined from here - consult the complete file.
157 void OHTMLReader::NextToken( int nToken
)
159 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::NextToken" );
160 DBG_CHKTHIS(OHTMLReader
,NULL
);
161 if(m_bError
|| !m_nRows
) // on error, or when no rows are left to check, return immediately
163 if ( nToken
== HTML_META
)
166 if(m_xConnection
.is()) // tells which constructor was called and therefore whether a table is to be created
172 { // it can also be TD or TH if there was no TABLE before
173 const HTMLOptions
* pHtmlOptions
= GetOptions();
174 sal_Int16 nArrLen
= pHtmlOptions
->Count();
175 for ( sal_Int16 i
= 0; i
< nArrLen
; i
++ )
177 const HTMLOption
* pOption
= (*pHtmlOptions
)[i
];
178 switch( pOption
->GetToken() )
181 { // percent: of the document width or of the outer cell
182 m_nColumnWidth
= GetWidthPixel( pOption
);
191 sal_uInt32 nTell
= rInput
.Tell(); // may change the position of the stream
192 if ( !m_xTable
.is() )
193 {// use the first row as header
194 m_bError
= !CreateTable(nToken
);
195 if ( m_bAppendFirstLine
)
206 case HTML_TABLEROW_ON
:
207 if ( m_pUpdateHelper
.get() )
211 m_pUpdateHelper
->moveToInsertRow(); // otherwise append a new row
213 catch(SQLException
& e
)
214 // update error handling
223 case HTML_SINGLECHAR
:
224 if ( m_bInTbl
) //&& !m_bSDNum ) // important, otherwise we would also get the names of the fonts
225 m_sTextToken
+= aToken
;
227 case HTML_PARABREAK_OFF
:
228 m_sCurrent
+= m_sTextToken
;
230 case HTML_PARABREAK_ON
:
231 m_sTextToken
.Erase();
233 case HTML_TABLEDATA_ON
:
236 case HTML_TABLEDATA_OFF
:
238 if ( m_sCurrent
.Len() )
239 m_sTextToken
= m_sCurrent
;
242 insertValueIntoColumn();
244 catch(SQLException
& e
)
245 // update error handling
252 m_bSDNum
= m_bInTbl
= sal_False
;
255 case HTML_TABLEROW_OFF
:
256 if ( !m_pUpdateHelper
.get() )
264 if (m_bIsAutoIncrement
) // if bSetAutoIncrement then I have to set the autoincrement
265 m_pUpdateHelper
->updateInt(1,m_nRowCount
);
266 m_pUpdateHelper
->insertRow();
268 catch(SQLException
& e
)
269 //////////////////////////////////////////////////////////////////////
270 // update error handling
278 else // branch only valid for type checking
284 // the column header does not count
289 while(GetNextToken() != HTML_TABLEROW_OFF
);
293 case HTML_TABLEDATA_ON
:
294 case HTML_TABLEHEADER_ON
:
298 case HTML_SINGLECHAR
:
299 if ( m_bInTbl
) // && !m_bSDNum ) // important, otherwise we would also get the names of the fonts
300 m_sTextToken
+= aToken
;
302 case HTML_PARABREAK_OFF
:
303 m_sCurrent
+= m_sTextToken
;
305 case HTML_PARABREAK_ON
:
306 m_sTextToken
.Erase();
308 case HTML_TABLEDATA_OFF
:
309 if ( m_sCurrent
.Len() )
310 m_sTextToken
= m_sCurrent
;
313 m_bSDNum
= m_bInTbl
= sal_False
;
316 case HTML_TABLEROW_OFF
:
317 if ( m_sCurrent
.Len() )
318 m_sTextToken
= m_sCurrent
;
327 // -----------------------------------------------------------------------------
// Walks over the options of the current tag (GetOptions()) and stores the
// string of the matching options in m_sValToken and m_sNumToken.
// NOTE(review): the case labels that decide which option goes into which
// member are absent from this copy.
328 void OHTMLReader::fetchOptions()
330 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::fetchOptions" );
332 const HTMLOptions
* options
= GetOptions();
333 sal_Int16 nArrLen
= options
->Count();
334 for ( sal_Int16 i
= 0; i
< nArrLen
; i
++ )
336 const HTMLOption
* pOption
= (*options
)[i
];
337 switch( pOption
->GetToken() )
341 m_sValToken
= pOption
->GetString();
342 //m_sTextToken = pOption->GetString();
347 m_sNumToken
= pOption
->GetString();
352 //---------------------------------------------------------------------------------
// Evaluates the options of a table cell tag.
//  eVal    [out] horizontal justification derived from the alignment option:
//                the option string is compared case-insensitively with the
//                HTML "right", "center" and "left" keywords; the last visible
//                assignment sets SVX_HOR_JUSTIFY_STANDARD.
//  nToken  [in]  token of the cell tag; bHorJustifyCenterTH starts out true
//                only for HTML_TABLEHEADER_ON and is cleared as soon as an
//                alignment option is seen.
// A width option is converted with GetWidthPixel and stored in m_nWidth.
// NOTE(review): the case labels and the `else` in front of the STANDARD
// assignment (original line 375) are absent from this copy.
353 void OHTMLReader::TableDataOn(SvxCellHorJustify
& eVal
,int nToken
)
355 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::TableDataOn" );
356 DBG_CHKTHIS(OHTMLReader
,NULL
);
357 sal_Bool bHorJustifyCenterTH
= (nToken
== HTML_TABLEHEADER_ON
);
358 const HTMLOptions
* pHtmlOptions
= GetOptions();
359 sal_Int16 nArrLen
= pHtmlOptions
->Count();
360 for ( sal_Int16 i
= 0; i
< nArrLen
; i
++ )
362 const HTMLOption
* pOption
= (*pHtmlOptions
)[i
];
363 switch( pOption
->GetToken() )
367 bHorJustifyCenterTH
= sal_False
;
368 const String
& rOptVal
= pOption
->GetString();
369 if (rOptVal
.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_right
))
370 eVal
= SVX_HOR_JUSTIFY_RIGHT
;
371 else if (rOptVal
.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_center
))
372 eVal
= SVX_HOR_JUSTIFY_CENTER
;
373 else if (rOptVal
.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_left
))
374 eVal
= SVX_HOR_JUSTIFY_LEFT
;
376 eVal
= SVX_HOR_JUSTIFY_STANDARD
;
380 m_nWidth
= GetWidthPixel( pOption
);
386 //---------------------------------------------------------------------------------
// Evaluates the options of a font tag.
//  _rFont       [in/out] receives the font name (Name) and the size (Height).
//  _rTextColor  [out]    receives the RGB value of the colour option.
// The face option is a comma separated list that is split with GetToken and
// trimmed entry by entry; the resulting aFontName, if not empty, becomes
// _rFont.Name. The size option is read with GetNumber(); values below
// DBAUI_HTML_FONTSIZES are set to DBAUI_HTML_FONTSIZES before the value is
// stored in _rFont.Height.
// NOTE(review): raising small sizes to DBAUI_HTML_FONTSIZES looks like it was
// meant to be an upper bound - confirm the intended comparison.
// NOTE(review): the case labels and the declarations of aColor, aFontName and
// nPos are absent from this copy.
387 void OHTMLReader::TableFontOn(FontDescriptor
& _rFont
,sal_Int32
&_rTextColor
)
389 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::TableFontOn" );
390 DBG_CHKTHIS(OHTMLReader
,NULL
);
391 const HTMLOptions
* pHtmlOptions
= GetOptions();
392 sal_Int16 nArrLen
= pHtmlOptions
->Count();
393 for ( sal_Int16 i
= 0; i
< nArrLen
; i
++ )
395 const HTMLOption
* pOption
= (*pHtmlOptions
)[i
];
396 switch( pOption
->GetToken() )
401 pOption
->GetColor( aColor
);
402 _rTextColor
= aColor
.GetRGBColor();
407 const String
& rFace
= pOption
->GetString();
410 while( nPos
!= STRING_NOTFOUND
)
411 { // font list; VCL: semicolon as separator, HTML: comma
412 String aFName
= rFace
.GetToken( 0, ',', nPos
);
413 aFName
.EraseTrailingChars().EraseLeadingChars();
414 if( aFontName
.Len() )
418 if ( aFontName
.Len() )
419 _rFont
.Name
= ::rtl::OUString(aFontName
);
424 sal_Int16 nSize
= (sal_Int16
) pOption
->GetNumber();
427 else if ( nSize
< DBAUI_HTML_FONTSIZES
)
428 nSize
= DBAUI_HTML_FONTSIZES
;
430 _rFont
.Height
= nSize
;
436 // ---------------------------------------------------------------------------
// Converts a width option into a pixel value.
//  pOption  the option to evaluate; its string and numeric value are read.
// Returns:
//  - for a value containing '%': GetNumber() * m_nColumnWidth / 100
//    (a debug assertion fires when m_nColumnWidth is 0),
//  - for a value containing '*': NOTE(review) the statement of this branch is
//    absent from this copy,
//  - otherwise: GetNumber() taken as pixels.
437 sal_Int16
OHTMLReader::GetWidthPixel( const HTMLOption
* pOption
)
439 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::GetWidthPixel" );
440 DBG_CHKTHIS(OHTMLReader
,NULL
);
441 const String
& rOptVal
= pOption
->GetString();
442 if ( rOptVal
.Search('%') != STRING_NOTFOUND
)
444 DBG_ASSERT( m_nColumnWidth
, "WIDTH Option: m_nColumnWidth==0 und Width%" );
445 return (sal_Int16
)((pOption
->GetNumber() * m_nColumnWidth
) / 100);
449 if ( rOptVal
.Search('*') != STRING_NOTFOUND
)
450 { // relative to what?!?
451 //2do: collect a ColArray of all relative values and then MakeCol
455 return (sal_Int16
)pOption
->GetNumber(); // pixel
458 // ---------------------------------------------------------------------------
// Collects the column and table description from the tokens of one table row.
//  nToken  the token the caller is currently at; it seeds nTmpToken2.
// Tokens are consumed with GetNextToken() until HTML_TABLEROW_OFF is reached.
// While doing so the function
//  - gathers character data into aColumnName or aTableName,
//  - evaluates cell options through TableDataOn and font options through
//    TableFontOn, and records bold / italic / underline / strikeout in aFont,
//  - calls CreateDefaultColumn for every finished header or data cell; an
//    empty name, or m_bAppendFirstLine being set, selects the
//    STR_COLUMN_NAME resource text instead,
//  - derives the table name from the caption.
// A temporary unique table name (aTempName) is built up front from the first
// word of STR_TBL_TITLE and is used when no table name was found.
// Afterwards m_bInTbl is cleared and m_bFoundTable is set.
// Returns !executeWizard(aTableName, text colour, aFont) && m_xTable.is() on
// the last visible path.
// NOTE(review): the loop head, several case labels, the declarations of
// aColumnName / aTableName and the statements after `m_vDestVector.empty()`
// and `isCheckEnabled()` are absent from this copy.
// NOTE(review): original line 531 (presumably `else`) is absent; if it is an
// `else`, a non-empty caption is replaced by aTempName while an empty one is
// passed to createUniqueName - confirm that this is intended.
459 sal_Bool
OHTMLReader::CreateTable(int nToken
)
461 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::CreateTable" );
462 DBG_CHKTHIS(OHTMLReader
,NULL
);
463 String
aTempName(ModuleRes(STR_TBL_TITLE
));
464 aTempName
= aTempName
.GetToken(0,' ');
465 aTempName
= String(::dbtools::createUniqueName(m_xTables
,::rtl::OUString(aTempName
)));
467 int nTmpToken2
= nToken
;
468 sal_Bool bCaption
= sal_False
;
469 sal_Bool bTableHeader
= sal_False
;
471 SvxCellHorJustify eVal
;
// Start from the application font of the current style settings.
474 FontDescriptor aFont
= ::dbaui::CreateFontDescriptor(Application::GetSettings().GetStyleSettings().GetAppFont());
475 sal_Int32 nTextColor
= 0;
481 case HTML_SINGLECHAR
:
483 aColumnName
+= aToken
;
485 aTableName
+= aToken
;
487 case HTML_PARABREAK_OFF
:
488 m_sCurrent
+= aColumnName
;
490 case HTML_PARABREAK_ON
:
491 m_sTextToken
.Erase();
493 case HTML_TABLEDATA_ON
:
494 // m_bAppendFirstLine = true;
496 case HTML_TABLEHEADER_ON
:
497 TableDataOn(eVal
,nTmpToken2
);
500 case HTML_TABLEDATA_OFF
:
501 // m_bAppendFirstLine = true;
503 case HTML_TABLEHEADER_OFF
:
505 aColumnName
.EraseLeadingChars();
506 aColumnName
.EraseTrailingChars();
507 if (!aColumnName
.Len() || m_bAppendFirstLine
)
508 aColumnName
= String(ModuleRes(STR_COLUMN_NAME
));
509 else if ( m_sCurrent
.Len() )
510 aColumnName
= m_sCurrent
;
512 CreateDefaultColumn(aColumnName
);
516 eVal
= SVX_HOR_JUSTIFY_STANDARD
;
517 bTableHeader
= sal_False
;
522 case HTML_CAPTION_ON
:
526 case HTML_CAPTION_OFF
:
527 aTableName
.EraseLeadingChars();
528 aTableName
.EraseTrailingChars();
529 if(!aTableName
.Len())
530 aTableName
= String(::dbtools::createUniqueName(m_xTables
,::rtl::OUString(aTableName
)));
532 aTableName
= aTempName
;
533 bCaption
= sal_False
;
536 TableFontOn(aFont
,nTextColor
);
539 aFont
.Weight
= ::com::sun::star::awt::FontWeight::BOLD
;
542 aFont
.Slant
= ::com::sun::star::awt::FontSlant_ITALIC
;
544 case HTML_UNDERLINE_ON
:
545 aFont
.Underline
= ::com::sun::star::awt::FontUnderline::SINGLE
;
548 aFont
.Strikeout
= ::com::sun::star::awt::FontStrikeout::SINGLE
;
// Keep reading tokens until the end of the row.
552 while((nTmpToken2
= GetNextToken()) != HTML_TABLEROW_OFF
);
// A column whose closing tag was never seen is still created here.
554 if ( m_sCurrent
.Len() )
555 aColumnName
= m_sCurrent
;
556 if(aColumnName
.Len())
557 CreateDefaultColumn(aColumnName
);
559 if ( m_vDestVector
.empty() )
// Fall back to the generated name when no table name was found.
562 if(!aTableName
.Len())
563 aTableName
= aTempName
;
565 m_bInTbl
= sal_False
;
566 m_bFoundTable
= sal_True
;
568 if ( isCheckEnabled() )
571 return !executeWizard(aTableName
,makeAny(nTextColor
),aFont
) && m_xTable
.is();
573 // -----------------------------------------------------------------------------
// Marks that META options have been processed (m_bMetaOptions = sal_True) and
// calls ParseMetaOptions with two NULL arguments.
// NOTE(review): presumably ParseMetaOptions picks up the character set of the
// document here - confirm against the HTMLParser documentation.
574 void OHTMLReader::setTextEncoding()
576 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::setTextEncoding" );
577 DBG_CHKTHIS(OHTMLReader
,NULL
);
578 m_bMetaOptions
= sal_True
;
579 ParseMetaOptions(NULL
, NULL
);
582 // -----------------------------------------------------------------------------
// Release hook of the reader.
// NOTE(review): only the logging and debug-check statements are visible in
// this copy; original line 587, which would hold the actual work, is absent.
583 void OHTMLReader::release()
585 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::release" );
586 DBG_CHKTHIS(OHTMLReader
,NULL
);
589 // -----------------------------------------------------------------------------
// Returns the factory function for the type-selection page that belongs to
// the HTML import: the address of OWizHTMLExtend::Create.
590 TypeSelectionPageFactory
OHTMLReader::getTypeSelectionPageFactory()
592 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::getTypeSelectionPageFactory" );
593 DBG_CHKTHIS(OHTMLReader
,NULL
);
594 return &OWizHTMLExtend::Create
;
596 // -----------------------------------------------------------------------------