1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include "HtmlReader.hxx"
30 #include <connectivity/dbconversion.hxx>
31 #include <connectivity/dbtools.hxx>
32 #include <tools/tenccvt.hxx>
33 #include <comphelper/extract.hxx>
34 #include "dbu_misc.hrc"
35 #include "dbustrings.hrc"
36 #include <sfx2/sfxhtml.hxx>
37 #include <osl/diagnose.h>
38 #include "moduledbu.hxx"
39 #include <com/sun/star/sdbcx/XDataDescriptorFactory.hpp>
40 #include <com/sun/star/sdbcx/XColumnsSupplier.hpp>
41 #include <com/sun/star/sdbcx/XAppend.hpp>
42 #include <com/sun/star/sdbc/DataType.hpp>
43 #include <com/sun/star/sdbc/ColumnValue.hpp>
44 #include <com/sun/star/awt/FontDescriptor.hpp>
45 #include <com/sun/star/awt/FontWeight.hpp>
46 #include <com/sun/star/awt/FontStrikeout.hpp>
47 #include <com/sun/star/awt/FontSlant.hpp>
48 #include <com/sun/star/awt/FontUnderline.hpp>
49 #include <com/sun/star/util/NumberFormat.hpp>
50 #include <com/sun/star/util/XNumberFormatTypes.hpp>
51 #include <svtools/htmltokn.h>
52 #include <svtools/htmlkywd.hxx>
53 #include <tools/color.hxx>
54 #include "WCopyTable.hxx"
55 #include "WExtendPages.hxx"
56 #include "WNameMatch.hxx"
57 #include "WColumnSelect.hxx"
58 #include "QEnumTypes.hxx"
60 #include <tools/inetmime.hxx>
61 #include <svl/inettype.hxx>
62 #include <rtl/tencinfo.h>
63 #include "UITools.hxx"
64 #include <vcl/svapp.hxx>
65 #include <rtl/logfile.hxx>
67 using namespace dbaui
;
68 using namespace ::com::sun::star::uno
;
69 using namespace ::com::sun::star::beans
;
70 using namespace ::com::sun::star::container
;
71 using namespace ::com::sun::star::sdbc
;
72 using namespace ::com::sun::star::sdbcx
;
73 using namespace ::com::sun::star::awt
;
75 #define DBAUI_HTML_FONTSIZES 8 // same as in the export, HTML options
76 #define HTML_META_NONE 0
77 #define HTML_META_AUTHOR 1
78 #define HTML_META_DESCRIPTION 2
79 #define HTML_META_KEYWORDS 3
80 #define HTML_META_REFRESH 4
81 #define HTML_META_CLASSIFICATION 5
82 #define HTML_META_CREATED 6
83 #define HTML_META_CHANGEDBY 7
84 #define HTML_META_CHANGED 8
85 #define HTML_META_GENERATOR 9
86 #define HTML_META_SDFOOTNOTE 10
87 #define HTML_META_SDENDNOTE 11
88 #define HTML_META_CONTENT_TYPE 12
90 // ==========================================================================
92 // ==========================================================================
94 // ==========================================================================
// Constructor taking a connection: forwards the connection, number formatter,
// service factory, column list, type-info map and the input stream to
// ODatabaseExport and starts with m_bMetaOptions cleared. The source encoding
// is preset to the extended-compatibility variant of ISO-8859-1.
95 OHTMLReader::OHTMLReader(SvStream
& rIn
,const SharedConnection
& _rxConnection
,
96 const Reference
< ::com::sun::star::util::XNumberFormatter
>& _rxNumberF
,
97 const ::com::sun::star::uno::Reference
< ::com::sun::star::lang::XMultiServiceFactory
>& _rM
,
98 const TColumnVector
* pList
,
99 const OTypeInfoMap
* _pInfoMap
)
101 ,ODatabaseExport( _rxConnection
, _rxNumberF
, _rM
, pList
, _pInfoMap
, rIn
)
104 ,m_bMetaOptions(sal_False
)
107 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::OHTMLReader" );
108 DBG_CTOR(OHTMLReader
,NULL
);
109 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1
) );
110 // If the file starts with a BOM, switch to UCS2.
111 SetSwitchToUCS2( sal_True
);
113 // ---------------------------------------------------------------------------
// Constructor taking column positions instead of a connection: forwards
// nRows, the column positions, number formatter, service factory, column list,
// type-info map, the auto-increment flag and the input stream to
// ODatabaseExport and starts with m_bMetaOptions cleared. The source encoding
// is preset to the extended-compatibility variant of ISO-8859-1.
114 OHTMLReader::OHTMLReader(SvStream
& rIn
,
116 const TPositions
&_rColumnPositions
,
117 const Reference
< ::com::sun::star::util::XNumberFormatter
>& _rxNumberF
,
118 const ::com::sun::star::uno::Reference
< ::com::sun::star::lang::XMultiServiceFactory
>& _rM
,
119 const TColumnVector
* pList
,
120 const OTypeInfoMap
* _pInfoMap
,
121 sal_Bool _bAutoIncrementEnabled
)
123 ,ODatabaseExport( nRows
, _rColumnPositions
, _rxNumberF
, _rM
, pList
, _pInfoMap
, _bAutoIncrementEnabled
, rIn
)
126 ,m_bMetaOptions(sal_False
)
129 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::OHTMLReader" );
130 DBG_CTOR(OHTMLReader
,NULL
);
131 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1
) );
132 // If the file starts with a BOM, switch to UCS2.
133 SetSwitchToUCS2( sal_True
);
135 // ---------------------------------------------------------------------------
// Destructor; reports the destruction to the debug object tracking (DBG_DTOR).
136 OHTMLReader::~OHTMLReader()
138 DBG_DTOR(OHTMLReader
,NULL
);
140 // ---------------------------------------------------------------------------
// Rewinds the input stream to its beginning, runs the base HTMLParser and then
// applies the column types from m_pColumnList / m_pInfoMap.
// Returns the base parser's state if a table was found (m_bFoundTable),
// otherwise SVPAR_ERROR.
141 SvParserState
OHTMLReader::CallParser()
143 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::CallParser" );
144 DBG_CHKTHIS(OHTMLReader
,NULL
);
145 rInput
.Seek(STREAM_SEEK_TO_BEGIN
);
147 SvParserState eParseState
= HTMLParser::CallParser();
148 SetColumnTypes(m_pColumnList
,m_pInfoMap
);
149 return m_bFoundTable
? eParseState
: SVPAR_ERROR
;
151 // -----------------------------------------------------------------------------
// Handles a single token delivered by the HTML parser. Text tokens are
// collected in m_sTextToken / m_sCurrent while inside a table, the end of a
// data cell passes the collected text on via insertValueIntoColumn(), and row
// boundaries drive m_pUpdateHelper (moveToInsertRow / insertRow).
// SQLExceptions raised while updating are caught here.
152 void OHTMLReader::NextToken( int nToken
)
154 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::NextToken" );
155 DBG_CHKTHIS(OHTMLReader
,NULL
);
156 if(m_bError
|| !m_nRows
) // on error, or when no rows are left to check, return immediately
158 if ( nToken
== HTML_META
)
161 if(m_xConnection
.is()) // tells which ctor was called and therefore whether a table is to be created
167 { // may also be TD or TH if there was no TABLE before
168 const HTMLOptions
& rHtmlOptions
= GetOptions();
169 for (size_t i
= 0, n
= rHtmlOptions
.size(); i
< n
; ++i
)
171 const HTMLOption
& rOption
= rHtmlOptions
[i
];
172 switch( rOption
.GetToken() )
175 { // percent: of the document width or of the outer cell
176 m_nColumnWidth
= GetWidthPixel( rOption
);
185 sal_uInt32 nTell
= rInput
.Tell(); // may change the position of the stream
186 if ( !m_xTable
.is() )
187 {// use the first line as header
188 m_bError
= !CreateTable(nToken
);
189 if ( m_bAppendFirstLine
)
200 case HTML_TABLEROW_ON
:
201 if ( m_pUpdateHelper
.get() )
205 m_pUpdateHelper
->moveToInsertRow(); // otherwise append a new row
207 catch(SQLException
& e
)
208 // update error handling
217 case HTML_SINGLECHAR
:
218 if ( m_bInTbl
) //&& !m_bSDNum ) // important, otherwise we would also get the font names
219 m_sTextToken
+= aToken
;
221 case HTML_PARABREAK_OFF
:
222 m_sCurrent
+= m_sTextToken
;
224 case HTML_PARABREAK_ON
:
225 m_sTextToken
.Erase();
227 case HTML_TABLEDATA_ON
:
230 case HTML_TABLEDATA_OFF
:
232 if ( m_sCurrent
.Len() )
233 m_sTextToken
= m_sCurrent
;
236 insertValueIntoColumn();
238 catch(SQLException
& e
)
239 // update error handling
246 m_bSDNum
= m_bInTbl
= sal_False
;
249 case HTML_TABLEROW_OFF
:
250 if ( !m_pUpdateHelper
.get() )
258 if (m_bIsAutoIncrement
) // if bSetAutoIncrement then I have to set the autoincrement
259 m_pUpdateHelper
->updateInt(1,m_nRowCount
);
260 m_pUpdateHelper
->insertRow();
262 catch(SQLException
& e
)
263 //////////////////////////////////////////////////////////////////////
264 // update error handling
272 else // branch only valid for type checking
278 // the column header does not count
283 while(GetNextToken() != HTML_TABLEROW_OFF
);
287 case HTML_TABLEDATA_ON
:
288 case HTML_TABLEHEADER_ON
:
292 case HTML_SINGLECHAR
:
293 if ( m_bInTbl
) // && !m_bSDNum ) // important, otherwise we would also get the font names
294 m_sTextToken
+= aToken
;
296 case HTML_PARABREAK_OFF
:
297 m_sCurrent
+= m_sTextToken
;
299 case HTML_PARABREAK_ON
:
300 m_sTextToken
.Erase();
302 case HTML_TABLEDATA_OFF
:
303 if ( m_sCurrent
.Len() )
304 m_sTextToken
= m_sCurrent
;
307 m_bSDNum
= m_bInTbl
= sal_False
;
310 case HTML_TABLEROW_OFF
:
311 if ( m_sCurrent
.Len() )
312 m_sTextToken
= m_sCurrent
;
321 // -----------------------------------------------------------------------------
// Walks the options of the current token and copies the string value of the
// matching options into m_sValToken and m_sNumToken.
322 void OHTMLReader::fetchOptions()
324 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::fetchOptions" );
326 const HTMLOptions
& options
= GetOptions();
327 for (size_t i
= 0, n
= options
.size(); i
< n
; ++i
)
329 const HTMLOption
& rOption
= options
[i
];
330 switch( rOption
.GetToken() )
334 m_sValToken
= rOption
.GetString();
339 m_sNumToken
= rOption
.GetString();
344 //---------------------------------------------------------------------------------
// Evaluates the options of the current token for a table cell.
// An alignment string is compared case-insensitively against the HTML
// "right" / "center" / "left" keywords and mapped to the corresponding
// SVX_HOR_JUSTIFY_* value in eVal (SVX_HOR_JUSTIFY_STANDARD is the remaining
// assignment); a width option is converted with GetWidthPixel() and stored in
// m_nWidth.
345 void OHTMLReader::TableDataOn(SvxCellHorJustify
& eVal
)
347 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::TableDataOn" );
348 DBG_CHKTHIS(OHTMLReader
,NULL
);
349 const HTMLOptions
& rHtmlOptions
= GetOptions();
350 for (size_t i
= 0, n
= rHtmlOptions
.size(); i
< n
; ++i
)
352 const HTMLOption
& rOption
= rHtmlOptions
[i
];
353 switch( rOption
.GetToken() )
357 const String
& rOptVal
= rOption
.GetString();
358 if (rOptVal
.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_right
))
359 eVal
= SVX_HOR_JUSTIFY_RIGHT
;
360 else if (rOptVal
.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_center
))
361 eVal
= SVX_HOR_JUSTIFY_CENTER
;
362 else if (rOptVal
.EqualsIgnoreCaseAscii( OOO_STRING_SVTOOLS_HTML_AL_left
))
363 eVal
= SVX_HOR_JUSTIFY_LEFT
;
365 eVal
= SVX_HOR_JUSTIFY_STANDARD
;
369 m_nWidth
= GetWidthPixel( rOption
);
375 //---------------------------------------------------------------------------------
// Evaluates the font-related options of the current token: the option colour
// is written to _rTextColor as an RGB value, a font name built from the
// comma-separated face list is written to _rFont.Name (only when non-empty),
// and the numeric size option is written to _rFont.Height.
376 void OHTMLReader::TableFontOn(FontDescriptor
& _rFont
,sal_Int32
&_rTextColor
)
378 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::TableFontOn" );
379 DBG_CHKTHIS(OHTMLReader
,NULL
);
380 const HTMLOptions
& rHtmlOptions
= GetOptions();
381 for (size_t i
= 0, n
= rHtmlOptions
.size(); i
< n
; ++i
)
383 const HTMLOption
& rOption
= rHtmlOptions
[i
];
384 switch( rOption
.GetToken() )
389 rOption
.GetColor( aColor
);
390 _rTextColor
= aColor
.GetRGBColor();
395 const String
& rFace
= rOption
.GetString();
398 while( nPos
!= STRING_NOTFOUND
)
399 { // font list, VCL: semicolon as separator, HTML: comma
400 String aFName
= rFace
.GetToken( 0, ',', nPos
);
401 aFName
.EraseTrailingChars().EraseLeadingChars();
402 if( aFontName
.Len() )
406 if ( aFontName
.Len() )
407 _rFont
.Name
= ::rtl::OUString(aFontName
);
412 sal_Int16 nSize
= (sal_Int16
) rOption
.GetNumber();
// NOTE(review): the branch below raises every size smaller than
// DBAUI_HTML_FONTSIZES up to that value; if an upper cap was intended the
// comparison would have to be '>' - confirm the intent.
415 else if ( nSize
< DBAUI_HTML_FONTSIZES
)
416 nSize
= DBAUI_HTML_FONTSIZES
;
418 _rFont
.Height
= nSize
;
424 // ---------------------------------------------------------------------------
// Converts a width option into a pixel value.
// A value containing '%' is returned as that percentage of m_nColumnWidth;
// a value containing '*' is a relative width whose handling is still a TODO;
// any other value is returned as a pixel count taken from GetNumber().
425 sal_Int16
OHTMLReader::GetWidthPixel( const HTMLOption
& rOption
)
427 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::GetWidthPixel" );
428 DBG_CHKTHIS(OHTMLReader
,NULL
);
429 const String
& rOptVal
= rOption
.GetString();
430 if ( rOptVal
.Search('%') != STRING_NOTFOUND
)
432 OSL_ENSURE( m_nColumnWidth
, "WIDTH Option: m_nColumnWidth==0 und Width%" );
433 return (sal_Int16
)((rOption
.GetNumber() * m_nColumnWidth
) / 100);
437 if ( rOptVal
.Search('*') != STRING_NOTFOUND
)
438 { // relative to what?!?
439 //TODO: collect a ColArray of all relative values and then MakeCol
443 return (sal_Int16
)rOption
.GetNumber(); // pixels
446 // ---------------------------------------------------------------------------
// Processes tokens up to the end of the current table row
// (HTML_TABLEROW_OFF): cell text is collected as a column name and turned
// into a column via CreateDefaultColumn(), font tokens update aFont /
// nTextColor, and text is also collected in aTableName. A default table name
// is derived from the first word of STR_TBL_TITLE, made unique within
// m_xTables. Afterwards m_bFoundTable is set and the collected data is handed
// to executeWizard().
// Returns true only if executeWizard() returned false and m_xTable is set.
447 sal_Bool
OHTMLReader::CreateTable(int nToken
)
449 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::CreateTable" );
450 DBG_CHKTHIS(OHTMLReader
,NULL
);
451 String
aTempName(ModuleRes(STR_TBL_TITLE
));
452 aTempName
= aTempName
.GetToken(0,' ');
453 aTempName
= String(::dbtools::createUniqueName(m_xTables
,::rtl::OUString(aTempName
)));
455 int nTmpToken2
= nToken
;
456 sal_Bool bCaption
= sal_False
;
457 sal_Bool bTableHeader
= sal_False
;
459 SvxCellHorJustify eVal
;
462 FontDescriptor aFont
= ::dbaui::CreateFontDescriptor(Application::GetSettings().GetStyleSettings().GetAppFont());
463 sal_Int32 nTextColor
= 0;
469 case HTML_SINGLECHAR
:
471 aColumnName
+= aToken
;
473 aTableName
+= aToken
;
475 case HTML_PARABREAK_OFF
:
476 m_sCurrent
+= aColumnName
;
478 case HTML_PARABREAK_ON
:
479 m_sTextToken
.Erase();
481 case HTML_TABLEDATA_ON
:
482 case HTML_TABLEHEADER_ON
:
484 bTableHeader
= sal_True
;
486 case HTML_TABLEDATA_OFF
:
487 case HTML_TABLEHEADER_OFF
:
489 aColumnName
.EraseLeadingChars();
490 aColumnName
.EraseTrailingChars();
491 if (!aColumnName
.Len() || m_bAppendFirstLine
)
492 aColumnName
= String(ModuleRes(STR_COLUMN_NAME
));
493 else if ( m_sCurrent
.Len() )
494 aColumnName
= m_sCurrent
;
496 aColumnName
.EraseLeadingChars();
497 aColumnName
.EraseTrailingChars();
498 CreateDefaultColumn(aColumnName
);
502 eVal
= SVX_HOR_JUSTIFY_STANDARD
;
503 bTableHeader
= sal_False
;
508 case HTML_CAPTION_ON
:
512 case HTML_CAPTION_OFF
:
513 aTableName
.EraseLeadingChars();
514 aTableName
.EraseTrailingChars();
// NOTE(review): an empty caption appears to be passed to createUniqueName()
// while a non-empty caption appears to be replaced by aTempName; this looks
// inverted - confirm the intent.
515 if(!aTableName
.Len())
516 aTableName
= String(::dbtools::createUniqueName(m_xTables
,::rtl::OUString(aTableName
)));
518 aTableName
= aTempName
;
519 bCaption
= sal_False
;
522 TableFontOn(aFont
,nTextColor
);
525 aFont
.Weight
= ::com::sun::star::awt::FontWeight::BOLD
;
528 aFont
.Slant
= ::com::sun::star::awt::FontSlant_ITALIC
;
530 case HTML_UNDERLINE_ON
:
531 aFont
.Underline
= ::com::sun::star::awt::FontUnderline::SINGLE
;
534 aFont
.Strikeout
= ::com::sun::star::awt::FontStrikeout::SINGLE
;
538 while((nTmpToken2
= GetNextToken()) != HTML_TABLEROW_OFF
);
540 if ( m_sCurrent
.Len() )
541 aColumnName
= m_sCurrent
;
542 aColumnName
.EraseLeadingChars();
543 aColumnName
.EraseTrailingChars();
544 if(aColumnName
.Len())
545 CreateDefaultColumn(aColumnName
);
547 if ( m_vDestVector
.empty() )
550 if(!aTableName
.Len())
551 aTableName
= aTempName
;
553 m_bInTbl
= sal_False
;
554 m_bFoundTable
= sal_True
;
556 if ( isCheckEnabled() )
559 return !executeWizard(aTableName
,makeAny(nTextColor
),aFont
) && m_xTable
.is();
561 // -----------------------------------------------------------------------------
// Sets m_bMetaOptions and lets the parser evaluate the meta options by calling
// ParseMetaOptions() with two NULL arguments.
562 void OHTMLReader::setTextEncoding()
564 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::setTextEncoding" );
565 DBG_CHKTHIS(OHTMLReader
,NULL
);
566 m_bMetaOptions
= sal_True
;
567 ParseMetaOptions(NULL
, NULL
);
570 // -----------------------------------------------------------------------------
// release(): the call is logged and the instance is checked in debug builds
// (DBG_CHKTHIS).
571 void OHTMLReader::release()
573 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::release" );
574 DBG_CHKTHIS(OHTMLReader
,NULL
);
577 // -----------------------------------------------------------------------------
// Returns the factory function for the type-selection page used with this
// reader: the address of OWizHTMLExtend::Create.
578 TypeSelectionPageFactory
OHTMLReader::getTypeSelectionPageFactory()
580 RTL_LOGFILE_CONTEXT_AUTHOR( aLogger
, "misc", "Ocke.Janssen@sun.com", "OHTMLReader::getTypeSelectionPageFactory" );
581 DBG_CHKTHIS(OHTMLReader
,NULL
);
582 return &OWizHTMLExtend::Create
;
584 // -----------------------------------------------------------------------------
586 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */