1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "scdetect.hxx"
22 #include <sal/macros.h>
24 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
25 #include <com/sun/star/beans/PropertyValue.hpp>
26 #include <cppuhelper/supportsservice.hxx>
27 #include <com/sun/star/container/XNameAccess.hpp>
28 #include <com/sun/star/io/XInputStream.hpp>
29 #include <unotools/mediadescriptor.hxx>
30 #include <sfx2/docfile.hxx>
31 #include <sfx2/fcontnr.hxx>
33 using namespace ::com::sun::star
;
34 using utl::MediaDescriptor
;
// Names of the Calc import filters that detection can resolve to. detect()
// picks one of these and looks it up with
// SfxFilterMatcher::GetFilter4FilterName().
38 const sal_Char pFilterLotus
[] = "Lotus";
39 const sal_Char pFilterQPro6
[] = "Quattro Pro 6.0";
40 const sal_Char pFilterDBase
[] = "dBase";
41 const sal_Char pFilterDif
[] = "DIF";
42 const sal_Char pFilterSylk
[] = "SYLK";
44 // Table of search patterns
45 // Meaning of the sequences
46 // 0x00??: exactly byte 0x?? must be at this position
47 // 0x0100: skip one byte (don't care)
48 // 0x02nn: one byte out of the following 0xnn alternatives
49 // 0x8000: detection completed
// Builds a pattern entry of the form 0x02nn: the low byte ANZ is the number of
// alternative byte values that follow it in the pattern table.
52 #define M_ALT(ANZ) (0x0200+(ANZ))
// Signature pattern for Lotus 1/1A/2 streams. It starts with the bytes
// 00 00 02 00, followed by one byte that must be either 0x04 or 0x06.
55 const sal_uInt16 pLotus
[] = // Lotus 1/1A/2
56 { 0x0000, 0x0000, 0x0002, 0x0000,
57 M_ALT(2), 0x0004, 0x0006,
// Signature pattern for Lotus >= 9.7 streams. It starts with 00 00, one M_DC
// entry (presumably the "don't care" marker - confirm against its definition),
// 00, then one byte out of 0x03/0x04/0x05, followed by 10 04 00 00.
60 const sal_uInt16 pLotusNew
[] = // Lotus >= 9.7
61 { 0x0000, 0x0000, M_DC
, 0x0000, // Rec# + Len (0x1a)
62 M_ALT(3), 0x0003, 0x0004, 0x0005, // File Revision Code 97->ME
63 0x0010, 0x0004, 0x0000, 0x0000,
// Signature pattern for Lotus > 3 streams. It starts with 00 00 1A 00 (record
// number and length 26), followed by a file revision code byte that must be
// 0x00 or 0x02; the pattern also contains the file revision subcode 04 00.
66 const sal_uInt16 pLotus2
[] = // Lotus >3
67 { 0x0000, 0x0000, 0x001A, 0x0000, // Rec# + Len (26)
68 M_ALT(2), 0x0000, 0x0002, // File Revision Code
70 0x0004, 0x0000, // File Revision Subcode
// Signature pattern for Quattro Pro streams. It starts with 00 00 02 00,
// followed by one byte out of four alternatives: 0x01, 0x02, 0x06 or 0x07.
73 const sal_uInt16 pQPro
[] =
74 { 0x0000, 0x0000, 0x0002, 0x0000,
75 M_ALT(4), 0x0001, 0x0002, // WB1, WB2
76 0x0006, 0x0007, // QPro 6/7 (?)
// Signature pattern for DIF streams that use CR-LF line ends; it contains the
// literal text "TABLE".
80 const sal_uInt16 pDIF1
[] = // DIF with CR-LF
82 'T', 'A', 'B', 'L', 'E',
// Signature pattern for DIF streams that use a single CR or LF as line end; it
// contains the literal text "TABLE".
89 const sal_uInt16 pDIF2
[] = // DIF with CR or LF
91 'T', 'A', 'B', 'L', 'E',
// Signature pattern for SYLK streams; it contains one byte position that
// accepts any of 'P', 'N' or 'E'.
98 const sal_uInt16 pSylk
[] = // Sylk
101 M_ALT(3), 'P', 'N', 'E', // 'P' plus undocumented Excel extensions 'N' and 'E'
// Matches the beginning of a stream against one signature pattern.
//
// rStr    - stream to inspect; it is rewound to offset 0 before matching.
// pSearch - pattern table of 16-bit entries. In the order tested below: values
//           below 0x0100 demand exactly that byte, M_DC entries are checked
//           next, M_ALT(n) entries introduce n alternatives for one byte, and
//           an M_ENDE entry is checked last.
//
// The loop consumes one stream byte per pattern step and runs while the stream
// is not at EOF and the comparison is still in sync (bSync).
// NOTE(review): presumably returns true when the M_ENDE entry is reached and
// false otherwise - confirm against the return statements.
104 bool detectThisFormat(SvStream
& rStr
, const sal_uInt16
* pSearch
)
107 rStr
.Seek( 0 ); // always start matching at the very beginning of the stream
108 rStr
.ReadUChar( nByte
);
110 while( !rStr
.IsEof() && bSync
)
112 sal_uInt16 nMuster
= *pSearch
;
114 if( nMuster
< 0x0100 )
115 { // direct byte comparison
116 if( ( sal_uInt8
) nMuster
!= nByte
)
119 else if( nMuster
& M_DC
)
122 else if( nMuster
& M_ALT(0) )
123 { // alternative bytes
// The low byte of the entry is the number of alternatives that follow.
124 sal_uInt8 nAnzAlt
= ( sal_uInt8
) nMuster
;
125 bSync
= false; // out of sync until one alternative matches
129 if( ( sal_uInt8
) *pSearch
== nByte
)
130 bSync
= true; // an alternative matched: back in sync
134 else if( nMuster
& M_ENDE
)
// Fetch the next stream byte for the next pattern step.
140 rStr
.ReadUChar( nByte
);
// Constructs the detector. The component context parameter is unnamed and
// therefore not used here.
148 ScFilterDetect::ScFilterDetect( const uno::Reference
<uno::XComponentContext
>& /*xContext*/ )
// Destructor.
152 ScFilterDetect::~ScFilterDetect()
157 // This method is no longer used, but I do want to keep this for now to see
158 // if we could transfer this check to the now centralized ascii detection
159 // code in the filter module.
//
// Heuristic check whether rStream could be ASCII/CSV text. The stream is
// rewound, up to 4096 bytes (2048 16-bit units) are read into a local buffer,
// and that buffer is examined for a byte order mark and for null bytes.
160 static sal_Bool
lcl_MayBeAscii( SvStream
& rStream
)
162 // ASCII/CSV is considered possible if there are no null bytes, or a Byte
163 // Order Mark is present, or if, for Unicode UCS2/UTF-16, all null bytes
164 // are on either even or odd byte positions.
166 rStream
.Seek(STREAM_SEEK_TO_BEGIN
);
168 const size_t nBufSize
= 2048;
169 sal_uInt16 aBuffer
[ nBufSize
];
// The 16-bit buffer is filled through a byte pointer, so one Read() call can
// fetch up to nBufSize*2 bytes.
170 sal_uInt8
* pByte
= reinterpret_cast<sal_uInt8
*>(aBuffer
);
171 sal_uLong nBytesRead
= rStream
.Read( pByte
, nBufSize
*2);
// Either value of the first 16-bit unit is accepted as a BOM, so the test
// works for both byte orders.
173 if ( nBytesRead
>= 2 && (aBuffer
[0] == 0xfffe || aBuffer
[0] == 0xfeff) )
175 // Unicode BOM file may contain null bytes.
179 const sal_uInt16
* p
= aBuffer
;
180 sal_uInt16 nMask
= 0xffff;
// Each 16-bit unit is masked with nMask and its low and high byte are tested
// for zero; the loop also ends as soon as nMask becomes 0.
// NOTE(review): presumably nMask is narrowed whenever a null byte is seen -
// confirm against the statements guarded by the two tests below.
182 while( nBytesRead
-- && nMask
)
184 sal_uInt16 nVal
= *p
++ & nMask
;
185 if (!(nVal
& 0x00ff))
187 if (!(nVal
& 0xff00))
// Heuristic check whether rStream looks like a dBase (dbf) file.
//
// The checks performed, in order:
//  - the first byte is compared against the list of known dbf type markers;
//  - the stream size is compared with the minimum size of an empty dbf
//    (2 * 32 + 1 = 65 bytes);
//  - the 16-bit header length is read and compared with that minimum and with
//    the stream size;
//  - the header is scanned on 32-byte boundaries for the 0x0d end marker.
//
// Returns true if the 0x0d header end marker was found by the final scan.
195 static bool lcl_MayBeDBase( SvStream
& rStream
)
197 // Look for dbf marker, see connectivity/source/inc/dbase/DTable.hxx
198 // DBFType for values.
199 const sal_uInt8 nValidMarks
[] = {
200 0x03, 0x04, 0x05, 0x30, 0x43, 0xB3, 0x83, 0x8b, 0x8e, 0xf5 };
202 rStream
.Seek(STREAM_SEEK_TO_BEGIN
);
203 rStream
.ReadUChar( nMark
);
204 bool bValidMark
= false;
// Linear search over the marker table; stops at the first hit.
205 for (size_t i
=0; i
< sizeof(nValidMarks
)/sizeof(nValidMarks
[0]) && !bValidMark
; ++i
)
207 if (nValidMarks
[i
] == nMark
)
213 const size_t nHeaderBlockSize
= 32;
214 // Empty dbf is >= 32*2+1 bytes in size.
215 const size_t nEmptyDbf
= nHeaderBlockSize
* 2 + 1;
// Determine the stream size by seeking to its end.
217 rStream
.Seek(STREAM_SEEK_TO_END
);
218 sal_uLong nSize
= rStream
.Tell();
219 if ( nSize
< nEmptyDbf
)
222 // length of header starts at 8
224 sal_uInt16 nHeaderLen
;
225 rStream
.ReadUInt16( nHeaderLen
);
// The header can neither be shorter than an empty dbf nor longer than the
// stream itself.
227 if ( nHeaderLen
< nEmptyDbf
|| nSize
< nHeaderLen
)
230 // Last byte of header must be 0x0d, this is how it's specified.
231 // #i9581#,#i26407# but some applications don't follow the specification
232 // and pad the header with one byte 0x00 to reach an
233 // even boundary. Some (#i88577# ) even pad more or pad using a 0x1a ^Z
234 // control character (#i8857#). This results in:
235 // Last byte of header must be 0x0d on 32 bytes boundary.
// Starting at the last 32-byte boundary inside the header, step backwards one
// block at a time until a 0x0d byte is read or only the first block is left.
236 sal_uInt16 nBlocks
= (nHeaderLen
- 1) / nHeaderBlockSize
;
237 sal_uInt8 nEndFlag
= 0;
238 while ( nBlocks
> 1 && nEndFlag
!= 0x0d ) {
239 rStream
.Seek( nBlocks
-- * nHeaderBlockSize
);
240 rStream
.ReadUChar( nEndFlag
);
243 return ( 0x0d == nEndFlag
);
// Verifies a pre-selected Calc type against the actual stream content.
//
// lDescriptor - media descriptor; the type name and the input stream are read
//               from it, and on success the filter name is written back.
//
// For each supported type name ("calc_Lotus", "calc_QPro", "calc_SYLK",
// "calc_DIF", "calc_dBase") the matching content check is run on the stream
// and the corresponding filter name is selected. The filter is then looked up
// in the "scalc" filter matcher and its name stored in the descriptor.
// NOTE(review): presumably an empty string is returned when detection fails
// and the type name when it succeeds - confirm against the return statements.
246 OUString SAL_CALL
ScFilterDetect::detect( uno::Sequence
<beans::PropertyValue
>& lDescriptor
)
247 throw( uno::RuntimeException
, std::exception
)
249 MediaDescriptor
aMediaDesc( lDescriptor
);
250 OUString aTypeName
= aMediaDesc
.getUnpackedValueOrDefault( MediaDescriptor::PROP_TYPENAME(), OUString() );
251 uno::Reference
< io::XInputStream
> xStream ( aMediaDesc
[MediaDescriptor::PROP_INPUTSTREAM()], uno::UNO_QUERY
);
// Wrap the UNO input stream in aMedium to obtain an SvStream for the checks
// below; interaction handling is switched off for this.
256 aMedium
.UseInteractionHandler( false );
257 aMedium
.setStreamToLoadFrom( xStream
, true );
259 SvStream
* pStream
= aMedium
.GetInStream();
260 if ( !pStream
|| pStream
->GetError() )
261 // No stream, no detection.
264 const char* pSearchFilterName
= NULL
;
265 if (aTypeName
== "calc_Lotus")
// Lotus: any one of the three Lotus signature patterns may match.
267 if (!detectThisFormat(*pStream
, pLotus
) && !detectThisFormat(*pStream
, pLotusNew
) && !detectThisFormat(*pStream
, pLotus2
))
270 pSearchFilterName
= pFilterLotus
;
272 else if (aTypeName
== "calc_QPro")
274 if (!detectThisFormat(*pStream
, pQPro
))
277 pSearchFilterName
= pFilterQPro6
;
279 else if (aTypeName
== "calc_SYLK")
281 if (!detectThisFormat(*pStream
, pSylk
))
284 pSearchFilterName
= pFilterSylk
;
286 else if (aTypeName
== "calc_DIF")
// DIF: either of the two DIF signature patterns may match.
288 if (!detectThisFormat(*pStream
, pDIF1
) && !detectThisFormat(*pStream
, pDIF2
))
291 pSearchFilterName
= pFilterDif
;
293 else if (aTypeName
== "calc_dBase")
// dBase has no fixed signature pattern; a dedicated header check is used.
295 if (!lcl_MayBeDBase(*pStream
))
298 pSearchFilterName
= pFilterDBase
;
// Resolve the selected filter name to a filter of the "scalc" matcher.
303 SfxFilterMatcher
aMatcher("scalc");
304 const SfxFilter
* pFilter
= aMatcher
.GetFilter4FilterName(OUString::createFromAscii(pSearchFilterName
));
// Store the filter name in the descriptor and write the descriptor back to
// the caller's sequence.
309 aMediaDesc
[MediaDescriptor::PROP_FILTERNAME()] <<= pFilter
->GetName();
310 aMediaDesc
>> lDescriptor
;
// Returns the implementation name of this component,
// "com.sun.star.comp.calc.FormatDetector".
314 OUString SAL_CALL
ScFilterDetect::getImplementationName() throw (uno::RuntimeException
, std::exception
)
316 return OUString("com.sun.star.comp.calc.FormatDetector");
// Reports whether this component supports the service sServiceName; the
// decision is delegated to cppu::supportsService().
319 sal_Bool
ScFilterDetect::supportsService( const OUString
& sServiceName
)
320 throw (uno::RuntimeException
, std::exception
)
322 return cppu::supportsService(this, sServiceName
);
// Returns the list of supported services: a one-element sequence containing
// "com.sun.star.frame.ExtendedTypeDetection".
325 com::sun::star::uno::Sequence
<OUString
> ScFilterDetect::getSupportedServiceNames()
326 throw (uno::RuntimeException
, std::exception
)
328 uno::Sequence
<OUString
> seqServiceNames(1);
329 seqServiceNames
.getArray()[0] = "com.sun.star.frame.ExtendedTypeDetection";
330 return seqServiceNames
;
// Exported C entry point that creates the component: constructs a new
// ScFilterDetect for the given context and returns it through cppu::acquire().
// The second (sequence) parameter is unnamed and not used.
333 extern "C" SAL_DLLPUBLIC_EXPORT ::com::sun::star::uno::XInterface
* SAL_CALL
334 com_sun_star_comp_calc_FormatDetector_get_implementation(::com::sun::star::uno::XComponentContext
* context
,
335 ::com::sun::star::uno::Sequence
<css::uno::Any
> const &)
337 return cppu::acquire(new ScFilterDetect(context
));
341 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */