1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "scdetect.hxx"
22 #include <sal/macros.h>
24 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
25 #include <com/sun/star/beans/PropertyValue.hpp>
26 #include <cppuhelper/supportsservice.hxx>
27 #include <com/sun/star/container/XNameAccess.hpp>
28 #include <com/sun/star/io/XInputStream.hpp>
29 #include <unotools/mediadescriptor.hxx>
30 #include <sfx2/docfile.hxx>
31 #include <sfx2/fcontnr.hxx>
33 using namespace ::com::sun::star
;
34 using utl::MediaDescriptor
;
// Names of the Calc import filters this detector can select.
// ScFilterDetect::detect() assigns one of these to pSearchFilterName and
// resolves it with SfxFilterMatcher::GetFilter4FilterName().
38 const sal_Char pFilterLotus
[] = "Lotus";
39 const sal_Char pFilterQPro6
[] = "Quattro Pro 6.0";
40 const sal_Char pFilterDBase
[] = "dBase";
41 const sal_Char pFilterDif
[] = "DIF";
42 const sal_Char pFilterSylk
[] = "SYLK";
44 // Table of search patterns
45 // Meaning of the sequences
46 // 0x00??: exactly byte 0x?? must be at this position
47 // 0x0100: skip one byte (don't care)
48 // 0x02nn: one byte out of the 0xnn alternatives that follow
49 // 0x8000: detection complete
// Builds the pattern word 0x0200 + ANZ. detectThisFormat() treats the low
// byte of such a word as a count (nAnzAlt) and compares the current stream
// byte against the table entries that follow it.
53 #define M_ALT(ANZ) (0x0200+(ANZ))
// Signature table for Lotus 1/1A/2, passed to detectThisFormat() for the
// "calc_Lotus" type: bytes 00 00 02 00, then either 0x04 or 0x06.
// NOTE(review): the tail of this table (including its terminator) is not
// visible in this view.
56 const sal_uInt16 pLotus
[] = // Lotus 1/1A/2
57 { 0x0000, 0x0000, 0x0002, 0x0000,
58 M_ALT(2), 0x0004, 0x0006,
// Signature table for Lotus >= 9.7, passed to detectThisFormat() for the
// "calc_Lotus" type: 00 00, an M_DC entry, 00, then one of the revision
// codes 0x03/0x04/0x05, then 10 04 00 00.
// NOTE(review): the terminating entry is not visible in this view.
61 const sal_uInt16 pLotusNew
[] = // Lotus >= 9.7
62 { 0x0000, 0x0000, M_DC
, 0x0000, // Rec# + Len (0x1a)
63 M_ALT(3), 0x0003, 0x0004, 0x0005, // File Revision Code 97->ME
64 0x0010, 0x0004, 0x0000, 0x0000,
// Signature table for Lotus > 3, passed to detectThisFormat() for the
// "calc_Lotus" type. Visible rows: 00 00 1A 00, a revision code of 0x00 or
// 0x02, and a subcode row 04 00.
// NOTE(review): a row between the revision code and the subcode, and the
// terminating entry, appear to be missing from this view.
67 const sal_uInt16 pLotus2
[] = // Lotus >3
68 { 0x0000, 0x0000, 0x001A, 0x0000, // Rec# + Len (26)
69 M_ALT(2), 0x0000, 0x0002, // File Revision Code
71 0x0004, 0x0000, // File Revision Subcode
// Signature table for Quattro Pro, passed to detectThisFormat() for the
// "calc_QPro" type: bytes 00 00 02 00, then one of 0x01, 0x02, 0x06, 0x07.
// NOTE(review): the terminating entry is not visible in this view.
74 const sal_uInt16 pQPro
[] =
75 { 0x0000, 0x0000, 0x0002, 0x0000,
76 M_ALT(4), 0x0001, 0x0002, // WB1, WB2
77 0x0006, 0x0007, // QPro 6/7 (?)
// Signature table for DIF files that use CR-LF line ends; tried first for
// the "calc_DIF" type. The visible row matches the literal text "TABLE".
// NOTE(review): the rows after "TABLE" are not visible in this view.
81 const sal_uInt16 pDIF1
[] = // DIF with CR-LF
83 'T', 'A', 'B', 'L', 'E',
// Signature table for DIF files that use a single CR or LF as line end;
// tried for the "calc_DIF" type when pDIF1 does not match. The visible row
// matches the literal text "TABLE".
// NOTE(review): the rows after "TABLE" are not visible in this view.
90 const sal_uInt16 pDIF2
[] = // DIF with CR or LF
92 'T', 'A', 'B', 'L', 'E',
// Signature table for SYLK files, passed to detectThisFormat() for the
// "calc_SYLK" type. The visible row accepts one of 'P', 'N' or 'E'.
// NOTE(review): the rows before and after this one are not visible in this
// view.
99 const sal_uInt16 pSylk
[] = // Sylk
102 M_ALT(3), 'P', 'N', 'E', // 'P' plus undocumented Excel extensions 'N' and 'E'
// Compares the beginning of rStr against the signature table pSearch.
//
// rStr    - stream to inspect; it is rewound to offset 0 and read one byte
//           at a time.
// pSearch - table of 16-bit pattern words (see the legend above the tables).
//
// Each pattern word is classified as follows:
//   word < 0x0100     the stream byte is compared with the word's low byte
//   word & M_DC       don't-care position (per the legend)
//   word & M_ALT(0)   the low byte is taken as nAnzAlt; bSync is cleared and
//                     set again when a following table entry equals the
//                     stream byte
//   word & M_ENDE     detection complete (per the legend)
// The loop runs while the stream is not at EOF and bSync is set.
//
// NOTE(review): the declarations of nByte/bSync, several branch bodies and
// the return statements are not visible in this view, so the exact return
// values are deliberately not documented here.
105 bool detectThisFormat(SvStream
& rStr
, const sal_uInt16
* pSearch
)
108 rStr
.Seek( 0 ); // start matching at the very beginning of the stream
109 rStr
.ReadUChar( nByte
);
111 while( !rStr
.IsEof() && bSync
)
113 sal_uInt16 nMuster
= *pSearch
;
115 if( nMuster
< 0x0100 )
116 { // direct byte comparison
117 if( ( sal_uInt8
) nMuster
!= nByte
)
120 else if( nMuster
& M_DC
)
123 else if( nMuster
& M_ALT(0) )
124 { // alternative bytes
125 sal_uInt8 nAnzAlt
= ( sal_uInt8
) nMuster
;
126 bSync
= false; // out of sync until one alternative matches
130 if( ( sal_uInt8
) *pSearch
== nByte
)
131 bSync
= true; // an alternative matched: back in sync
135 else if( nMuster
& M_ENDE
)
// Fetch the next stream byte for the next pattern word.
141 rStr
.ReadUChar( nByte
);
// Constructor. The component context parameter is left unnamed, i.e. it is
// not used by name here (the body is not visible in this view).
149 ScFilterDetect::ScFilterDetect( const uno::Reference
<uno::XComponentContext
>& /*xContext*/ )
// Destructor (the body is not visible in this view).
153 ScFilterDetect::~ScFilterDetect()
// Heuristic check whether rStream could be ASCII/CSV text.
// Reads up to nBufSize*2 bytes (the full capacity of aBuffer) from the start
// of the stream and inspects them as 16-bit words.
// NOTE(review): the branch bodies and return statements are not visible in
// this view, and the line between the nMask declaration and the loop is
// missing as well - confirm how nBytesRead is adjusted before the word loop.
158 // This method is no longer used, but I do want to keep this for now to see
159 // if we could transfer this check to the now centralized ascii detection
160 // code in the filter module.
161 static sal_Bool
lcl_MayBeAscii( SvStream
& rStream
)
163 // ASCII/CSV is considered possible if there are no null bytes, or a Byte
164 // Order Mark is present, or if, for Unicode UCS2/UTF-16, all null bytes
165 // are on either even or uneven byte positions.
167 rStream
.Seek(STREAM_SEEK_TO_BEGIN
);
169 const size_t nBufSize
= 2048;
170 sal_uInt16 aBuffer
[ nBufSize
];
// Byte view of the same buffer, used as the read target.
171 sal_uInt8
* pByte
= reinterpret_cast<sal_uInt8
*>(aBuffer
);
172 sal_uLong nBytesRead
= rStream
.Read( pByte
, nBufSize
*2);
// The first 16-bit word is compared against both byte orders of the BOM.
174 if ( nBytesRead
>= 2 && (aBuffer
[0] == 0xfffe || aBuffer
[0] == 0xfeff) )
176 // Unicode BOM file may contain null bytes.
180 const sal_uInt16
* p
= aBuffer
;
181 sal_uInt16 nMask
= 0xffff;
// Walk the buffer word by word until the count is used up or nMask is 0.
183 while( nBytesRead
-- && nMask
)
185 sal_uInt16 nVal
= *p
++ & nMask
;
// Test the low byte, then the high byte, of the masked word for zero.
186 if (!(nVal
& 0x00ff))
188 if (!(nVal
& 0xff00))
// Heuristic check whether rStream looks like a dBase (.dbf) file.
// Visible checks, in order:
//   1. the first byte is looked up in nValidMarks;
//   2. the stream size is compared against nEmptyDbf (32*2+1 bytes);
//   3. the 16-bit header length is compared against nEmptyDbf and the
//      stream size;
//   4. the 32-byte block boundaries inside the header are searched for a
//      0x0d end flag.
// Returns true when the end flag 0x0d was found in step 4.
// NOTE(review): the statements executed when checks 1-3 fail are not
// visible in this view; presumably they return false.
196 static bool lcl_MayBeDBase( SvStream
& rStream
)
198 // Look for dbf marker, see connectivity/source/inc/dbase/DTable.hxx
199 // DBFType for values.
200 const sal_uInt8 nValidMarks
[] = {
201 0x03, 0x04, 0x05, 0x30, 0x43, 0xB3, 0x83, 0x8b, 0x8e, 0xf5 };
203 rStream
.Seek(STREAM_SEEK_TO_BEGIN
);
204 rStream
.ReadUChar( nMark
);
205 bool bValidMark
= false;
// Linear search; the loop condition stops as soon as bValidMark is set.
206 for (size_t i
=0; i
< sizeof(nValidMarks
)/sizeof(nValidMarks
[0]) && !bValidMark
; ++i
)
208 if (nValidMarks
[i
] == nMark
)
214 const size_t nHeaderBlockSize
= 32;
215 // Empty dbf is >= 32*2+1 bytes in size.
216 const size_t nEmptyDbf
= nHeaderBlockSize
* 2 + 1;
// Determine the stream size by seeking to the end.
218 rStream
.Seek(STREAM_SEEK_TO_END
);
219 sal_uLong nSize
= rStream
.Tell();
220 if ( nSize
< nEmptyDbf
)
223 // length of header starts at 8
225 sal_uInt16 nHeaderLen
;
226 rStream
.ReadUInt16( nHeaderLen
);
// The header must be at least as large as an empty dbf and must fit into
// the stream.
228 if ( nHeaderLen
< nEmptyDbf
|| nSize
< nHeaderLen
)
231 // Last byte of header must be 0x0d, this is how it's specified.
232 // #i9581#,#i26407# but some applications don't follow the specification
233 // and pad the header with one byte 0x00 to reach an
234 // even boundary. Some (#i88577# ) even pad more or pad using a 0x1a ^Z
235 // control character (#i8857#). This results in:
236 // Last byte of header must be 0x0d on 32 bytes boundary.
237 sal_uInt16 nBlocks
= (nHeaderLen
- 1) / nHeaderBlockSize
;
238 sal_uInt8 nEndFlag
= 0;
// Probe block boundaries from the last one backwards until 0x0d is read or
// nBlocks has dropped to 1.
239 while ( nBlocks
> 1 && nEndFlag
!= 0x0d ) {
240 rStream
.Seek( nBlocks
-- * nHeaderBlockSize
);
241 rStream
.ReadUChar( nEndFlag
);
244 return ( 0x0d == nEndFlag
);
// Type detection entry point.
//
// lDescriptor - media descriptor; the type name (PROP_TYPENAME) and the
//               input stream (PROP_INPUTSTREAM) are read from it, and on
//               the visible success path PROP_FILTERNAME is set and the
//               descriptor is written back.
//
// The stream content is checked against the signature tables (or
// lcl_MayBeDBase() for "calc_dBase") that belong to the requested type
// name, and the matching filter name is looked up in the "scalc" filter
// matcher.
// NOTE(review): the declaration of aMedium, every early-return statement,
// any check of pFilter before it is dereferenced, and the final return are
// not visible in this view, so the returned string is not documented here.
247 OUString SAL_CALL
ScFilterDetect::detect( uno::Sequence
<beans::PropertyValue
>& lDescriptor
)
248 throw( uno::RuntimeException
, std::exception
)
250 MediaDescriptor
aMediaDesc( lDescriptor
);
251 OUString aTypeName
= aMediaDesc
.getUnpackedValueOrDefault( MediaDescriptor::PROP_TYPENAME(), OUString() );
252 uno::Reference
< io::XInputStream
> xStream ( aMediaDesc
[MediaDescriptor::PROP_INPUTSTREAM()], uno::UNO_QUERY
);
// Route the descriptor's input stream through the medium with its
// interaction handler switched off.
257 aMedium
.UseInteractionHandler( false );
258 aMedium
.setStreamToLoadFrom( xStream
, true );
260 SvStream
* pStream
= aMedium
.GetInStream();
261 if ( !pStream
|| pStream
->GetError() )
262 // No stream, no detection.
// Check the content for the requested type and pick the filter name to
// look up.
265 const char* pSearchFilterName
= NULL
;
266 if (aTypeName
== "calc_Lotus")
// All three Lotus signatures are tried in turn.
268 if (!detectThisFormat(*pStream
, pLotus
) && !detectThisFormat(*pStream
, pLotusNew
) && !detectThisFormat(*pStream
, pLotus2
))
271 pSearchFilterName
= pFilterLotus
;
273 else if (aTypeName
== "calc_QPro")
275 if (!detectThisFormat(*pStream
, pQPro
))
278 pSearchFilterName
= pFilterQPro6
;
280 else if (aTypeName
== "calc_SYLK")
282 if (!detectThisFormat(*pStream
, pSylk
))
285 pSearchFilterName
= pFilterSylk
;
287 else if (aTypeName
== "calc_DIF")
// Both DIF line-end variants are tried.
289 if (!detectThisFormat(*pStream
, pDIF1
) && !detectThisFormat(*pStream
, pDIF2
))
292 pSearchFilterName
= pFilterDif
;
294 else if (aTypeName
== "calc_dBase")
296 if (!lcl_MayBeDBase(*pStream
))
299 pSearchFilterName
= pFilterDBase
;
// Resolve the chosen filter name among the "scalc" filters.
304 SfxFilterMatcher
aMatcher("scalc");
305 const SfxFilter
* pFilter
= aMatcher
.GetFilter4FilterName(OUString::createFromAscii(pSearchFilterName
));
// Store the filter name and write the descriptor back to the caller's
// sequence.
310 aMediaDesc
[MediaDescriptor::PROP_FILTERNAME()] <<= pFilter
->GetName();
311 aMediaDesc
>> lDescriptor
;
// Returns the implementation name by delegating to
// impl_getStaticImplementationName().
315 OUString SAL_CALL
ScFilterDetect::getImplementationName() throw (uno::RuntimeException
, std::exception
)
317 return impl_getStaticImplementationName();
// Reports whether sServiceName is supported; the check is delegated to
// cppu::supportsService() with this object.
320 sal_Bool
ScFilterDetect::supportsService( const OUString
& sServiceName
)
321 throw (uno::RuntimeException
, std::exception
)
323 return cppu::supportsService(this, sServiceName
);
// Returns the supported service names by delegating to
// impl_getStaticSupportedServiceNames().
326 com::sun::star::uno::Sequence
<OUString
> ScFilterDetect::getSupportedServiceNames()
327 throw (uno::RuntimeException
, std::exception
)
329 return impl_getStaticSupportedServiceNames();
// Builds and returns a one-element sequence whose only entry is
// "com.sun.star.frame.ExtendedTypeDetection".
332 uno::Sequence
<OUString
> ScFilterDetect::impl_getStaticSupportedServiceNames()
334 uno::Sequence
<OUString
> seqServiceNames(1);
335 seqServiceNames
.getArray()[0] = "com.sun.star.frame.ExtendedTypeDetection";
336 return seqServiceNames
;
// Returns the fixed implementation name
// "com.sun.star.comp.calc.FormatDetector".
339 OUString
ScFilterDetect::impl_getStaticImplementationName()
341 return OUString("com.sun.star.comp.calc.FormatDetector");
// Factory function: allocates a new ScFilterDetect with xContext and returns
// it through its cppu::OWeakObject base as an XInterface reference.
344 uno::Reference
<uno::XInterface
> ScFilterDetect::impl_createInstance(
345 const uno::Reference
<uno::XComponentContext
>& xContext
) throw (uno::Exception
)
347 return static_cast<cppu::OWeakObject
*>(new ScFilterDetect(xContext
));
350 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */