1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: iodetect.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_sw.hxx"
34 #include <iodetect.hxx>
37 #include <osl/endian.h>
38 #include <sot/storage.hxx>
39 #include <svtools/parhtml.hxx>
40 #include <tools/urlobj.hxx>
42 bool IsDocShellRegistered();
44 SwIoDetect aFilterDetect
[] =
46 SwIoDetect( FILTER_RTF
, STRING_LEN
),
47 SwIoDetect( FILTER_BAS
, STRING_LEN
),
48 SwIoDetect( sWW6
, STRING_LEN
),
49 SwIoDetect( FILTER_WW8
, STRING_LEN
),
50 SwIoDetect( sRtfWH
, STRING_LEN
),
51 SwIoDetect( sHTML
, 4 ),
52 SwIoDetect( sWW1
, STRING_LEN
),
53 SwIoDetect( sWW5
, STRING_LEN
),
54 SwIoDetect( FILTER_XML
, 4 ),
55 SwIoDetect( FILTER_TEXT_DLG
, 8 ),
56 SwIoDetect( FILTER_TEXT
, 4 )
// SwIoDetect::IsReader
// Tests whether the bytes in pHeader look like the format this detector
// stands for. The detector's own name (pName) selects which test is run;
// the function returns pName when the test succeeds and 0 otherwise.
//   pHeader - buffer with the data to inspect (the callers in this file pass
//             the bytes they read from the input stream)
//   nLen_   - length handed on to SwIoSystem::IsDetectableText for the two
//             text formats
//   The two String parameters are unnamed and not used.
// NOTE(review): this listing has gaps (the opening of the local header
// struct, the declaration of bRet and the first branch condition are not
// visible), so only the visible statements are described here.
59 const sal_Char
* SwIoDetect::IsReader(const sal_Char
* pHeader
, ULONG nLen_
,
60 const String
& /*rFileName*/, const String
& /*rUserData*/) const
// Fields and accessors of the Word file header view; they are reached below
// through casts of pHeader to W1_FIB*. The trailing comments give the byte
// offset of each field.
65 SVBT16 wIdent
; // 0x0 int magic number
66 SVBT16 nFib
; // 0x2 FIB version written
67 SVBT16 nProduct
; // 0x4 product version written by
68 SVBT16 nlocale
; // 0x6 language stamp---localized version;
72 USHORT
nFibGet() { return SVBT16ToShort(nFib
); }
73 USHORT
wIdentGet() { return SVBT16ToShort(wIdent
); }
74 USHORT
fFlagsGet() { return SVBT16ToShort(fFlags
); }
75 // SVBT16 fComplex :1;// 0004 when 1, file is in complex, fast-saved format.
// fComplexGet() extracts bit 2 of the flags word.
76 BOOL
fComplexGet() { return static_cast< BOOL
>((fFlagsGet() >> 2) & 1); }
// aName wraps the detector name so it can be compared with the format
// constants in the branches below.
80 rtl::OString
aName( pName
);
// HTML test (the condition guarding this assignment is not visible here).
82 bRet
= HTMLParser::IsHTMLFormat( pHeader
, TRUE
, RTL_TEXTENCODING_DONTKNOW
);
// RTF: the first five header bytes must be {\rtf
83 else if ( FILTER_RTF
== aName
)
84 bRet
= 0 == strncmp( "{\\rtf", pHeader
, 5 );
// sWW5: two magic-number / FIB-version pairs are recognised; what each match
// assigns is not visible in this listing.
85 else if ( sWW5
== aName
)
87 W1_FIB
*pW1Header
= (W1_FIB
*)pHeader
;
88 if (pW1Header
->wIdentGet() == 0xA5DC && pW1Header
->nFibGet() == 0x65)
90 else if (pW1Header
->wIdentGet() == 0xA5DB && pW1Header
->nFibGet() == 0x2D)
// sWW1: magic number 0xA59C, FIB version 0x21 and a cleared fComplex flag.
93 else if ( sWW1
== aName
)
95 bRet
= (( ((W1_FIB
*)pHeader
)->wIdentGet() == 0xA59C
96 && ((W1_FIB
*)pHeader
)->nFibGet() == 0x21)
97 && ((W1_FIB
*)pHeader
)->fComplexGet() == 0);
// Plain text: delegate to the text heuristics with default options.
99 else if ( FILTER_TEXT
== aName
)
100 bRet
= SwIoSystem::IsDetectableText(pHeader
, nLen_
);
// Text with dialog: same heuristics, but with bEncodedFilter = true and no
// output pointers.
101 else if ( FILTER_TEXT_DLG
== aName
)
102 bRet
= SwIoSystem::IsDetectableText( pHeader
, nLen_
, 0, 0, 0, true);
// The detector name doubles as the "matched" result; 0 means no match.
103 return bRet
? pName
: 0;
106 const String
SwIoSystem::GetSubStorageName( const SfxFilter
& rFltr
)
108 /* bei den StorageFiltern noch den SubStorageNamen setzen */
109 const String
& rUserData
= rFltr
.GetUserData();
110 if( rUserData
.EqualsAscii(FILTER_XML
) ||
111 rUserData
.EqualsAscii(FILTER_XMLV
) ||
112 rUserData
.EqualsAscii(FILTER_XMLVW
) )
113 return String::CreateFromAscii(
114 RTL_CONSTASCII_STRINGPARAM( "content.xml" ));
115 if( rUserData
.EqualsAscii(sWW6
) || rUserData
.EqualsAscii(FILTER_WW8
) )
116 return String::CreateFromAscii(
117 RTL_CONSTASCII_STRINGPARAM( "WordDocument" ));
118 return String::CreateFromAscii( RTL_CONSTASCII_STRINGPARAM( "" ));
// SwIoSystem::GetFilterOfFormat
// Looks through the registered filters for one whose user data equals
// rFmtNm.
//   rFmtNm - internal format name, compared with SfxFilter::GetUserData()
//   pCnt   - filter container to search; when it is null the container named
//            sSWRITER is used if IsDocShellRegistered() returns true,
//            otherwise the one named sSWRITERWEB
// NOTE(review): the loop constructs and the return statements are not
// visible in this listing, so the exact return value on success/failure
// must be confirmed against the full source.
121 const SfxFilter
* SwIoSystem::GetFilterOfFormat(const String
& rFmtNm
,
122 const SfxFilterContainer
* pCnt
)
124 SfxFilterContainer
aCntSw( String::CreateFromAscii( sSWRITER
) );
125 SfxFilterContainer
aCntSwWeb( String::CreateFromAscii( sSWRITERWEB
) );
// An explicitly passed container wins over the automatic choice.
126 const SfxFilterContainer
* pFltCnt
= pCnt
? pCnt
: ( IsDocShellRegistered() ? &aCntSw
: &aCntSwWeb
);
// Walk the filters matched for the chosen container's name.
131 SfxFilterMatcher
aMatcher( pFltCnt
->GetName() );
132 SfxFilterMatcherIter
aIter( &aMatcher
);
133 const SfxFilter
* pFilter
= aIter
.First();
136 if( pFilter
->GetUserData() == rFmtNm
)
138 pFilter
= aIter
.Next();
// With a caller supplied container, or once sSWRITERWEB has been searched,
// there is nothing further to try; otherwise the search container is
// switched to sSWRITERWEB (presumably for a second pass -- the enclosing
// loop is not visible here).
141 if( pCnt
|| pFltCnt
== &aCntSwWeb
)
143 pFltCnt
= &aCntSwWeb
;
// SwIoSystem::IsValidStgFilter (XStorage overload)
// Checks whether the package storage rStg fits rFilter. The visible
// statements test that the storage has a stream element "content.xml" and
// that the storage's format id is non-zero and equal to rFilter.GetFormat().
// UNO exceptions raised while querying the storage are caught.
// NOTE(review): how the two assignments to bRet are combined, the handler
// body and the return statement are not visible in this listing.
148 BOOL
SwIoSystem::IsValidStgFilter( const com::sun::star::uno::Reference
< com::sun::star::embed::XStorage
>& rStg
, const SfxFilter
& rFilter
)
153 ULONG nStgFmtId
= SotStorage::GetFormatID( rStg
);
154 bRet
= rStg
->isStreamElement( ::rtl::OUString::createFromAscii("content.xml") );
// A format id of 0 never matches.
156 bRet
= ( nStgFmtId
&& ( rFilter
.GetFormat() == nStgFmtId
) );
158 catch ( com::sun::star::uno::Exception
& )
// SwIoSystem::IsValidStgFilter (SotStorage overload)
// Checks whether the OLE storage rStg fits rFilter. Visible conditions:
//   - the storage reports no error,
//   - its format id is 0 or equals rFilter.GetFormat(),
//   - it contains the element named by GetSubStorageName(rFilter).
// For the Word filters (FILTER_WW8 / sWW6) the result is then recomputed
// from the presence of a "0Table"/"1Table" element, and for non-template
// filters the "WordDocument" stream is opened for a further check.
// NOTE(review): several statements (among them the body of the first Word
// check, the evaluation of the opened stream and the return) are not
// visible in this listing.
165 BOOL
SwIoSystem::IsValidStgFilter(SotStorage
& rStg
, const SfxFilter
& rFilter
)
167 ULONG nStgFmtId
= rStg
.GetFormat();
168 /*#i8409# We cannot trust the clipboard id anymore :-(*/
169 if( rFilter
.GetUserData().EqualsAscii(FILTER_WW8
) ||
170 rFilter
.GetUserData().EqualsAscii(sWW6
) )
175 BOOL bRet
= SVSTREAM_OK
== rStg
.GetError() &&
176 ( !nStgFmtId
|| rFilter
.GetFormat() == nStgFmtId
) &&
177 ( rStg
.IsContained( SwIoSystem::GetSubStorageName( rFilter
)) );
180 /* Bug 53445 - there are Excel documents without a ClipBoardId! */
181 /* Bug 62703 - and WinWord documents without a ClipBoardId as well! */
182 if( rFilter
.GetUserData().EqualsAscii(FILTER_WW8
) ||
183 rFilter
.GetUserData().EqualsAscii(sWW6
) )
// XOR: the result is true when "a table stream exists" and "filter is
// FILTER_WW8" are either both true or both false.
185 bRet
= !((rStg
.IsContained( String::CreateFromAscii("0Table" )) ||
186 rStg
.IsContained( String::CreateFromAscii("1Table" ))) ^
187 rFilter
.GetUserData().EqualsAscii(FILTER_WW8
));
188 if (bRet
&& !rFilter
.IsAllowedAsTemplate())
// Open the main stream read-only, without creating it.
190 SotStorageStreamRef xRef
=
191 rStg
.OpenSotStream(String::CreateFromAscii("WordDocument"),
192 STREAM_STD_READ
| STREAM_NOCREATE
);
// Disabled legacy check, kept as found:
199 // else if( !rFilter.GetUserData().EqualsAscii(sCExcel) )
200 // bRet = rFilter.GetFormat() == nStgFmtId;
205 void TerminateBuffer(sal_Char
*pBuffer
, ULONG nBytesRead
, ULONG nBufferLen
)
207 ASSERT(nBytesRead
<= nBufferLen
- 2,
208 "what you read must be less than the max + null termination");
209 ASSERT(!(nBufferLen
& 0x00000001), "nMaxReadBuf must be an even number");
210 if (nBytesRead
<= nBufferLen
- 2)
212 pBuffer
[nBytesRead
] = '\0';
213 pBuffer
[nBytesRead
+1] = '\0';
214 if (nBytesRead
& 0x00000001)
215 pBuffer
[nBytesRead
+2] = '\0';
219 /* Determine whether the file is in the given format. */
220 /* Currently only our own filters are supported. */
// SwIoSystem::IsFileFilter
//   rMedium  - medium to inspect (as package storage, OLE storage or plain
//              stream)
//   rFmtName - internal format name, compared with each filter's user data
//   ppFilter - optional out parameter, consulted when a filter matched
//              (the assignment itself is not visible in this listing)
// NOTE(review): declarations of bRet and xStg, the loops, the use of
// ppFilter and the return statement are not visible here.
221 BOOL
SwIoSystem::IsFileFilter( SfxMedium
& rMedium
, const String
& rFmtName
,
222 const SfxFilter
** ppFilter
)
226 SfxFilterContainer
aCntSw( String::CreateFromAscii( sSWRITER
) );
227 SfxFilterContainer
aCntSwWeb( String::CreateFromAscii( sSWRITERWEB
) );
228 const SfxFilterContainer
& rFltContainer
= IsDocShellRegistered() ? aCntSw
: aCntSwWeb
;
// Obtain either the package storage (xStor) or, for an OLE storage file,
// a SotStorage on the input stream (xStg).
230 com::sun::star::uno::Reference
< com::sun::star::embed::XStorage
> xStor
;
232 if (rMedium
.IsStorage())
233 xStor
= rMedium
.GetStorage();
236 SvStream
* pStream
= rMedium
.GetInStream();
237 if ( pStream
&& SotStorage::IsStorageFile(pStream
) )
238 xStg
= new SotStorage( pStream
, FALSE
);
// Iterate over the filters of the selected container.
241 SfxFilterMatcher
aMatcher( rFltContainer
.GetName() );
242 SfxFilterMatcherIter
aIter( &aMatcher
);
243 const SfxFilter
* pFltr
= aIter
.First();
246 if( pFltr
->GetUserData() == rFmtName
)
248 const String
& rUserData
= pFltr
->GetUserData();
// User data starting with 'C' marks the storage based filters.
249 if( 'C' == *rUserData
.GetBuffer() )
252 bRet
= IsValidStgFilter( xStor
, *pFltr
);
// NOTE(review): xStg.Is() is tested twice here, and the user data
// comparison below repeats the one that guards this branch.
253 else if ( xStg
.Is() )
254 bRet
= xStg
.Is() && IsValidStgFilter( *xStg
, *pFltr
);
255 bRet
= bRet
&& (pFltr
->GetUserData() == rFmtName
);
// Stream based filters are only probed when no storage of either kind is
// available.
257 else if( !xStg
.Is() && !xStor
.is() )
259 SvStream
* pStrm
= rMedium
.GetInStream();
260 if( pStrm
&& !pStrm
->GetError() )
// Read at most sizeof(aBuffer) - 2 bytes so that TerminateBuffer has room
// for the terminating zero bytes, then rewind the stream.
262 sal_Char aBuffer
[4098];
263 const ULONG nMaxRead
= sizeof(aBuffer
) - 2;
264 ULONG nBytesRead
= pStrm
->Read(aBuffer
, nMaxRead
);
265 pStrm
->Seek(STREAM_SEEK_TO_BEGIN
);
266 TerminateBuffer(aBuffer
, nBytesRead
, sizeof(aBuffer
));
// Ask the detector responsible for rFmtName whether the header matches.
267 for (USHORT i
= 0; i
< MAXFILTER
; ++i
)
269 if (aFilterDetect
[i
].IsFilter(rFmtName
))
271 bRet
= 0 != aFilterDetect
[i
].IsReader( aBuffer
, nBytesRead
,
272 rMedium
.GetPhysicalName(), rUserData
);
279 if( bRet
&& ppFilter
)
283 pFltr
= aIter
.Next();
289 /* This method determines which type the stream (file) has. */
290 /* It tries to find a byte sequence that corresponds to a filter. */
291 /* If no matching one is found, the ASCII reader is currently */
292 /* returned!! The return value is the internal filter name! */
293 /* rPrefFltName is the internal name of the filter that the user */
294 /* selected in the Open dialog. */
// SwIoSystem::GetFileFilter
//   rFileName    - file to inspect; used when no medium is passed
//   rPrefFltName - name of the preferred filter, looked up with
//                  GetFilter4FilterName() to decide whether template
//                  filters may be returned directly
//   pMedium      - optional medium; for storage based files a temporary
//                  SfxMedium is created when it is null
// NOTE(review): many control-flow lines (loops, else branches, returns,
// preprocessor conditionals, the deletion of the temporary medium) are not
// visible in this listing; the comments below describe visible statements
// only.
295 const SfxFilter
* SwIoSystem::GetFileFilter(const String
& rFileName
,
296 const String
& rPrefFltName
, SfxMedium
* pMedium
)
298 SfxFilterContainer
aCntSw( String::CreateFromAscii( sSWRITER
) );
299 SfxFilterContainer
aCntSwWeb( String::CreateFromAscii( sSWRITERWEB
) );
300 const SfxFilterContainer
* pFCntnr
= IsDocShellRegistered() ? &aCntSw
: &aCntSwWeb
;
305 SfxFilterMatcher
aMatcher( pFCntnr
->GetName() );
306 SfxFilterMatcherIter
aIter( &aMatcher
);
307 const SfxFilter
* pFilter
= aIter
.First();
// Storage check: through the medium when one is given, otherwise directly
// on the file name.
311 if( pMedium
? ( pMedium
->IsStorage() || SotStorage::IsStorageFile( pMedium
->GetInStream() ) ) : SotStorage::IsStorageFile( rFileName
) )
313 // package storage or OLEStorage based format
// bDeleteMedium records that the medium below was created here.
315 BOOL bDeleteMedium
= FALSE
;
319 aObj
.SetSmartProtocol( INET_PROT_FILE
);
320 aObj
.SetSmartURL( rFileName
);
321 pMedium
= new SfxMedium( aObj
.GetMainURL( INetURLObject::NO_DECODE
), STREAM_STD_READ
, FALSE
);
322 bDeleteMedium
= TRUE
;
325 // templates should not get precedence over "normal" filters (#i35508, #i33168)
326 const SfxFilter
* pTemplateFilter
= 0;
327 const SfxFilter
* pOldFilter
= pFCntnr
->GetFilter4FilterName( rPrefFltName
);
328 BOOL bLookForTemplate
= pOldFilter
&& pOldFilter
->IsOwnTemplateFormat();
// Package storage: test every 'C' (storage based) filter against xStor.
329 if ( pMedium
->IsStorage() )
331 com::sun::star::uno::Reference
< com::sun::star::embed::XStorage
> xStor
= pMedium
->GetStorage();
336 if( 'C' == *pFilter
->GetUserData().GetBuffer() && IsValidStgFilter( xStor
, *pFilter
) )
338 if ( pFilter
->IsOwnTemplateFormat() && !bLookForTemplate
)
339 // found template filter; maybe there's a "normal" one also
340 pTemplateFilter
= pFilter
;
345 pFilter
= aIter
.Next();
348 // there's only a template filter that could be found
349 if ( pTemplateFilter
)
350 pFilter
= pTemplateFilter
;
// OLE storage: same search, using a SotStorage opened on the input stream.
355 SvStream
* pStream
= pMedium
->GetInStream();
356 if ( pStream
&& SotStorage::IsStorageFile(pStream
) )
357 xStg
= new SotStorage( pStream
, FALSE
);
359 if( xStg
.Is() && ( xStg
->GetError() == SVSTREAM_OK
) )
363 if( 'C' == *pFilter
->GetUserData().GetBuffer() && IsValidStgFilter( *xStg
, *pFilter
) )
365 if ( pFilter
->IsOwnTemplateFormat() && !bLookForTemplate
)
366 // found template filter; maybe there's a "normal" one also
367 pTemplateFilter
= pFilter
;
372 pFilter
= aIter
.Next();
375 // there's only a template filter that could be found
376 if ( pTemplateFilter
)
377 pFilter
= pTemplateFilter
;
// Header based detection: read up to sizeof(aBuffer) - 2 bytes, leaving
// room for the zero bytes written by TerminateBuffer.
385 sal_Char aBuffer
[4098];
386 const ULONG nMaxRead
= sizeof(aBuffer
) - 2;
387 ULONG nBytesRead
= 0;
390 SvStream
* pIStrm
= pMedium
->GetInStream();
391 if( !pIStrm
|| SVSTREAM_OK
!= pIStrm
->GetError() )
// Read from the current position and restore it afterwards.
393 ULONG nCurrPos
= pIStrm
->Tell();
394 nBytesRead
= pIStrm
->Read(aBuffer
, nMaxRead
);
395 pIStrm
->Seek( nCurrPos
);
// Variant that reads directly from the file named by rFileName (the
// condition selecting between the two variants is not visible here).
400 SvFileStream aStrm( rFileName, STREAM_READ );
402 // without a file name or without a stream there is only the ANSI filter
403 if( !rFileName.Len() || SVSTREAM_OK != aStrm.GetError() )
406 nBytesRead = aStrm.Read(aBuffer, nMaxRead);
410 TerminateBuffer(aBuffer
, nBytesRead
, sizeof(aBuffer
));
413 /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
414 /* Search for the specific filter; if no matching one is found, */
415 /* the ASCII filter is returned. */
416 /* Filters without an identification string are never recognised, */
417 /* so the ASCII filter is returned for them as well. */
418 /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
420 const SfxFilter
* pFilterTmp
= 0;
// Probe the detectors in table order; a hit needs both a detector match
// (pNm) and a registered filter for that name (pFilterTmp).
422 for( USHORT n
= 0; n
< MAXFILTER
; ++n
)
424 String sEmptyUserData
;
425 pNm
= aFilterDetect
[n
].IsReader(aBuffer
, nBytesRead
, rFileName
, sEmptyUserData
);
426 pFilterTmp
= pNm
? SwIoSystem::GetFilterOfFormat(String::CreateFromAscii(pNm
), pFCntnr
) : 0;
427 if (pNm
&& pFilterTmp
)
434 /* Ok, no filter has been found so far, so ask the */
435 /* "WORD 4 WORD" filters */
436 if( rFileName
.Len() )
439 pMedium
->CloseInStream();
// Fallback: choose between the two text filters.
442 return SwIoSystem::GetTextFilter( aBuffer
, nBytesRead
);
// SwIoSystem::IsDetectableText
// Heuristic inspection of a text buffer: byte order marks select the
// character set, and the data is scanned for line ends and unusual
// characters.
//   pBuf           - data to inspect
//   rLen           - number of bytes in pBuf (passed by reference; whether
//                    it is modified is not visible in this listing)
//   pCharSet       - receives the detected character set
//   pSwap          - not referenced by any visible statement
//   pLineEnd       - receives the detected line end type
//   bEncodedFilter - when true the function returns true regardless of the
//                    scan result
// Returns bEncodedFilter || (no bare-Unicode pattern was seen and the
// detected line end equals the system line end).
// NOTE(review): many lines (null checks on the out pointers, loop bodies,
// preprocessor conditionals, several declarations) are not visible here.
445 bool SwIoSystem::IsDetectableText(const sal_Char
* pBuf
, ULONG
&rLen
,
446 CharSet
*pCharSet
, bool *pSwap
, LineEnd
*pLineEnd
, bool bEncodedFilter
)
449 CharSet eCharSet
= RTL_TEXTENCODING_DONTKNOW
;
452 /*See if its a known unicode type*/
// EF BB BF is the UTF-8 byte order mark.
455 if (rLen
> 2 && BYTE(pBuf
[0]) == 0xEF && BYTE(pBuf
[1]) == 0xBB &&
456 BYTE(pBuf
[2]) == 0xBF)
458 eCharSet
= RTL_TEXTENCODING_UTF8
;
// FE FF and FF FE are the two byte orders of the 16-bit byte order mark.
// NOTE(review): a length guard for these two-byte reads is not visible in
// this listing -- confirm one exists in the full source.
461 else if (BYTE(pBuf
[0]) == 0xFE && BYTE(pBuf
[1]) == 0xFF)
463 eCharSet
= RTL_TEXTENCODING_UCS2
;
467 else if (BYTE(pBuf
[1]) == 0xFE && BYTE(pBuf
[0]) == 0xFF)
469 eCharSet
= RTL_TEXTENCODING_UCS2
;
476 bool bCR
= false, bLF
= false, bNoNormalChar
= false,
477 bIsBareUnicode
= false;
// Known encoding: bring the data into a sal_Unicode work buffer first.
479 if (eCharSet
!= RTL_TEXTENCODING_DONTKNOW
)
482 sal_Unicode
*pNewBuf
= sWork
.AllocBuffer( static_cast< xub_StrLen
>(rLen
));
// Anything other than UCS2 goes through the rtl text converter; the
// converter and its context are destroyed again right after the call.
484 if (eCharSet
!= RTL_TEXTENCODING_UCS2
)
487 rtl_TextToUnicodeConverter hConverter
=
488 rtl_createTextToUnicodeConverter(eCharSet
);
489 rtl_TextToUnicodeContext hContext
=
490 rtl_createTextToUnicodeContext(hConverter
);
494 nNewLen
= rtl_convertTextToUnicode( hConverter
, hContext
, pBuf
,
495 rLen
, pNewBuf
, nNewLen
,
496 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT
|
497 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT
|
498 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
), &nInfo
, &nCntBytes
);
500 rtl_destroyTextToUnicodeContext(hConverter
, hContext
);
501 rtl_destroyTextToUnicodeConverter(hConverter
);
// UCS2 data is copied unchanged ...
506 memcpy(pNewBuf
, pBuf
, rLen
);
// ... and compared with the native byte order. The two bNativeLE
// definitions are presumably alternatives of a preprocessor conditional
// that is not visible here (osl/endian.h is included by this file).
508 bool bNativeLE
= true;
510 bool bNativeLE
= false;
512 if (bLE
!= bNativeLE
)
// Walks the buffer in steps of two bytes (loop body not visible;
// presumably swaps each byte pair -- confirm).
515 sal_Char
* pF
= (sal_Char
*)pNewBuf
;
517 for(xub_StrLen n
= 0; n
< nNewLen
; ++n
, pF
+=2, pN
+=2)
526 sWork
.ReleaseBufferAccess( static_cast< xub_StrLen
>(nNewLen
) );
527 pNewBuf
= sWork
.GetBufferAccess();
// Scan of the converted characters (loop body not visible).
529 for (ULONG nCnt
= 0; nCnt
< nNewLen
; ++nCnt
, ++pNewBuf
)
// Scan of the raw bytes, used when no encoding was recognised (the else
// keyword itself is not visible).
546 for( ULONG nCnt
= 0; nCnt
< rLen
; ++nCnt
, ++pBuf
)
// A zero byte directly after the current byte is taken as a sign of
// 16-bit text without a byte order mark.
551 if( nCnt
+ 1 < rLen
&& !*(pBuf
+1) )
553 bIsBareUnicode
= true;
// Bytes below 0x20 are control characters.
566 if (0x20 > (BYTE
)*pBuf
)
567 bNoNormalChar
= true;
573 LineEnd eSysLE
= GetSystemLineEnd();
// CR+LF seen -> CRLF, only CR -> CR, otherwise LF.
578 eLineEnd
= bCR
? ( bLF
? LINEEND_CRLF
: LINEEND_CR
) : LINEEND_LF
;
581 *pCharSet
= eCharSet
;
585 *pLineEnd
= eLineEnd
;
587 return bEncodedFilter
|| (!bIsBareUnicode
&& eSysLE
== eLineEnd
);
590 const SfxFilter
* SwIoSystem::GetTextFilter( const sal_Char
* pBuf
, ULONG nLen
)
592 bool bAuto
= IsDetectableText(pBuf
, nLen
);
593 const sal_Char
* pNm
= bAuto
? FILTER_TEXT
: FILTER_TEXT_DLG
;
594 return SwIoSystem::GetFilterOfFormat( String::CreateFromAscii(pNm
), 0 );