update dev300-m58
[ooovba.git] / sw / source / filter / basflt / iodetect.cxx
blobda6d0a7b6ec31b1d2333f2fd7d49e6493862f79d
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: iodetect.cxx,v $
10 * $Revision: 1.31 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_sw.hxx"
34 #include <iodetect.hxx>
36 #include <errhdl.hxx>
37 #include <osl/endian.h>
38 #include <sot/storage.hxx>
39 #include <svtools/parhtml.hxx>
40 #include <tools/urlobj.hxx>
42 bool IsDocShellRegistered();
44 SwIoDetect aFilterDetect[] =
46 SwIoDetect( FILTER_RTF, STRING_LEN ),
47 SwIoDetect( FILTER_BAS, STRING_LEN ),
48 SwIoDetect( sWW6, STRING_LEN ),
49 SwIoDetect( FILTER_WW8, STRING_LEN ),
50 SwIoDetect( sRtfWH, STRING_LEN ),
51 SwIoDetect( sHTML, 4 ),
52 SwIoDetect( sWW1, STRING_LEN ),
53 SwIoDetect( sWW5, STRING_LEN ),
54 SwIoDetect( FILTER_XML, 4 ),
55 SwIoDetect( FILTER_TEXT_DLG, 8 ),
56 SwIoDetect( FILTER_TEXT, 4 )
59 const sal_Char* SwIoDetect::IsReader(const sal_Char* pHeader, ULONG nLen_,
60 const String & /*rFileName*/, const String& /*rUserData*/) const
62 // Filter erkennung
63 struct W1_FIB
65 SVBT16 wIdent; // 0x0 int magic number
66 SVBT16 nFib; // 0x2 FIB version written
67 SVBT16 nProduct; // 0x4 product version written by
68 SVBT16 nlocale; // 0x6 language stamp---localized version;
69 SVBT16 pnNext; // 0x8
70 SVBT16 fFlags;
72 USHORT nFibGet() { return SVBT16ToShort(nFib); }
73 USHORT wIdentGet() { return SVBT16ToShort(wIdent); }
74 USHORT fFlagsGet() { return SVBT16ToShort(fFlags); }
75 // SVBT16 fComplex :1;// 0004 when 1, file is in complex, fast-saved format.
76 BOOL fComplexGet() { return static_cast< BOOL >((fFlagsGet() >> 2) & 1); }
79 int bRet = FALSE;
80 rtl::OString aName( pName );
81 if ( sHTML == aName )
82 bRet = HTMLParser::IsHTMLFormat( pHeader, TRUE, RTL_TEXTENCODING_DONTKNOW );
83 else if ( FILTER_RTF == aName )
84 bRet = 0 == strncmp( "{\\rtf", pHeader, 5 );
85 else if ( sWW5 == aName )
87 W1_FIB *pW1Header = (W1_FIB*)pHeader;
88 if (pW1Header->wIdentGet() == 0xA5DC && pW1Header->nFibGet() == 0x65)
89 bRet = true; /*WW5*/
90 else if (pW1Header->wIdentGet() == 0xA5DB && pW1Header->nFibGet() == 0x2D)
91 bRet = true; /*WW2*/
93 else if ( sWW1 == aName )
95 bRet = (( ((W1_FIB*)pHeader)->wIdentGet() == 0xA59C
96 && ((W1_FIB*)pHeader)->nFibGet() == 0x21)
97 && ((W1_FIB*)pHeader)->fComplexGet() == 0);
99 else if ( FILTER_TEXT == aName )
100 bRet = SwIoSystem::IsDetectableText(pHeader, nLen_);
101 else if ( FILTER_TEXT_DLG == aName)
102 bRet = SwIoSystem::IsDetectableText( pHeader, nLen_, 0, 0, 0, true);
103 return bRet ? pName : 0;
106 const String SwIoSystem::GetSubStorageName( const SfxFilter& rFltr )
108 /* bei den StorageFiltern noch den SubStorageNamen setzen */
109 const String& rUserData = rFltr.GetUserData();
110 if( rUserData.EqualsAscii(FILTER_XML) ||
111 rUserData.EqualsAscii(FILTER_XMLV) ||
112 rUserData.EqualsAscii(FILTER_XMLVW) )
113 return String::CreateFromAscii(
114 RTL_CONSTASCII_STRINGPARAM( "content.xml" ));
115 if( rUserData.EqualsAscii(sWW6) || rUserData.EqualsAscii(FILTER_WW8) )
116 return String::CreateFromAscii(
117 RTL_CONSTASCII_STRINGPARAM( "WordDocument" ));
118 return String::CreateFromAscii( RTL_CONSTASCII_STRINGPARAM( "" ));
121 const SfxFilter* SwIoSystem::GetFilterOfFormat(const String& rFmtNm,
122 const SfxFilterContainer* pCnt)
124 SfxFilterContainer aCntSw( String::CreateFromAscii( sSWRITER ) );
125 SfxFilterContainer aCntSwWeb( String::CreateFromAscii( sSWRITERWEB ) );
126 const SfxFilterContainer* pFltCnt = pCnt ? pCnt : ( IsDocShellRegistered() ? &aCntSw : &aCntSwWeb );
128 do {
129 if( pFltCnt )
131 SfxFilterMatcher aMatcher( pFltCnt->GetName() );
132 SfxFilterMatcherIter aIter( &aMatcher );
133 const SfxFilter* pFilter = aIter.First();
134 while ( pFilter )
136 if( pFilter->GetUserData() == rFmtNm )
137 return pFilter;
138 pFilter = aIter.Next();
141 if( pCnt || pFltCnt == &aCntSwWeb )
142 break;
143 pFltCnt = &aCntSwWeb;
144 } while( TRUE );
145 return 0;
148 BOOL SwIoSystem::IsValidStgFilter( const com::sun::star::uno::Reference < com::sun::star::embed::XStorage >& rStg, const SfxFilter& rFilter)
150 BOOL bRet = FALSE;
153 ULONG nStgFmtId = SotStorage::GetFormatID( rStg );
154 bRet = rStg->isStreamElement( ::rtl::OUString::createFromAscii("content.xml") );
155 if ( bRet )
156 bRet = ( nStgFmtId && ( rFilter.GetFormat() == nStgFmtId ) );
158 catch ( com::sun::star::uno::Exception& )
162 return bRet;
165 BOOL SwIoSystem::IsValidStgFilter(SotStorage& rStg, const SfxFilter& rFilter)
167 ULONG nStgFmtId = rStg.GetFormat();
168 /*#i8409# We cannot trust the clipboard id anymore :-(*/
169 if( rFilter.GetUserData().EqualsAscii(FILTER_WW8) ||
170 rFilter.GetUserData().EqualsAscii(sWW6) )
172 nStgFmtId = 0;
175 BOOL bRet = SVSTREAM_OK == rStg.GetError() &&
176 ( !nStgFmtId || rFilter.GetFormat() == nStgFmtId ) &&
177 ( rStg.IsContained( SwIoSystem::GetSubStorageName( rFilter )) );
178 if( bRet )
180 /* Bug 53445 - es gibt Excel Docs ohne ClipBoardId! */
181 /* Bug 62703 - und auch WinWord Docs ohne ClipBoardId! */
182 if( rFilter.GetUserData().EqualsAscii(FILTER_WW8) ||
183 rFilter.GetUserData().EqualsAscii(sWW6) )
185 bRet = !((rStg.IsContained( String::CreateFromAscii("0Table" )) ||
186 rStg.IsContained( String::CreateFromAscii("1Table" ))) ^
187 rFilter.GetUserData().EqualsAscii(FILTER_WW8));
188 if (bRet && !rFilter.IsAllowedAsTemplate())
190 SotStorageStreamRef xRef =
191 rStg.OpenSotStream(String::CreateFromAscii("WordDocument"),
192 STREAM_STD_READ | STREAM_NOCREATE );
193 xRef->Seek(10);
194 BYTE nByte;
195 *xRef >> nByte;
196 bRet = !(nByte & 1);
199 // else if( !rFilter.GetUserData().EqualsAscii(sCExcel) )
200 // bRet = rFilter.GetFormat() == nStgFmtId;
202 return bRet;
205 void TerminateBuffer(sal_Char *pBuffer, ULONG nBytesRead, ULONG nBufferLen)
207 ASSERT(nBytesRead <= nBufferLen - 2,
208 "what you read must be less than the max + null termination");
209 ASSERT(!(nBufferLen & 0x00000001), "nMaxReadBuf must be an even number");
210 if (nBytesRead <= nBufferLen - 2)
212 pBuffer[nBytesRead] = '\0';
213 pBuffer[nBytesRead+1] = '\0';
214 if (nBytesRead & 0x00000001)
215 pBuffer[nBytesRead+2] = '\0';
219 /* Feststellen ob das File in dem entsprechenden Format vorliegt. */
220 /* Z.z werden nur unsere eigene Filter unterstuetzt */
221 BOOL SwIoSystem::IsFileFilter( SfxMedium& rMedium, const String& rFmtName,
222 const SfxFilter** ppFilter )
224 BOOL bRet = FALSE;
226 SfxFilterContainer aCntSw( String::CreateFromAscii( sSWRITER ) );
227 SfxFilterContainer aCntSwWeb( String::CreateFromAscii( sSWRITERWEB ) );
228 const SfxFilterContainer& rFltContainer = IsDocShellRegistered() ? aCntSw : aCntSwWeb;
230 com::sun::star::uno::Reference < com::sun::star::embed::XStorage > xStor;
231 SotStorageRef xStg;
232 if (rMedium.IsStorage())
233 xStor = rMedium.GetStorage();
234 else
236 SvStream* pStream = rMedium.GetInStream();
237 if ( pStream && SotStorage::IsStorageFile(pStream) )
238 xStg = new SotStorage( pStream, FALSE );
241 SfxFilterMatcher aMatcher( rFltContainer.GetName() );
242 SfxFilterMatcherIter aIter( &aMatcher );
243 const SfxFilter* pFltr = aIter.First();
244 while ( pFltr )
246 if( pFltr->GetUserData() == rFmtName )
248 const String& rUserData = pFltr->GetUserData();
249 if( 'C' == *rUserData.GetBuffer() )
251 if ( xStor.is() )
252 bRet = IsValidStgFilter( xStor, *pFltr );
253 else if ( xStg.Is() )
254 bRet = xStg.Is() && IsValidStgFilter( *xStg, *pFltr );
255 bRet = bRet && (pFltr->GetUserData() == rFmtName);
257 else if( !xStg.Is() && !xStor.is() )
259 SvStream* pStrm = rMedium.GetInStream();
260 if( pStrm && !pStrm->GetError() )
262 sal_Char aBuffer[4098];
263 const ULONG nMaxRead = sizeof(aBuffer) - 2;
264 ULONG nBytesRead = pStrm->Read(aBuffer, nMaxRead);
265 pStrm->Seek(STREAM_SEEK_TO_BEGIN);
266 TerminateBuffer(aBuffer, nBytesRead, sizeof(aBuffer));
267 for (USHORT i = 0; i < MAXFILTER; ++i)
269 if (aFilterDetect[i].IsFilter(rFmtName))
271 bRet = 0 != aFilterDetect[i].IsReader( aBuffer, nBytesRead,
272 rMedium.GetPhysicalName(), rUserData );
273 break;
279 if( bRet && ppFilter )
280 *ppFilter = pFltr;
283 pFltr = aIter.Next();
286 return bRet;
289 /* die Methode stellt fest, von welchem Typ der stream (File) ist. */
290 /* Es wird versucht, eine dem Filter entsprechende Byte-Folge zu finden. */
291 /* Wird kein entsprechender gefunden, wird zur Zeit der ASCII-Reader */
292 /* returnt !! Der Returnwert ist der interne Filtername! */
293 /* rPrefFltName ist der interne Name des Filters, den der Benutzer im */
294 /* Open-Dialog eingestellt hat. */
295 const SfxFilter* SwIoSystem::GetFileFilter(const String& rFileName,
296 const String& rPrefFltName, SfxMedium* pMedium)
298 SfxFilterContainer aCntSw( String::CreateFromAscii( sSWRITER ) );
299 SfxFilterContainer aCntSwWeb( String::CreateFromAscii( sSWRITERWEB ) );
300 const SfxFilterContainer* pFCntnr = IsDocShellRegistered() ? &aCntSw : &aCntSwWeb;
302 if( !pFCntnr )
303 return 0;
305 SfxFilterMatcher aMatcher( pFCntnr->GetName() );
306 SfxFilterMatcherIter aIter( &aMatcher );
307 const SfxFilter* pFilter = aIter.First();
308 if ( !pFilter )
309 return 0;
311 if( pMedium ? ( pMedium->IsStorage() || SotStorage::IsStorageFile( pMedium->GetInStream() ) ) : SotStorage::IsStorageFile( rFileName ) )
313 // package storage or OLEStorage based format
314 SotStorageRef xStg;
315 BOOL bDeleteMedium = FALSE;
316 if (!pMedium )
318 INetURLObject aObj;
319 aObj.SetSmartProtocol( INET_PROT_FILE );
320 aObj.SetSmartURL( rFileName );
321 pMedium = new SfxMedium( aObj.GetMainURL( INetURLObject::NO_DECODE ), STREAM_STD_READ, FALSE );
322 bDeleteMedium = TRUE;
325 // templates should not get precedence over "normal" filters (#i35508, #i33168)
326 const SfxFilter* pTemplateFilter = 0;
327 const SfxFilter* pOldFilter = pFCntnr->GetFilter4FilterName( rPrefFltName );
328 BOOL bLookForTemplate = pOldFilter && pOldFilter->IsOwnTemplateFormat();
329 if ( pMedium->IsStorage() )
331 com::sun::star::uno::Reference < com::sun::star::embed::XStorage > xStor = pMedium->GetStorage();
332 if ( xStor.is() )
334 while ( pFilter )
336 if( 'C' == *pFilter->GetUserData().GetBuffer() && IsValidStgFilter( xStor, *pFilter ) )
338 if ( pFilter->IsOwnTemplateFormat() && !bLookForTemplate )
339 // found template filter; maybe there's a "normal" one also
340 pTemplateFilter = pFilter;
341 else
342 return pFilter;
345 pFilter = aIter.Next();
348 // there's only a template filter that could be found
349 if ( pTemplateFilter )
350 pFilter = pTemplateFilter;
353 else
355 SvStream* pStream = pMedium->GetInStream();
356 if ( pStream && SotStorage::IsStorageFile(pStream) )
357 xStg = new SotStorage( pStream, FALSE );
359 if( xStg.Is() && ( xStg->GetError() == SVSTREAM_OK ) )
361 while ( pFilter )
363 if( 'C' == *pFilter->GetUserData().GetBuffer() && IsValidStgFilter( *xStg, *pFilter ) )
365 if ( pFilter->IsOwnTemplateFormat() && !bLookForTemplate )
366 // found template filter; maybe there's a "normal" one also
367 pTemplateFilter = pFilter;
368 else
369 return pFilter;
372 pFilter = aIter.Next();
375 // there's only a template filter that could be found
376 if ( pTemplateFilter )
377 pFilter = pTemplateFilter;
382 return pFilter;
385 sal_Char aBuffer[4098];
386 const ULONG nMaxRead = sizeof(aBuffer) - 2;
387 ULONG nBytesRead = 0;
388 if (pMedium)
390 SvStream* pIStrm = pMedium->GetInStream();
391 if( !pIStrm || SVSTREAM_OK != pIStrm->GetError() )
392 return 0;
393 ULONG nCurrPos = pIStrm->Tell();
394 nBytesRead = pIStrm->Read(aBuffer, nMaxRead);
395 pIStrm->Seek( nCurrPos );
398 else
400 SvFileStream aStrm( rFileName, STREAM_READ );
402 // ohne FileName oder ohne Stream gibts nur den ANSI-Filter
403 if( !rFileName.Len() || SVSTREAM_OK != aStrm.GetError() )
404 return 0;
406 nBytesRead = aStrm.Read(aBuffer, nMaxRead);
407 aStrm.Close();
410 TerminateBuffer(aBuffer, nBytesRead, sizeof(aBuffer));
413 /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
414 /* suche nach dem bestimmten Filter, falls kein entsprechender */
415 /* gefunden wird, so wird der ASCII-Filter returnt. */
416 /* Gibt es Filter ohne einen Identifizierungs-String, so werden diese */
417 /* nie erkannt und es wird auch der ASCII-Filter returnt. */
418 /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
420 const SfxFilter* pFilterTmp = 0;
421 const sal_Char* pNm;
422 for( USHORT n = 0; n < MAXFILTER; ++n )
424 String sEmptyUserData;
425 pNm = aFilterDetect[n].IsReader(aBuffer, nBytesRead, rFileName, sEmptyUserData);
426 pFilterTmp = pNm ? SwIoSystem::GetFilterOfFormat(String::CreateFromAscii(pNm), pFCntnr) : 0;
427 if (pNm && pFilterTmp)
429 return pFilterTmp;
434 /* Ok, bis jetzt kein Filter gefunden, also befrage mal die */
435 /* "WORD 4 WORD" Filter */
436 if( rFileName.Len() )
438 if( pMedium )
439 pMedium->CloseInStream();
442 return SwIoSystem::GetTextFilter( aBuffer, nBytesRead);
445 bool SwIoSystem::IsDetectableText(const sal_Char* pBuf, ULONG &rLen,
446 CharSet *pCharSet, bool *pSwap, LineEnd *pLineEnd, bool bEncodedFilter)
448 bool bSwap = false;
449 CharSet eCharSet = RTL_TEXTENCODING_DONTKNOW;
450 bool bLE = true;
451 ULONG nHead=0;
452 /*See if its a known unicode type*/
453 if (rLen >= 2)
455 if (rLen > 2 && BYTE(pBuf[0]) == 0xEF && BYTE(pBuf[1]) == 0xBB &&
456 BYTE(pBuf[2]) == 0xBF)
458 eCharSet = RTL_TEXTENCODING_UTF8;
459 nHead = 3;
461 else if (BYTE(pBuf[0]) == 0xFE && BYTE(pBuf[1]) == 0xFF)
463 eCharSet = RTL_TEXTENCODING_UCS2;
464 bLE = false;
465 nHead = 2;
467 else if (BYTE(pBuf[1]) == 0xFE && BYTE(pBuf[0]) == 0xFF)
469 eCharSet = RTL_TEXTENCODING_UCS2;
470 nHead = 2;
472 pBuf+=nHead;
473 rLen-=nHead;
476 bool bCR = false, bLF = false, bNoNormalChar = false,
477 bIsBareUnicode = false;
479 if (eCharSet != RTL_TEXTENCODING_DONTKNOW)
481 String sWork;
482 sal_Unicode *pNewBuf = sWork.AllocBuffer( static_cast< xub_StrLen >(rLen));
483 sal_Size nNewLen;
484 if (eCharSet != RTL_TEXTENCODING_UCS2)
486 nNewLen = rLen;
487 rtl_TextToUnicodeConverter hConverter =
488 rtl_createTextToUnicodeConverter(eCharSet);
489 rtl_TextToUnicodeContext hContext =
490 rtl_createTextToUnicodeContext(hConverter);
492 sal_Size nCntBytes;
493 sal_uInt32 nInfo;
494 nNewLen = rtl_convertTextToUnicode( hConverter, hContext, pBuf,
495 rLen, pNewBuf, nNewLen,
496 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
497 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
498 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT), &nInfo, &nCntBytes);
500 rtl_destroyTextToUnicodeContext(hConverter, hContext);
501 rtl_destroyTextToUnicodeConverter(hConverter);
503 else
505 nNewLen = rLen/2;
506 memcpy(pNewBuf, pBuf, rLen);
507 #ifdef OSL_LITENDIAN
508 bool bNativeLE = true;
509 #else
510 bool bNativeLE = false;
511 #endif
512 if (bLE != bNativeLE)
514 bSwap = true;
515 sal_Char* pF = (sal_Char*)pNewBuf;
516 sal_Char* pN = pF+1;
517 for(xub_StrLen n = 0; n < nNewLen; ++n, pF+=2, pN+=2)
519 sal_Char c = *pF;
520 *pF = *pN;
521 *pN = c;
526 sWork.ReleaseBufferAccess( static_cast< xub_StrLen >(nNewLen) );
527 pNewBuf = sWork.GetBufferAccess();
529 for (ULONG nCnt = 0; nCnt < nNewLen; ++nCnt, ++pNewBuf)
531 switch (*pNewBuf)
533 case 0xA:
534 bLF = true;
535 break;
536 case 0xD:
537 bCR = true;
538 break;
539 default:
540 break;
544 else
546 for( ULONG nCnt = 0; nCnt < rLen; ++nCnt, ++pBuf )
548 switch (*pBuf)
550 case 0x0:
551 if( nCnt + 1 < rLen && !*(pBuf+1) )
552 return 0;
553 bIsBareUnicode = true;
554 break;
555 case 0xA:
556 bLF = true;
557 break;
558 case 0xD:
559 bCR = true;
560 break;
561 case 0xC:
562 case 0x1A:
563 case 0x9:
564 break;
565 default:
566 if (0x20 > (BYTE)*pBuf)
567 bNoNormalChar = true;
568 break;
573 LineEnd eSysLE = GetSystemLineEnd();
574 LineEnd eLineEnd;
575 if (!bCR && !bLF)
576 eLineEnd = eSysLE;
577 else
578 eLineEnd = bCR ? ( bLF ? LINEEND_CRLF : LINEEND_CR ) : LINEEND_LF;
580 if (pCharSet)
581 *pCharSet = eCharSet;
582 if (pSwap)
583 *pSwap = bSwap;
584 if (pLineEnd)
585 *pLineEnd = eLineEnd;
587 return bEncodedFilter || (!bIsBareUnicode && eSysLE == eLineEnd);
590 const SfxFilter* SwIoSystem::GetTextFilter( const sal_Char* pBuf, ULONG nLen)
592 bool bAuto = IsDetectableText(pBuf, nLen);
593 const sal_Char* pNm = bAuto ? FILTER_TEXT : FILTER_TEXT_DLG;
594 return SwIoSystem::GetFilterOfFormat( String::CreateFromAscii(pNm), 0 );