Version 7.6.3.2-android, tag libreoffice-7.6.3.2-android
[LibreOffice.git] / sw / source / filter / basflt / iodetect.cxx
blob86395100501db9c036db85e020eaf1a8ad562ad0
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <iodetect.hxx>
21 #include <memory>
22 #include <osl/endian.h>
23 #include <sot/storage.hxx>
24 #include <tools/urlobj.hxx>
25 #include <unotools/moduleoptions.hxx>
26 #include <sfx2/docfilt.hxx>
27 #include <sfx2/fcontnr.hxx>
28 #include <sfx2/docfile.hxx>
29 #include <com/sun/star/ucb/ContentCreationException.hpp>
30 #include <com/sun/star/embed/XStorage.hpp>
31 #include <unicode/ucsdet.h>
33 using namespace ::com::sun::star;
35 static bool IsDocShellRegistered()
37 return SvtModuleOptions().IsWriter();
40 SwIoDetect aFilterDetect[] =
42 SwIoDetect( FILTER_RTF ),
43 SwIoDetect( FILTER_BAS ),
44 SwIoDetect( sWW6 ),
45 SwIoDetect( FILTER_WW8 ),
46 SwIoDetect( sRtfWH ),
47 SwIoDetect( sHTML ),
48 SwIoDetect( sWW5 ),
49 SwIoDetect( FILTER_XML ),
50 SwIoDetect( FILTER_TEXT_DLG ),
51 SwIoDetect( FILTER_TEXT ),
52 SwIoDetect( FILTER_DOCX )
55 OUString SwIoSystem::GetSubStorageName( const SfxFilter& rFltr )
57 // for StorageFilters also set the SubStorageName
58 const OUString& rUserData = rFltr.GetUserData();
59 if (rUserData == FILTER_XML ||
60 rUserData == FILTER_XMLV ||
61 rUserData == FILTER_XMLVW)
62 return "content.xml";
63 if (rUserData == sWW6 || rUserData == FILTER_WW8)
64 return "WordDocument";
65 return OUString();
68 std::shared_ptr<const SfxFilter> SwIoSystem::GetFilterOfFormat(std::u16string_view rFormatNm,
69 const SfxFilterContainer* pCnt)
71 SfxFilterContainer aCntSw( sSWRITER );
72 SfxFilterContainer aCntSwWeb( sSWRITERWEB );
73 const SfxFilterContainer* pFltCnt = pCnt ? pCnt : ( IsDocShellRegistered() ? &aCntSw : &aCntSwWeb );
75 do {
76 if( pFltCnt )
78 SfxFilterMatcher aMatcher( pFltCnt->GetName() );
79 SfxFilterMatcherIter aIter( aMatcher );
80 std::shared_ptr<const SfxFilter> pFilter = aIter.First();
81 while ( pFilter )
83 if( pFilter->GetUserData() == rFormatNm )
84 return pFilter;
85 pFilter = aIter.Next();
88 if( pCnt || pFltCnt == &aCntSwWeb )
89 break;
90 pFltCnt = &aCntSwWeb;
91 } while( true );
92 return nullptr;
95 bool SwIoSystem::IsValidStgFilter( const css::uno::Reference < css::embed::XStorage >& rStg, const SfxFilter& rFilter)
97 bool bRet = false;
98 try
100 SotClipboardFormatId nStgFormatId = SotStorage::GetFormatID( rStg );
101 bRet = rStg->isStreamElement( "content.xml" );
102 if ( bRet )
103 bRet = ( nStgFormatId != SotClipboardFormatId::NONE && ( rFilter.GetFormat() == nStgFormatId ) );
105 catch (const css::uno::Exception& )
109 return bRet;
112 bool SwIoSystem::IsValidStgFilter(SotStorage& rStg, const SfxFilter& rFilter)
114 SotClipboardFormatId nStgFormatId = rStg.GetFormat();
115 /*#i8409# We cannot trust the clipboard id anymore :-(*/
116 if (rFilter.GetUserData() == FILTER_WW8 || rFilter.GetUserData() == sWW6)
117 nStgFormatId = SotClipboardFormatId::NONE;
119 bool bRet = ERRCODE_NONE == rStg.GetError() &&
120 ( nStgFormatId == SotClipboardFormatId::NONE || rFilter.GetFormat() == nStgFormatId ) &&
121 ( rStg.IsContained( SwIoSystem::GetSubStorageName( rFilter )) );
122 if( bRet )
124 /* Bug 53445 - there are Excel Docs w/o ClipBoardId! */
125 /* Bug 62703 - and also WinWord Docs w/o ClipBoardId! */
126 if (rFilter.GetUserData() == FILTER_WW8 || rFilter.GetUserData() == sWW6)
128 bRet = (rStg.IsContained("0Table")
129 || rStg.IsContained("1Table"))
130 == (rFilter.GetUserData() == FILTER_WW8);
131 if (bRet && !rFilter.IsAllowedAsTemplate())
133 tools::SvRef<SotStorageStream> xRef =
134 rStg.OpenSotStream("WordDocument",
135 StreamMode::STD_READ );
136 xRef->Seek(10);
137 sal_uInt8 nByte;
138 xRef->ReadUChar( nByte );
139 bRet = !(nByte & 1);
143 return bRet;
146 // Check the type of the stream (file) by searching for corresponding set of bytes.
147 // If no known type is found, return ASCII for now!
148 // Returns the internal FilterName.
149 std::shared_ptr<const SfxFilter> SwIoSystem::GetFileFilter(const OUString& rFileName)
151 SfxFilterContainer aCntSw( sSWRITER );
152 SfxFilterContainer aCntSwWeb( sSWRITERWEB );
153 const SfxFilterContainer* pFCntnr = IsDocShellRegistered() ? &aCntSw : &aCntSwWeb;
155 SfxFilterMatcher aMatcher( pFCntnr->GetName() );
156 SfxFilterMatcherIter aIter( aMatcher );
157 std::shared_ptr<const SfxFilter> pFilter = aIter.First();
158 if ( !pFilter )
159 return nullptr;
161 if (SotStorage::IsStorageFile(rFileName))
163 // package storage or OLEStorage based format
164 tools::SvRef<SotStorage> xStg;
165 INetURLObject aObj;
166 aObj.SetSmartProtocol( INetProtocol::File );
167 aObj.SetSmartURL( rFileName );
168 SfxMedium aMedium(aObj.GetMainURL(INetURLObject::DecodeMechanism::NONE), StreamMode::STD_READ);
170 // templates should not get precedence over "normal" filters (#i35508, #i33168)
171 std::shared_ptr<const SfxFilter> pTemplateFilter;
172 if (aMedium.IsStorage())
174 uno::Reference<embed::XStorage> const xStor = aMedium.GetStorage();
175 if ( xStor.is() )
177 while ( pFilter )
179 if (pFilter->GetUserData().startsWith("C") && IsValidStgFilter(xStor, *pFilter ))
181 if (pFilter->IsOwnTemplateFormat())
183 // found template filter; maybe there's a "normal" one also
184 pTemplateFilter = pFilter;
186 else
187 return pFilter;
190 pFilter = aIter.Next();
193 // there's only a template filter that could be found
194 if ( pTemplateFilter )
195 pFilter = pTemplateFilter;
198 else
202 SvStream *const pStream = aMedium.GetInStream();
203 if ( pStream && SotStorage::IsStorageFile(pStream) )
204 xStg = new SotStorage( pStream, false );
206 catch (const css::ucb::ContentCreationException &)
210 if( xStg.is() && ( xStg->GetError() == ERRCODE_NONE ) )
212 while ( pFilter )
214 if (pFilter->GetUserData().startsWith("C") && IsValidStgFilter(*xStg, *pFilter))
216 if (pFilter->IsOwnTemplateFormat())
218 // found template filter; maybe there's a "normal" one also
219 pTemplateFilter = pFilter;
221 else
222 return pFilter;
225 pFilter = aIter.Next();
228 // there's only a template filter that could be found
229 if ( pTemplateFilter )
230 pFilter = pTemplateFilter;
235 return pFilter;
238 return SwIoSystem::GetFilterOfFormat(FILTER_TEXT);
241 rtl_TextEncoding SwIoSystem::GetTextEncoding(SvStream& rStrm)
243 sal_Size nLen, nOrig;
244 char aBuf[4096];
245 nOrig = nLen = rStrm.ReadBytes(aBuf, sizeof(aBuf));
247 rtl_TextEncoding eCharSet;
248 const bool bRet = SwIoSystem::IsDetectableText(aBuf, nLen, &eCharSet, nullptr, nullptr, nullptr);
249 if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW)
250 rStrm.SeekRel(-(tools::Long(nLen)));
251 else
252 rStrm.SeekRel(-(tools::Long(nOrig)));
254 return eCharSet;
257 bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen,
258 rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd, bool *pBom)
260 bool bSwap = false;
261 rtl_TextEncoding eCharSet = RTL_TEXTENCODING_DONTKNOW;
262 bool bLE = true;
263 bool bBom = false;
264 /*See if it's a known unicode type*/
265 if (rLen >= 2)
267 sal_uLong nHead=0;
268 if (rLen > 2 && sal_uInt8(pBuf[0]) == 0xEF && sal_uInt8(pBuf[1]) == 0xBB &&
269 sal_uInt8(pBuf[2]) == 0xBF)
271 eCharSet = RTL_TEXTENCODING_UTF8;
272 nHead = 3;
273 bBom = true;
275 else if (sal_uInt8(pBuf[0]) == 0xFE && sal_uInt8(pBuf[1]) == 0xFF)
277 eCharSet = RTL_TEXTENCODING_UCS2;
278 bLE = false;
279 nHead = 2;
280 bBom = true;
282 else if (sal_uInt8(pBuf[1]) == 0xFE && sal_uInt8(pBuf[0]) == 0xFF)
284 eCharSet = RTL_TEXTENCODING_UCS2;
285 nHead = 2;
286 bBom = true;
288 pBuf+=nHead;
289 rLen-=nHead;
291 /*See unicode type again without BOM*/
292 if (rLen >= 1 && eCharSet == RTL_TEXTENCODING_DONTKNOW)
294 UErrorCode uerr = U_ZERO_ERROR;
295 UCharsetDetector* ucd = ucsdet_open(&uerr);
296 ucsdet_setText(ucd, pBuf, rLen, &uerr);
297 if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr))
299 const char* pEncodingName = ucsdet_getName(match, &uerr);
301 if (U_SUCCESS(uerr) && !strcmp("UTF-8", pEncodingName))
303 eCharSet = RTL_TEXTENCODING_UTF8; // UTF-8
305 else if (U_SUCCESS(uerr) && !strcmp("UTF-16BE", pEncodingName))
307 eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16BE
308 bLE = false;
310 else if (U_SUCCESS(uerr) && !strcmp("UTF-16LE", pEncodingName))
312 eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16LE
314 else if (U_SUCCESS(uerr) && !strcmp("GB18030", pEncodingName))
316 eCharSet = RTL_TEXTENCODING_GB_18030;
320 ucsdet_close(ucd);
323 bool bCR = false, bLF = false, bIsBareUnicode = false;
325 if (eCharSet != RTL_TEXTENCODING_DONTKNOW)
327 std::unique_ptr<sal_Unicode[]> aWork(new sal_Unicode[rLen+1]);
328 sal_Unicode *pNewBuf = aWork.get();
329 std::size_t nNewLen;
330 if (eCharSet != RTL_TEXTENCODING_UCS2)
332 nNewLen = rLen;
333 rtl_TextToUnicodeConverter hConverter =
334 rtl_createTextToUnicodeConverter(eCharSet);
335 rtl_TextToUnicodeContext hContext =
336 rtl_createTextToUnicodeContext(hConverter);
338 sal_Size nCntBytes;
339 sal_uInt32 nInfo;
340 nNewLen = rtl_convertTextToUnicode( hConverter, hContext, pBuf,
341 rLen, pNewBuf, nNewLen,
342 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
343 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
344 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT), &nInfo, &nCntBytes);
346 rtl_destroyTextToUnicodeContext(hConverter, hContext);
347 rtl_destroyTextToUnicodeConverter(hConverter);
349 else
351 nNewLen = rLen/2;
352 memcpy(pNewBuf, pBuf, rLen);
353 #ifdef OSL_LITENDIAN
354 bool const bNativeLE = true;
355 #else
356 bool const bNativeLE = false;
357 #endif
358 if (bLE != bNativeLE)
360 bSwap = true;
361 char* pF = reinterpret_cast<char*>(pNewBuf);
362 char* pN = pF+1;
363 for(sal_uLong n = 0; n < nNewLen; ++n, pF+=2, pN+=2 )
365 char c = *pF;
366 *pF = *pN;
367 *pN = c;
372 for (sal_uLong nCnt = 0; nCnt < nNewLen; ++nCnt, ++pNewBuf)
374 switch (*pNewBuf)
376 case 0xA:
377 bLF = true;
378 break;
379 case 0xD:
380 bCR = true;
381 break;
382 default:
383 break;
387 else
389 for( sal_uLong nCnt = 0; nCnt < rLen; ++nCnt, ++pBuf )
391 switch (*pBuf)
393 case 0x0:
394 if( nCnt + 1 < rLen && !*(pBuf+1) )
395 return false;
396 bIsBareUnicode = true;
397 break;
398 case 0xA:
399 bLF = true;
400 break;
401 case 0xD:
402 bCR = true;
403 break;
404 case 0xC:
405 case 0x1A:
406 case 0x9:
407 break;
408 default:
409 break;
414 LineEnd eSysLE = GetSystemLineEnd();
415 LineEnd eLineEnd;
416 if (!bCR && !bLF)
417 eLineEnd = eSysLE;
418 else
419 eLineEnd = bCR ? ( bLF ? LINEEND_CRLF : LINEEND_CR ) : LINEEND_LF;
421 if (pCharSet)
422 *pCharSet = eCharSet;
423 if (pSwap)
424 *pSwap = bSwap;
425 if (pLineEnd)
426 *pLineEnd = eLineEnd;
427 if (pBom)
428 *pBom = bBom;
430 return !bIsBareUnicode;
433 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */