1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <iodetect.hxx>
22 #include <osl/endian.h>
23 #include <sot/storage.hxx>
24 #include <tools/urlobj.hxx>
25 #include <unotools/moduleoptions.hxx>
26 #include <sfx2/docfilt.hxx>
27 #include <sfx2/fcontnr.hxx>
28 #include <sfx2/docfile.hxx>
29 #include <com/sun/star/ucb/ContentCreationException.hpp>
30 #include <com/sun/star/embed/XStorage.hpp>
31 #include <unicode/ucsdet.h>
33 using namespace ::com::sun::star
;
// Helper for choosing a default filter container: returns whatever
// SvtModuleOptions::IsWriter() reports for a freshly constructed
// SvtModuleOptions object.
35 static bool IsDocShellRegistered()
37 return SvtModuleOptions().IsWriter();
// Table of filter detection entries. One SwIoDetect object is constructed
// for each filter identifier listed below (RTF, BAS, WW8, XML, TEXT_DLG,
// TEXT and DOCX).
40 SwIoDetect aFilterDetect
[] =
42 SwIoDetect( FILTER_RTF
),
43 SwIoDetect( FILTER_BAS
),
45 SwIoDetect( FILTER_WW8
),
49 SwIoDetect( FILTER_XML
),
50 SwIoDetect( FILTER_TEXT_DLG
),
51 SwIoDetect( FILTER_TEXT
),
52 SwIoDetect( FILTER_DOCX
)
// Returns the name of the sub storage / stream that belongs to the given
// filter. The decision is based solely on the filter's user data string.
//
// rFltr  filter whose user data is inspected
55 OUString
SwIoSystem::GetSubStorageName( const SfxFilter
& rFltr
)
57 // for StorageFilters also set the SubStorageName
58 const OUString
& rUserData
= rFltr
.GetUserData();
// First group: the three XML filter variants share one result.
59 if (rUserData
== FILTER_XML
||
60 rUserData
== FILTER_XMLV
||
61 rUserData
== FILTER_XMLVW
)
// Second group: both Word binary filters (WW6 and WW8) map to the
// WordDocument stream.
63 if (rUserData
== sWW6
|| rUserData
== FILTER_WW8
)
64 return "WordDocument";
// Looks for a filter whose user data equals rFormatNm.
//
// rFormatNm  user data string identifying the wanted format
// pCnt       filter container to search; when null a default container
//            is chosen (see below)
68 std::shared_ptr
<const SfxFilter
> SwIoSystem::GetFilterOfFormat(std::u16string_view rFormatNm
,
69 const SfxFilterContainer
* pCnt
)
// Two local containers serve as defaults: one named sSWRITER and one
// named sSWRITERWEB.
71 SfxFilterContainer
aCntSw( sSWRITER
);
72 SfxFilterContainer
aCntSwWeb( sSWRITERWEB
);
// A caller-supplied container wins; otherwise aCntSw is used when
// IsDocShellRegistered() is true and aCntSwWeb when it is not.
73 const SfxFilterContainer
* pFltCnt
= pCnt
? pCnt
: ( IsDocShellRegistered() ? &aCntSw
: &aCntSwWeb
);
// Walk the filters that the matcher yields for the container's name,
// starting with the first one.
78 SfxFilterMatcher
aMatcher( pFltCnt
->GetName() );
79 SfxFilterMatcherIter
aIter( aMatcher
);
80 std::shared_ptr
<const SfxFilter
> pFilter
= aIter
.First();
// A filter matches when its user data equals the requested format name.
83 if( pFilter
->GetUserData() == rFormatNm
)
// Advance to the next candidate.
85 pFilter
= aIter
.Next();
// NOTE(review): presumably ends the search once a caller-supplied
// container or the sSWRITERWEB container has been examined, so that the
// web container acts as a fallback for the default one - confirm.
88 if( pCnt
|| pFltCnt
== &aCntSwWeb
)
// Checks a package storage (XStorage) against a filter.
//
// rStg     storage to examine
// rFilter  filter whose format id is compared with the storage's
95 bool SwIoSystem::IsValidStgFilter( const css::uno::Reference
< css::embed::XStorage
>& rStg
, const SfxFilter
& rFilter
)
// Format id that SotStorage derives from the storage.
100 SotClipboardFormatId nStgFormatId
= SotStorage::GetFormatID( rStg
);
// Record whether the storage has a stream element named content.xml.
101 bRet
= rStg
->isStreamElement( "content.xml" );
// The storage's format id must be known (not NONE) and must be equal to
// the filter's format.
// NOTE(review): this second assignment is presumably only performed when
// the content.xml check succeeded - confirm.
103 bRet
= ( nStgFormatId
!= SotClipboardFormatId::NONE
&& ( rFilter
.GetFormat() == nStgFormatId
) );
// UNO exceptions raised by the storage calls are caught here.
105 catch (const css::uno::Exception
& )
// Checks a SotStorage against a filter.
//
// rStg     storage to examine
// rFilter  filter to validate against the storage
112 bool SwIoSystem::IsValidStgFilter(SotStorage
& rStg
, const SfxFilter
& rFilter
)
114 SotClipboardFormatId nStgFormatId
= rStg
.GetFormat();
115 /*#i8409# We cannot trust the clipboard id anymore :-(*/
// For the WW8 and WW6 filters the id is reset to NONE, which makes the
// format comparison in the expression below pass unconditionally.
116 if (rFilter
.GetUserData() == FILTER_WW8
|| rFilter
.GetUserData() == sWW6
)
117 nStgFormatId
= SotClipboardFormatId::NONE
;
// All three conditions must hold: the storage reports no error, the
// format id is either unknown or equal to the filter's format, and the
// storage contains the element named by GetSubStorageName() for this
// filter.
119 bool bRet
= ERRCODE_NONE
== rStg
.GetError() &&
120 ( nStgFormatId
== SotClipboardFormatId::NONE
|| rFilter
.GetFormat() == nStgFormatId
) &&
121 ( rStg
.IsContained( SwIoSystem::GetSubStorageName( rFilter
)) );
124 /* Bug 53445 - there are Excel Docs w/o ClipBoardId! */
125 /* Bug 62703 - and also WinWord Docs w/o ClipBoardId! */
126 if (rFilter
.GetUserData() == FILTER_WW8
|| rFilter
.GetUserData() == sWW6
)
// Tell WW8 and WW6 apart by the table stream: the presence of a 0Table
// or 1Table element has to coincide with the filter being the WW8 one.
128 bRet
= (rStg
.IsContained("0Table")
129 || rStg
.IsContained("1Table"))
130 == (rFilter
.GetUserData() == FILTER_WW8
);
// Extra check for filters that are not allowed as template: open the
// WordDocument stream read-only and read one byte from it.
// NOTE(review): the byte presumably carries the document's template
// flag, which then decides the result - confirm.
131 if (bRet
&& !rFilter
.IsAllowedAsTemplate())
133 tools::SvRef
<SotStorageStream
> xRef
=
134 rStg
.OpenSotStream("WordDocument",
135 StreamMode::STD_READ
);
138 xRef
->ReadUChar( nByte
);
146 // Check the type of the stream (file) by searching for corresponding set of bytes.
147 // If no known type is found, return ASCII for now!
148 // Returns the internal FilterName.
// Determines the filter for the file rFileName. Storage based files are
// probed against the registered filters; when this function reaches its
// last statement the FILTER_TEXT filter is returned.
//
// rFileName  name of the file to examine
149 std::shared_ptr
<const SfxFilter
> SwIoSystem::GetFileFilter(const OUString
& rFileName
)
// Pick the container: sSWRITER when IsDocShellRegistered() is true,
// sSWRITERWEB when it is not.
151 SfxFilterContainer
aCntSw( sSWRITER
);
152 SfxFilterContainer
aCntSwWeb( sSWRITERWEB
);
153 const SfxFilterContainer
* pFCntnr
= IsDocShellRegistered() ? &aCntSw
: &aCntSwWeb
;
// Iterate over the filters the matcher yields for that container name.
155 SfxFilterMatcher
aMatcher( pFCntnr
->GetName() );
156 SfxFilterMatcherIter
aIter( aMatcher
);
157 std::shared_ptr
<const SfxFilter
> pFilter
= aIter
.First();
161 if (SotStorage::IsStorageFile(rFileName
))
163 // package storage or OLEStorage based format
164 tools::SvRef
<SotStorage
> xStg
;
// Turn the file name into a file URL and open it read-only as a medium.
166 aObj
.SetSmartProtocol( INetProtocol::File
);
167 aObj
.SetSmartURL( rFileName
);
168 SfxMedium
aMedium(aObj
.GetMainURL(INetURLObject::DecodeMechanism::NONE
), StreamMode::STD_READ
);
170 // templates should not get precedence over "normal" filters (#i35508, #i33168)
171 std::shared_ptr
<const SfxFilter
> pTemplateFilter
;
// First case: the medium itself reports being a storage, so the filters
// are validated against its XStorage.
172 if (aMedium
.IsStorage())
174 uno::Reference
<embed::XStorage
> const xStor
= aMedium
.GetStorage();
// Only filters whose user data starts with the letter C are validated
// against the storage.
179 if (pFilter
->GetUserData().startsWith("C") && IsValidStgFilter(xStor
, *pFilter
))
// A matching template filter is only remembered, not taken immediately.
181 if (pFilter
->IsOwnTemplateFormat())
183 // found template filter; maybe there's a "normal" one also
184 pTemplateFilter
= pFilter
;
190 pFilter
= aIter
.Next();
193 // there's only a template filter that could be found
194 if ( pTemplateFilter
)
195 pFilter
= pTemplateFilter
;
// Second case: wrap the medium's input stream in a SotStorage, provided
// a stream exists and SotStorage recognises it as a storage file.
202 SvStream
*const pStream
= aMedium
.GetInStream();
203 if ( pStream
&& SotStorage::IsStorageFile(pStream
) )
204 xStg
= new SotStorage( pStream
, false );
// A ContentCreationException raised by the stream access is caught here.
206 catch (const css::ucb::ContentCreationException
&)
// Continue only with a storage that was created and reports no error.
210 if( xStg
.is() && ( xStg
->GetError() == ERRCODE_NONE
) )
// Same validation as in the first case, this time via the SotStorage
// overload of IsValidStgFilter.
214 if (pFilter
->GetUserData().startsWith("C") && IsValidStgFilter(*xStg
, *pFilter
))
216 if (pFilter
->IsOwnTemplateFormat())
218 // found template filter; maybe there's a "normal" one also
219 pTemplateFilter
= pFilter
;
225 pFilter
= aIter
.Next();
228 // there's only a template filter that could be found
229 if ( pTemplateFilter
)
230 pFilter
= pTemplateFilter
;
// Final fallback: the filter registered for FILTER_TEXT.
238 return SwIoSystem::GetFilterOfFormat(FILTER_TEXT
);
// Sniffs the text encoding of the data at the current position of rStrm.
// A chunk of at most sizeof(aBuf) bytes is read and handed to
// IsDetectableText(); afterwards the stream is moved backwards again.
//
// rStrm  stream to read from
241 rtl_TextEncoding
SwIoSystem::GetTextEncoding(SvStream
& rStrm
)
243 sal_Size nLen
, nOrig
;
// nOrig keeps the number of bytes actually read; nLen is passed on by
// reference and may be modified by IsDetectableText().
245 nOrig
= nLen
= rStrm
.ReadBytes(aBuf
, sizeof(aBuf
));
247 rtl_TextEncoding eCharSet
;
// Only the character set is requested; the remaining output arguments
// are passed as nullptr.
248 const bool bRet
= SwIoSystem::IsDetectableText(aBuf
, nLen
, &eCharSet
, nullptr, nullptr, nullptr);
// On successful detection the stream is moved back by nLen, the length
// as left behind by IsDetectableText().
249 if (bRet
&& eCharSet
!= RTL_TEXTENCODING_DONTKNOW
)
250 rStrm
.SeekRel(-(tools::Long(nLen
)));
// This seek moves back by the number of bytes originally read.
// NOTE(review): presumably the alternative branch of the check above -
// confirm.
252 rStrm
.SeekRel(-(tools::Long(nOrig
)));
// Examines a byte buffer and tries to work out its character set and
// line end convention.
//
// pBuf      bytes to examine
// rLen      number of bytes in pBuf, passed by reference
// pCharSet  receives the detected character set
//           (RTL_TEXTENCODING_DONTKNOW is the initial value)
// pSwap     output argument; GetTextEncoding() passes nullptr here
// pLineEnd  receives the determined line end type
// pBom      output argument; GetTextEncoding() passes nullptr here
//
// Returns the negation of bIsBareUnicode.
257 bool SwIoSystem::IsDetectableText(const char* pBuf
, sal_uLong
&rLen
,
258 rtl_TextEncoding
*pCharSet
, bool *pSwap
, LineEnd
*pLineEnd
, bool *pBom
)
261 rtl_TextEncoding eCharSet
= RTL_TEXTENCODING_DONTKNOW
;
264 /*See if it's a known unicode type*/
// Byte order mark EF BB BF: UTF-8.
268 if (rLen
> 2 && sal_uInt8(pBuf
[0]) == 0xEF && sal_uInt8(pBuf
[1]) == 0xBB &&
269 sal_uInt8(pBuf
[2]) == 0xBF)
271 eCharSet
= RTL_TEXTENCODING_UTF8
;
// Byte order mark FE FF (UTF-16 big endian): treated as UCS2.
275 else if (sal_uInt8(pBuf
[0]) == 0xFE && sal_uInt8(pBuf
[1]) == 0xFF)
277 eCharSet
= RTL_TEXTENCODING_UCS2
;
// Byte order mark FF FE (UTF-16 little endian): treated as UCS2.
282 else if (sal_uInt8(pBuf
[1]) == 0xFE && sal_uInt8(pBuf
[0]) == 0xFF)
284 eCharSet
= RTL_TEXTENCODING_UCS2
;
291 /*See unicode type again without BOM*/
// No character set found so far and the buffer is not empty: let the ICU
// charset detector have a look at the data.
292 if (rLen
>= 1 && eCharSet
== RTL_TEXTENCODING_DONTKNOW
)
294 UErrorCode uerr
= U_ZERO_ERROR
;
295 UCharsetDetector
* ucd
= ucsdet_open(&uerr
);
296 ucsdet_setText(ucd
, pBuf
, rLen
, &uerr
);
297 if (const UCharsetMatch
* match
= ucsdet_detect(ucd
, &uerr
))
299 const char* pEncodingName
= ucsdet_getName(match
, &uerr
);
// Map the ICU encoding name to an rtl text encoding. Only UTF-8, both
// UTF-16 variants and GB18030 are recognised, and only when ICU reported
// success.
301 if (U_SUCCESS(uerr
) && !strcmp("UTF-8", pEncodingName
))
303 eCharSet
= RTL_TEXTENCODING_UTF8
; // UTF-8
305 else if (U_SUCCESS(uerr
) && !strcmp("UTF-16BE", pEncodingName
))
307 eCharSet
= RTL_TEXTENCODING_UCS2
; // UTF-16BE
310 else if (U_SUCCESS(uerr
) && !strcmp("UTF-16LE", pEncodingName
))
312 eCharSet
= RTL_TEXTENCODING_UCS2
; // UTF-16LE
314 else if (U_SUCCESS(uerr
) && !strcmp("GB18030", pEncodingName
))
316 eCharSet
= RTL_TEXTENCODING_GB_18030
;
323 bool bCR
= false, bLF
= false, bIsBareUnicode
= false;
// Known character set: bring the data into a work buffer of sal_Unicode
// units (rLen + 1 elements).
325 if (eCharSet
!= RTL_TEXTENCODING_DONTKNOW
)
327 std::unique_ptr
<sal_Unicode
[]> aWork(new sal_Unicode
[rLen
+1]);
328 sal_Unicode
*pNewBuf
= aWork
.get();
// Anything but UCS2 goes through the rtl text-to-unicode converter.
330 if (eCharSet
!= RTL_TEXTENCODING_UCS2
)
333 rtl_TextToUnicodeConverter hConverter
=
334 rtl_createTextToUnicodeConverter(eCharSet
);
335 rtl_TextToUnicodeContext hContext
=
336 rtl_createTextToUnicodeContext(hConverter
);
// The three *_DEFAULT flags select the converter's default handling for
// undefined, multi-byte undefined and invalid input. The call's return
// value becomes the new length.
340 nNewLen
= rtl_convertTextToUnicode( hConverter
, hContext
, pBuf
,
341 rLen
, pNewBuf
, nNewLen
,
342 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT
|
343 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT
|
344 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
), &nInfo
, &nCntBytes
);
// Context and converter are released again after the conversion.
346 rtl_destroyTextToUnicodeContext(hConverter
, hContext
);
347 rtl_destroyTextToUnicodeConverter(hConverter
);
// The bytes are copied into the work buffer unchanged.
352 memcpy(pNewBuf
, pBuf
, rLen
);
// NOTE(review): the two bNativeLE definitions are presumably alternatives
// selected by an endianness preprocessor check (osl/endian.h is included
// by this file) - confirm.
354 bool const bNativeLE
= true;
356 bool const bNativeLE
= false;
// Byte order of the data differs from the native one: walk the work
// buffer with two char pointers that advance two bytes per step.
// NOTE(review): presumably swaps the two bytes of every unit - confirm.
358 if (bLE
!= bNativeLE
)
361 char* pF
= reinterpret_cast<char*>(pNewBuf
);
363 for(sal_uLong n
= 0; n
< nNewLen
; ++n
, pF
+=2, pN
+=2 )
// Visit every unit of the work buffer.
// NOTE(review): presumably where bCR and bLF get set - confirm.
372 for (sal_uLong nCnt
= 0; nCnt
< nNewLen
; ++nCnt
, ++pNewBuf
)
// Visit every raw byte of the input buffer.
389 for( sal_uLong nCnt
= 0; nCnt
< rLen
; ++nCnt
, ++pBuf
)
// Look one byte ahead, without reading past rLen, and test it for zero.
394 if( nCnt
+ 1 < rLen
&& !*(pBuf
+1) )
396 bIsBareUnicode
= true;
414 LineEnd eSysLE
= GetSystemLineEnd();
// CR and LF seen: CRLF. CR only: CR. No CR: LF.
419 eLineEnd
= bCR
? ( bLF
? LINEEND_CRLF
: LINEEND_CR
) : LINEEND_LF
;
// Hand the results back through the output arguments.
422 *pCharSet
= eCharSet
;
426 *pLineEnd
= eLineEnd
;
// The result is false exactly when bIsBareUnicode was set.
430 return !bIsBareUnicode
;
433 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */