Avoid potential negative array index access to cached text.
[LibreOffice.git] / io / source / TextInputStream / TextInputStream.cxx
blob1ce12a6e796e6fb839a9be8758c080f33b005be6
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <string.h>
22 #include <comphelper/sequence.hxx>
23 #include <cppuhelper/implbase.hxx>
24 #include <cppuhelper/supportsservice.hxx>
26 #include <rtl/textenc.h>
27 #include <rtl/tencinfo.h>
29 #include <com/sun/star/io/BufferSizeExceededException.hpp>
30 #include <com/sun/star/io/IOException.hpp>
31 #include <com/sun/star/io/NotConnectedException.hpp>
32 #include <com/sun/star/io/XTextInputStream2.hpp>
33 #include <com/sun/star/lang/XServiceInfo.hpp>
35 #include <vector>
37 namespace com::sun::star::uno { class XComponentContext; }
39 using namespace ::osl;
40 using namespace ::cppu;
41 using namespace ::com::sun::star::uno;
42 using namespace ::com::sun::star::lang;
43 using namespace ::com::sun::star::io;
46 // Implementation XTextInputStream
48 #define INITIAL_UNICODE_BUFFER_CAPACITY 0x100
49 #define READ_BYTE_COUNT 0x100
51 namespace {
53 class OTextInputStream : public WeakImplHelper< XTextInputStream2, XServiceInfo >
55 Reference< XInputStream > mxStream;
57 // Encoding
58 bool mbEncodingInitialized;
59 rtl_TextToUnicodeConverter mConvText2Unicode;
60 rtl_TextToUnicodeContext mContextText2Unicode;
61 Sequence<sal_Int8> mSeqSource;
63 // Internal buffer for characters that are already converted successfully
64 std::vector<sal_Unicode> mvBuffer;
65 sal_Int32 mnCharsInBuffer;
66 bool mbReachedEOF;
68 /// @throws IOException
69 /// @throws RuntimeException
70 OUString implReadString( const Sequence< sal_Unicode >& Delimiters,
71 bool bRemoveDelimiter, bool bFindLineEnd );
72 /// @throws IOException
73 /// @throws RuntimeException
74 sal_Int32 implReadNext();
75 /// @throws RuntimeException
76 void checkNull();
78 public:
79 OTextInputStream();
80 virtual ~OTextInputStream() override;
82 // Methods XTextInputStream
83 virtual OUString SAL_CALL readLine( ) override;
84 virtual OUString SAL_CALL readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter ) override;
85 virtual sal_Bool SAL_CALL isEOF( ) override;
86 virtual void SAL_CALL setEncoding( const OUString& Encoding ) override;
88 // Methods XInputStream
89 virtual sal_Int32 SAL_CALL readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead ) override;
90 virtual sal_Int32 SAL_CALL readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead ) override;
91 virtual void SAL_CALL skipBytes( sal_Int32 nBytesToSkip ) override;
92 virtual sal_Int32 SAL_CALL available( ) override;
93 virtual void SAL_CALL closeInput( ) override;
95 // Methods XActiveDataSink
96 virtual void SAL_CALL setInputStream( const Reference< XInputStream >& aStream ) override;
97 virtual Reference< XInputStream > SAL_CALL getInputStream() override;
99 // Methods XServiceInfo
100 virtual OUString SAL_CALL getImplementationName() override;
101 virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override;
102 virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override;
107 OTextInputStream::OTextInputStream()
108 : mbEncodingInitialized(false)
109 , mConvText2Unicode(nullptr)
110 , mContextText2Unicode(nullptr)
111 , mSeqSource(READ_BYTE_COUNT)
112 , mvBuffer(INITIAL_UNICODE_BUFFER_CAPACITY, 0)
113 , mnCharsInBuffer(0)
114 , mbReachedEOF(false)
118 OTextInputStream::~OTextInputStream()
120 if( mbEncodingInitialized )
122 rtl_destroyTextToUnicodeContext( mConvText2Unicode, mContextText2Unicode );
123 rtl_destroyTextToUnicodeConverter( mConvText2Unicode );
127 // Check uninitialized object
129 void OTextInputStream::checkNull()
131 if (mxStream==nullptr){
132 throw RuntimeException("Uninitialized object");
136 // XTextInputStream
138 OUString OTextInputStream::readLine( )
140 checkNull();
141 static Sequence< sal_Unicode > aDummySeq;
142 return implReadString( aDummySeq, true, true );
145 OUString OTextInputStream::readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter )
147 checkNull();
148 return implReadString( Delimiters, bRemoveDelimiter, false );
151 sal_Bool OTextInputStream::isEOF()
153 checkNull();
154 bool bRet = false;
155 if( mnCharsInBuffer == 0 && mbReachedEOF )
156 bRet = true;
157 return bRet;
161 OUString OTextInputStream::implReadString( const Sequence< sal_Unicode >& Delimiters,
162 bool bRemoveDelimiter, bool bFindLineEnd )
164 OUString aRetStr;
165 if( !mbEncodingInitialized )
167 setEncoding( "utf8" );
169 if( !mbEncodingInitialized )
170 return aRetStr;
172 // Only for bFindLineEnd
173 sal_Unicode cLineEndChar1 = 0x0D;
174 sal_Unicode cLineEndChar2 = 0x0A;
176 sal_Int32 nBufferReadPos = 0;
177 sal_Int32 nCopyLen = 0;
178 bool bFound = false;
179 bool bFoundFirstLineEndChar = false;
180 sal_Unicode cFirstLineEndChar = 0;
181 while( !bFound )
183 // Still characters available?
184 if( nBufferReadPos == mnCharsInBuffer )
186 // Already reached EOF? Then we can't read any more
187 if( mbReachedEOF )
188 break;
190 // No, so read new characters
191 if( !implReadNext() )
192 break;
195 // Now there should be characters available
196 // (otherwise the loop should have been broken before)
197 sal_Unicode c = mvBuffer[ nBufferReadPos++ ];
199 if( bFindLineEnd )
201 if( bFoundFirstLineEndChar )
203 bFound = true;
204 nCopyLen = nBufferReadPos - 2;
205 if( c == cLineEndChar1 || c == cLineEndChar2 )
207 // Same line end char -> new line break
208 if( c == cFirstLineEndChar )
210 nBufferReadPos--;
213 else
215 // No second line end char
216 nBufferReadPos--;
219 else if( c == cLineEndChar1 || c == cLineEndChar2 )
221 bFoundFirstLineEndChar = true;
222 cFirstLineEndChar = c;
225 else if( comphelper::findValue(Delimiters, c) != -1 )
227 bFound = true;
228 nCopyLen = nBufferReadPos;
229 if( bRemoveDelimiter )
230 nCopyLen--;
234 // Nothing found? Return all
235 if( !nCopyLen && !bFound && mbReachedEOF )
236 nCopyLen = nBufferReadPos;
238 // Create string
239 if( nCopyLen )
240 aRetStr = OUString( mvBuffer.data(), nCopyLen );
242 // Copy rest of buffer
243 memmove( mvBuffer.data(), mvBuffer.data() + nBufferReadPos,
244 (mnCharsInBuffer - nBufferReadPos) * sizeof( sal_Unicode ) );
245 mnCharsInBuffer -= nBufferReadPos;
247 return aRetStr;
251 sal_Int32 OTextInputStream::implReadNext()
253 sal_Int32 nFreeBufferSize = mvBuffer.size() - mnCharsInBuffer;
254 if( nFreeBufferSize < READ_BYTE_COUNT )
255 mvBuffer.resize(mvBuffer.size() * 2);
256 nFreeBufferSize = mvBuffer.size() - mnCharsInBuffer;
260 sal_Int32 nRead = mxStream->readSomeBytes( mSeqSource, READ_BYTE_COUNT );
261 sal_Int32 nTotalRead = nRead;
262 if( nRead == 0 )
263 mbReachedEOF = true;
265 // Try to convert
266 sal_uInt32 uiInfo;
267 sal_Size nSrcCvtBytes = 0;
268 sal_Size nTargetCount = 0;
269 sal_Size nSourceCount = 0;
270 while( true )
272 const sal_Int8 *pbSource = mSeqSource.getConstArray();
274 // All invalid characters are transformed to the unicode undefined char
275 nTargetCount += rtl_convertTextToUnicode(
276 mConvText2Unicode,
277 mContextText2Unicode,
278 reinterpret_cast<const char*>(&( pbSource[nSourceCount] )),
279 nTotalRead - nSourceCount,
280 mvBuffer.data() + mnCharsInBuffer + nTargetCount,
281 nFreeBufferSize - nTargetCount,
282 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
283 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
284 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
285 &uiInfo,
286 &nSrcCvtBytes );
287 nSourceCount += nSrcCvtBytes;
289 bool bCont = false;
290 if( uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL )
292 mvBuffer.resize(mvBuffer.size() * 2);
293 bCont = true;
296 if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL )
298 // read next byte
299 static Sequence< sal_Int8 > aOneByteSeq( 1 );
300 nRead = mxStream->readSomeBytes( aOneByteSeq, 1 );
301 if( nRead == 0 )
303 mbReachedEOF = true;
304 break;
307 sal_Int32 nOldLen = mSeqSource.getLength();
308 nTotalRead++;
309 if( nTotalRead > nOldLen )
311 mSeqSource.realloc( nTotalRead );
313 mSeqSource.getArray()[ nOldLen ] = aOneByteSeq.getConstArray()[ 0 ];
314 bCont = true;
317 if( bCont )
318 continue;
319 break;
322 mnCharsInBuffer += nTargetCount;
323 return nTargetCount;
325 catch( NotConnectedException& )
327 throw IOException("Not connected");
328 //throw IOException( L"OTextInputStream::implReadString failed" );
330 catch( BufferSizeExceededException& )
332 throw IOException("Buffer size exceeded");
336 void OTextInputStream::setEncoding( const OUString& Encoding )
338 OString aOEncodingStr = OUStringToOString( Encoding, RTL_TEXTENCODING_ASCII_US );
339 rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( aOEncodingStr.getStr() );
340 if( RTL_TEXTENCODING_DONTKNOW == encoding )
341 return;
343 mbEncodingInitialized = true;
344 mConvText2Unicode = rtl_createTextToUnicodeConverter( encoding );
345 mContextText2Unicode = rtl_createTextToUnicodeContext( mConvText2Unicode );
349 // XInputStream
351 sal_Int32 OTextInputStream::readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead )
353 checkNull();
354 return mxStream->readBytes( aData, nBytesToRead );
357 sal_Int32 OTextInputStream::readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead )
359 checkNull();
360 return mxStream->readSomeBytes( aData, nMaxBytesToRead );
363 void OTextInputStream::skipBytes( sal_Int32 nBytesToSkip )
365 checkNull();
366 mxStream->skipBytes( nBytesToSkip );
369 sal_Int32 OTextInputStream::available( )
371 checkNull();
372 return mxStream->available();
375 void OTextInputStream::closeInput( )
377 checkNull();
378 mxStream->closeInput();
382 // XActiveDataSink
384 void OTextInputStream::setInputStream( const Reference< XInputStream >& aStream )
386 mxStream = aStream;
389 Reference< XInputStream > OTextInputStream::getInputStream()
391 return mxStream;
394 OUString OTextInputStream::getImplementationName()
396 return "com.sun.star.comp.io.TextInputStream";
399 sal_Bool OTextInputStream::supportsService(const OUString& ServiceName)
401 return cppu::supportsService(this, ServiceName);
404 Sequence< OUString > OTextInputStream::getSupportedServiceNames()
406 return { "com.sun.star.io.TextInputStream" };
409 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
410 io_OTextInputStream_get_implementation(
411 css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
413 return cppu::acquire(new OTextInputStream());
417 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */