Updated core
[LibreOffice.git] / svtools / source / svrtf / svparser.cxx
blob50c231390e0d4e8226dbcc7181bf50932c5ed94b
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <svtools/svparser.hxx>
21 #include <tools/stream.hxx>
22 #include <tools/debug.hxx>
23 #include <rtl/textcvt.h>
24 #include <rtl/tencinfo.h>
26 #include <boost/ptr_container/ptr_vector.hpp>
28 // structure to store the actuel data
29 struct SvParser_Impl
31 OUString aToken; // gescanntes Token
32 sal_uLong nFilePos; // actual position in stream
33 sal_uLong nlLineNr; // actual line number
34 sal_uLong nlLinePos; // actual column number
35 long nTokenValue; // extra value (RTF)
36 bool bTokenHasValue; // indicates whether nTokenValue is valid
37 int nToken; // actual Token
38 sal_Unicode nNextCh; // actual character
39 int nSaveToken; // the token from Continue
41 rtl_TextToUnicodeConverter hConv;
42 rtl_TextToUnicodeContext hContext;
44 SvParser_Impl()
45 : nFilePos(0)
46 , nlLineNr(0)
47 , nlLinePos(0)
48 , nTokenValue(0)
49 , bTokenHasValue(false)
50 , nToken(0)
51 , nNextCh(0)
52 , nSaveToken(0)
53 , hConv( 0 )
54 , hContext( reinterpret_cast<rtl_TextToUnicodeContext>(1) )
62 // Construktor
63 SvParser::SvParser( SvStream& rIn, sal_uInt8 nStackSize )
64 : rInput( rIn )
65 , nlLineNr( 1 )
66 , nlLinePos( 1 )
67 , pImplData( 0 )
68 , nTokenValue( 0 )
69 , bTokenHasValue( false )
70 , eState( SVPAR_NOTSTARTED )
71 , eSrcEnc( RTL_TEXTENCODING_DONTKNOW )
72 , nNextChPos(0)
73 , nNextCh(0)
74 , bDownloadingFile(false)
75 , bUCS2BSrcEnc(false)
76 , bSwitchToUCS2(false)
77 , bRTF_InTextRead(false)
78 , nTokenStackSize( nStackSize )
79 , nTokenStackPos( 0 )
81 eState = SVPAR_NOTSTARTED;
82 if( nTokenStackSize < 3 )
83 nTokenStackSize = 3;
84 pTokenStack = new TokenStackType[ nTokenStackSize ];
85 pTokenStackPos = pTokenStack;
88 SvParser::~SvParser()
90 if( pImplData && pImplData->hConv )
92 rtl_destroyTextToUnicodeContext( pImplData->hConv,
93 pImplData->hContext );
94 rtl_destroyTextToUnicodeConverter( pImplData->hConv );
97 delete pImplData;
99 delete [] pTokenStack;
102 void SvParser::ClearTxtConvContext()
104 if( pImplData && pImplData->hConv )
105 rtl_resetTextToUnicodeContext( pImplData->hConv, pImplData->hContext );
108 void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc )
111 if( eEnc != eSrcEnc )
113 if( pImplData && pImplData->hConv )
115 rtl_destroyTextToUnicodeContext( pImplData->hConv,
116 pImplData->hContext );
117 rtl_destroyTextToUnicodeConverter( pImplData->hConv );
118 pImplData->hConv = 0;
119 pImplData->hContext = reinterpret_cast<rtl_TextToUnicodeContext>(1);
122 if( rtl_isOctetTextEncoding(eEnc) ||
123 RTL_TEXTENCODING_UCS2 == eEnc )
125 eSrcEnc = eEnc;
126 if( !pImplData )
127 pImplData = new SvParser_Impl;
128 pImplData->hConv = rtl_createTextToUnicodeConverter( eSrcEnc );
129 DBG_ASSERT( pImplData->hConv,
130 "SvParser::SetSrcEncoding: no converter for source encoding" );
131 if( !pImplData->hConv )
132 eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
133 else
134 pImplData->hContext =
135 rtl_createTextToUnicodeContext( pImplData->hConv );
137 else
139 DBG_ASSERT( false,
140 "SvParser::SetSrcEncoding: invalid source encoding" );
141 eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
146 void SvParser::RereadLookahead()
148 rInput.Seek(nNextChPos);
149 nNextCh = GetNextChar();
152 sal_Unicode SvParser::GetNextChar()
154 sal_Unicode c = 0U;
156 // When reading muliple bytes, we don't have to care about the file
157 // position when we run inti the pending state. The file position is
158 // maintained by SaveState/RestoreState.
159 bool bErr;
160 if( bSwitchToUCS2 && 0 == rInput.Tell() )
162 unsigned char c1, c2;
163 bool bSeekBack = true;
165 rInput.ReadUChar( c1 );
166 bErr = rInput.IsEof() || rInput.GetError();
167 if( !bErr )
169 if( 0xff == c1 || 0xfe == c1 )
171 rInput.ReadUChar( c2 );
172 bErr = rInput.IsEof() || rInput.GetError();
173 if( !bErr )
175 if( 0xfe == c1 && 0xff == c2 )
177 eSrcEnc = RTL_TEXTENCODING_UCS2;
178 bUCS2BSrcEnc = true;
179 bSeekBack = false;
181 else if( 0xff == c1 && 0xfe == c2 )
183 eSrcEnc = RTL_TEXTENCODING_UCS2;
184 bUCS2BSrcEnc = false;
185 bSeekBack = false;
189 else if( 0xef == c1 || 0xbb == c1 ) // check for UTF-8 BOM
191 rInput.ReadUChar( c2 );
192 bErr = rInput.IsEof() || rInput.GetError();
193 if( !bErr )
195 if( ( 0xef == c1 && 0xbb == c2 ) || ( 0xbb == c1 && 0xef == c2 ) )
197 unsigned char c3(0);
198 rInput.ReadUChar( c3 );
199 bErr = rInput.IsEof() || rInput.GetError();
200 if( !bErr && ( 0xbf == c3 ) )
202 eSrcEnc = RTL_TEXTENCODING_UTF8;
203 bSeekBack = false;
209 if( bSeekBack )
210 rInput.Seek( 0 );
212 bSwitchToUCS2 = false;
215 nNextChPos = rInput.Tell();
217 if( RTL_TEXTENCODING_UCS2 == eSrcEnc )
219 sal_Unicode cUC = USHRT_MAX;
220 unsigned char c1, c2;
222 rInput.ReadUChar( c1 ).ReadUChar( c2 );
223 if( 2 == rInput.Tell() &&
224 !(rInput.IsEof() || rInput.GetError()) &&
225 ( (bUCS2BSrcEnc && 0xfe == c1 && 0xff == c2) ||
226 (!bUCS2BSrcEnc && 0xff == c1 && 0xfe == c2) ) )
227 rInput.ReadUChar( c1 ).ReadUChar( c2 );
229 bErr = rInput.IsEof() || rInput.GetError();
230 if( !bErr )
232 if( bUCS2BSrcEnc )
233 cUC = (sal_Unicode(c1) << 8) | c2;
234 else
235 cUC = (sal_Unicode(c2) << 8) | c1;
238 if( !bErr )
240 c = cUC;
243 else
245 sal_Size nChars = 0;
248 sal_Char c1; // signed, that's the text converter expects
249 rInput.ReadChar( c1 );
250 bErr = rInput.IsEof() || rInput.GetError();
251 if( !bErr )
253 if (
254 RTL_TEXTENCODING_DONTKNOW == eSrcEnc ||
255 RTL_TEXTENCODING_SYMBOL == eSrcEnc
258 // no convserion shall take place
259 c = (sal_Unicode)c1;
260 nChars = 1;
262 else
264 assert(pImplData && pImplData->hConv && "no text converter!");
266 sal_Unicode cUC;
267 sal_uInt32 nInfo = 0;
268 sal_Size nCvtBytes;
269 nChars = rtl_convertTextToUnicode(
270 pImplData->hConv, pImplData->hContext,
271 &c1, 1, &cUC, 1,
272 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
273 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
274 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
275 &nInfo, &nCvtBytes);
276 if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 )
278 // The conversion wasn't successful because we haven't
279 // read enough characters.
280 if( pImplData->hContext != reinterpret_cast<rtl_TextToUnicodeContext>(1) )
282 while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 )
284 rInput.ReadChar( c1 );
285 bErr = rInput.IsEof() || rInput.GetError();
286 if( bErr )
287 break;
289 nChars = rtl_convertTextToUnicode(
290 pImplData->hConv, pImplData->hContext,
291 &c1, 1, &cUC, 1,
292 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
293 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
294 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
295 &nInfo, &nCvtBytes);
297 if( !bErr )
299 if( 1 == nChars && 0 == nInfo )
301 c = cUC;
303 else if( 0 != nChars || 0 != nInfo )
305 DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0,
306 "source buffer is to small" );
307 DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL)) == 0,
308 "there is a conversion error" );
309 DBG_ASSERT( 0 == nChars,
310 "there is a converted character, but an error" );
311 // There are still errors, but nothing we can
312 // do
313 c = (sal_Unicode)'?';
314 nChars = 1;
318 else
320 sal_Char sBuffer[10];
321 sBuffer[0] = c1;
322 sal_uInt16 nLen = 1;
323 while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 &&
324 nLen < 10 )
326 rInput.ReadChar( c1 );
327 bErr = rInput.IsEof() || rInput.GetError();
328 if( bErr )
329 break;
331 sBuffer[nLen++] = c1;
332 nChars = rtl_convertTextToUnicode(
333 pImplData->hConv, 0, sBuffer, nLen, &cUC, 1,
334 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
335 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
336 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
337 &nInfo, &nCvtBytes);
339 if( !bErr )
341 if( 1 == nChars && 0 == nInfo )
343 DBG_ASSERT( nCvtBytes == nLen,
344 "no all bytes have been converted!" );
345 c = cUC;
347 else
349 DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0,
350 "source buffer is to small" );
351 DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL)) == 0,
352 "there is a conversion error" );
353 DBG_ASSERT( 0 == nChars,
354 "there is a converted character, but an error" );
356 // There are still errors, so we use the first
357 // character and restart after that.
358 c = (sal_Unicode)sBuffer[0];
359 rInput.SeekRel( -(nLen-1) );
360 nChars = 1;
365 else if( 1 == nChars && 0 == nInfo )
367 // The conversion was successful
368 DBG_ASSERT( nCvtBytes == 1,
369 "no all bytes have been converted!" );
370 c = cUC;
372 else if( 0 != nChars || 0 != nInfo )
374 DBG_ASSERT( 0 == nChars,
375 "there is a converted character, but an error" );
376 DBG_ASSERT( 0 != nInfo,
377 "there is no converted character and no error" );
378 // #73398#: If the character could not be converted,
379 // because a conversion is not available, do no conversion at all.
380 c = (sal_Unicode)c1;
381 nChars = 1;
387 while( 0 == nChars && !bErr );
389 if( bErr )
391 if( ERRCODE_IO_PENDING == rInput.GetError() )
393 eState = SVPAR_PENDING;
394 return c;
396 else
397 return sal_Unicode(EOF);
400 if( c == '\n' )
402 IncLineNr();
403 SetLinePos( 1L );
405 else
406 IncLinePos();
407 return c;
410 int SvParser::GetNextToken()
412 int nRet = 0;
414 if( !nTokenStackPos )
416 aToken.clear(); // empty token buffer
417 nTokenValue = -1; // marker for no value read
418 bTokenHasValue = false;
420 nRet = _GetNextToken();
421 if( SVPAR_PENDING == eState )
422 return nRet;
425 ++pTokenStackPos;
426 if( pTokenStackPos == pTokenStack + nTokenStackSize )
427 pTokenStackPos = pTokenStack;
429 // pop from stack ??
430 if( nTokenStackPos )
432 --nTokenStackPos;
433 nTokenValue = pTokenStackPos->nTokenValue;
434 bTokenHasValue = pTokenStackPos->bTokenHasValue;
435 aToken = pTokenStackPos->sToken;
436 nRet = pTokenStackPos->nTokenId;
438 // no, now push actual value on stack
439 else if( SVPAR_WORKING == eState )
441 pTokenStackPos->sToken = aToken;
442 pTokenStackPos->nTokenValue = nTokenValue;
443 pTokenStackPos->bTokenHasValue = bTokenHasValue;
444 pTokenStackPos->nTokenId = nRet;
446 else if( SVPAR_ACCEPTED != eState && SVPAR_PENDING != eState )
447 eState = SVPAR_ERROR; // an error occurred
449 return nRet;
452 int SvParser::SkipToken( short nCnt ) // "skip" n Tokens backward
454 pTokenStackPos = GetStackPtr( nCnt );
455 short nTmp = nTokenStackPos - nCnt;
456 if( nTmp < 0 )
457 nTmp = 0;
458 else if( nTmp > nTokenStackSize )
459 nTmp = nTokenStackSize;
460 nTokenStackPos = sal_uInt8(nTmp);
462 // restore values
463 aToken = pTokenStackPos->sToken;
464 nTokenValue = pTokenStackPos->nTokenValue;
465 bTokenHasValue = pTokenStackPos->bTokenHasValue;
467 return pTokenStackPos->nTokenId;
470 SvParser::TokenStackType* SvParser::GetStackPtr( short nCnt )
472 sal_uInt8 nAktPos = sal_uInt8(pTokenStackPos - pTokenStack );
473 if( nCnt > 0 )
475 if( nCnt >= nTokenStackSize )
476 nCnt = (nTokenStackSize-1);
477 if( nAktPos + nCnt < nTokenStackSize )
478 nAktPos = sal::static_int_cast< sal_uInt8 >(nAktPos + nCnt);
479 else
480 nAktPos = sal::static_int_cast< sal_uInt8 >(
481 nAktPos + (nCnt - nTokenStackSize));
483 else if( nCnt < 0 )
485 if( -nCnt >= nTokenStackSize )
486 nCnt = -nTokenStackSize+1;
487 if( -nCnt <= nAktPos )
488 nAktPos = sal::static_int_cast< sal_uInt8 >(nAktPos + nCnt);
489 else
490 nAktPos = sal::static_int_cast< sal_uInt8 >(
491 nAktPos + (nCnt + nTokenStackSize));
493 return pTokenStack + nAktPos;
496 // is called for each token which is recognised by CallParser
497 void SvParser::NextToken( int )
502 // to read asynchronous from SvStream
504 int SvParser::GetSaveToken() const
506 return pImplData ? pImplData->nSaveToken : 0;
509 void SvParser::SaveState( int nToken )
511 // save actual status
512 if( !pImplData )
514 pImplData = new SvParser_Impl;
515 pImplData->nSaveToken = 0;
518 pImplData->nFilePos = rInput.Tell();
519 pImplData->nToken = nToken;
521 pImplData->aToken = aToken;
522 pImplData->nlLineNr = nlLineNr;
523 pImplData->nlLinePos = nlLinePos;
524 pImplData->nTokenValue= nTokenValue;
525 pImplData->bTokenHasValue = bTokenHasValue;
526 pImplData->nNextCh = nNextCh;
529 void SvParser::RestoreState()
531 // restore old status
532 if( pImplData )
534 if( ERRCODE_IO_PENDING == rInput.GetError() )
535 rInput.ResetError();
536 aToken = pImplData->aToken;
537 nlLineNr = pImplData->nlLineNr;
538 nlLinePos = pImplData->nlLinePos;
539 nTokenValue= pImplData->nTokenValue;
540 bTokenHasValue=pImplData->bTokenHasValue;
541 nNextCh = pImplData->nNextCh;
543 pImplData->nSaveToken = pImplData->nToken;
545 rInput.Seek( pImplData->nFilePos );
549 void SvParser::Continue( int )
553 void SvParser::BuildWhichTable( std::vector<sal_uInt16> &rWhichMap,
554 sal_uInt16 *pWhichIds,
555 sal_uInt16 nWhichIds )
557 sal_uInt16 aNewRange[2];
559 for( sal_uInt16 nCnt = 0; nCnt < nWhichIds; ++nCnt, ++pWhichIds )
560 if( *pWhichIds )
562 aNewRange[0] = aNewRange[1] = *pWhichIds;
563 bool bIns = true;
565 // search position
566 for ( sal_uInt16 nOfs = 0; rWhichMap[nOfs]; nOfs += 2 )
568 if( *pWhichIds < rWhichMap[nOfs] - 1 )
570 // new range before
571 rWhichMap.insert( rWhichMap.begin() + nOfs, aNewRange, aNewRange + 2 );
572 bIns = false;
573 break;
575 else if( *pWhichIds == rWhichMap[nOfs] - 1 )
577 // extend range downwards
578 rWhichMap[nOfs] = *pWhichIds;
579 bIns = false;
580 break;
582 else if( *pWhichIds == rWhichMap[nOfs+1] + 1 )
584 if( rWhichMap[nOfs+2] != 0 && rWhichMap[nOfs+2] == *pWhichIds + 1 )
586 // merge with next field
587 rWhichMap[nOfs+1] = rWhichMap[nOfs+3];
588 rWhichMap.erase( rWhichMap.begin() + nOfs + 2,
589 rWhichMap.begin() + nOfs + 4 );
591 else
592 // extend range upwards
593 rWhichMap[nOfs+1] = *pWhichIds;
594 bIns = false;
595 break;
599 // append range
600 if( bIns )
602 rWhichMap.insert( rWhichMap.begin() + rWhichMap.size() - 1,
603 aNewRange, aNewRange + 2 );
609 IMPL_LINK_NOARG( SvParser, NewDataRead )
611 switch( eState )
613 case SVPAR_PENDING:
614 // if file is loaded we are not allowed to continue
615 // instead should ignore the call.
616 if( IsDownloadingFile() )
617 break;
619 eState = SVPAR_WORKING;
620 RestoreState();
622 Continue( pImplData->nToken );
624 if( ERRCODE_IO_PENDING == rInput.GetError() )
625 rInput.ResetError();
627 if( SVPAR_PENDING != eState )
628 ReleaseRef(); // ready otherwise!
629 break;
631 case SVPAR_WAITFORDATA:
632 eState = SVPAR_WORKING;
633 break;
635 case SVPAR_NOTSTARTED:
636 case SVPAR_WORKING:
637 break;
639 default:
640 ReleaseRef(); // ready otherwise!
641 break;
644 return 0;
647 /*========================================================================
649 * SvKeyValueIterator.
651 *======================================================================*/
653 typedef boost::ptr_vector<SvKeyValue> SvKeyValueList_Impl;
655 struct SvKeyValueIterator::Impl
657 SvKeyValueList_Impl maList;
658 sal_uInt16 mnPos;
660 Impl() : mnPos(0) {}
663 SvKeyValueIterator::SvKeyValueIterator() : mpImpl(new Impl) {}
665 SvKeyValueIterator::~SvKeyValueIterator()
667 delete mpImpl;
670 bool SvKeyValueIterator::GetFirst (SvKeyValue &rKeyVal)
672 mpImpl->mnPos = mpImpl->maList.size();
673 return GetNext (rKeyVal);
676 bool SvKeyValueIterator::GetNext (SvKeyValue &rKeyVal)
678 if (mpImpl->mnPos > 0)
680 rKeyVal = mpImpl->maList[--mpImpl->mnPos];
681 return true;
683 else
685 // Nothing to do.
686 return false;
690 void SvKeyValueIterator::Append (const SvKeyValue &rKeyVal)
692 mpImpl->maList.push_back(new SvKeyValue(rKeyVal));
695 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */