1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <svtools/svparser.hxx>
21 #include <tools/stream.hxx>
22 #include <tools/debug.hxx>
23 #include <rtl/textcvt.h>
24 #include <rtl/tencinfo.h>
26 #include <boost/ptr_container/ptr_vector.hpp>
28 // Structure holding the current parser data.
// NOTE(review): the struct header is not visible in this listing. The member
// names match the fields accessed through pImplData (allocated as
// SvParser_Impl) further down, so these are presumably SvParser_Impl members.
31 OUString aToken
; // scanned token
32 sal_uLong nFilePos
; // current position in stream
33 sal_uLong nlLineNr
; // current line number
34 sal_uLong nlLinePos
; // current column number
35 long nTokenValue
; // extra value (RTF)
36 bool bTokenHasValue
; // indicates whether nTokenValue is valid
37 int nToken
; // current token
38 sal_Unicode nNextCh
; // current character
39 int nSaveToken
; // the token from Continue
// Handles handed to the rtl text-to-Unicode conversion functions.
41 rtl_TextToUnicodeConverter hConv
;
42 rtl_TextToUnicodeContext hContext
;
// Member initialisers; the enclosing constructor header is not visible here.
49 , bTokenHasValue(false)
// hContext starts out as the placeholder value 1. SetSrcEncoding resets it to
// the same value and GetNextChar compares against it.
54 , hContext( reinterpret_cast<rtl_TextToUnicodeContext
>(1) )
// Constructor.
// rIn        - the input stream (how it is stored is not visible in this listing).
// nStackSize - requested number of token stack entries; copied into
//              nTokenStackSize.
63 SvParser::SvParser( SvStream
& rIn
, sal_uInt8 nStackSize
)
69 , bTokenHasValue( false )
70 , eState( SVPAR_NOTSTARTED
)
71 , eSrcEnc( RTL_TEXTENCODING_DONTKNOW
)
74 , bDownloadingFile(false)
76 , bSwitchToUCS2(false)
77 , bRTF_InTextRead(false)
78 , nTokenStackSize( nStackSize
)
81 eState
= SVPAR_NOTSTARTED
;
// NOTE(review): the statement guarded by this check is not visible here;
// presumably it enforces a minimum stack size of 3 - confirm.
82 if( nTokenStackSize
< 3 )
// Allocate the token stack array and start at its first entry.
84 pTokenStack
= new TokenStackType
[ nTokenStackSize
];
85 pTokenStackPos
= pTokenStack
;
// NOTE(review): the header of this function is not visible in this listing;
// the clean-up below suggests it is the SvParser destructor - confirm.
// If a text converter was created, destroy its context first, then the
// converter itself.
90 if( pImplData
&& pImplData
->hConv
)
92 rtl_destroyTextToUnicodeContext( pImplData
->hConv
,
93 pImplData
->hContext
);
94 rtl_destroyTextToUnicodeConverter( pImplData
->hConv
);
// Free the token stack array (allocated with new[] in the constructor).
99 delete [] pTokenStack
;
// Resets the text-to-Unicode conversion context via
// rtl_resetTextToUnicodeContext. Does nothing when no implementation data or
// no converter exists.
102 void SvParser::ClearTxtConvContext()
104 if( pImplData
&& pImplData
->hConv
)
105 rtl_resetTextToUnicodeContext( pImplData
->hConv
, pImplData
->hContext
);
// Switches the source encoding used for reading characters.
// eEnc - the requested encoding; nothing happens when it equals eSrcEnc.
// NOTE(review): several original lines of this function are not visible in
// this listing (among them, presumably, the assignment of eEnc to eSrcEnc and
// a guard around the allocation of pImplData) - confirm against the full file.
108 void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc
)
111 if( eEnc
!= eSrcEnc
)
// Tear down an existing converter: context first, then the converter, and
// put both handles back to their "no converter" values (0 / placeholder 1).
113 if( pImplData
&& pImplData
->hConv
)
115 rtl_destroyTextToUnicodeContext( pImplData
->hConv
,
116 pImplData
->hContext
);
117 rtl_destroyTextToUnicodeConverter( pImplData
->hConv
);
118 pImplData
->hConv
= 0;
119 pImplData
->hContext
= reinterpret_cast<rtl_TextToUnicodeContext
>(1);
// Only octet encodings and UCS2 are handled by the branch below.
122 if( rtl_isOctetTextEncoding(eEnc
) ||
123 RTL_TEXTENCODING_UCS2
== eEnc
)
127 pImplData
= new SvParser_Impl
;
// Create a converter for the source encoding.
128 pImplData
->hConv
= rtl_createTextToUnicodeConverter( eSrcEnc
);
129 DBG_ASSERT( pImplData
->hConv
,
130 "SvParser::SetSrcEncoding: no converter for source encoding" );
// Without a converter the encoding falls back to "don't know".
131 if( !pImplData
->hConv
)
132 eSrcEnc
= RTL_TEXTENCODING_DONTKNOW
;
// NOTE(review): presumably the else branch of the check above - confirm.
134 pImplData
->hContext
=
135 rtl_createTextToUnicodeContext( pImplData
->hConv
);
// Fragment of the handling for an unsupported encoding: the message is
// reported (the call it belongs to is not visible here) and the encoding is
// set to "don't know".
140 "SvParser::SetSrcEncoding: invalid source encoding" );
141 eSrcEnc
= RTL_TEXTENCODING_DONTKNOW
;
// Re-reads the lookahead character: seeks the input stream to nNextChPos
// (set by GetNextChar before it reads a character) and stores the result of
// GetNextChar() in nNextCh.
146 void SvParser::RereadLookahead()
148 rInput
.Seek(nNextChPos
);
149 nNextCh
= GetNextChar();
// Reads the next character from rInput and converts it to Unicode according
// to eSrcEnc. On a read error the state is set to SVPAR_PENDING when the
// stream reports ERRCODE_IO_PENDING, and sal_Unicode(EOF) is returned.
// NOTE(review): many original lines of this function (braces, else branches,
// declarations of c, bErr, nChars, nLen, nCvtBytes, c3, and some call
// arguments) are not visible in this listing; the comments below describe only
// what the visible statements show.
152 sal_Unicode
SvParser::GetNextChar()
156 // When reading multiple bytes, we don't have to care about the file
157 // position when we run into the pending state. The file position is
158 // maintained by SaveState/RestoreState.
// Byte-order-mark detection; only attempted at stream position 0 while
// bSwitchToUCS2 is set.
160 if( bSwitchToUCS2
&& 0 == rInput
.Tell() )
162 unsigned char c1
, c2
;
163 bool bSeekBack
= true;
165 rInput
.ReadUChar( c1
);
166 bErr
= rInput
.IsEof() || rInput
.GetError();
// A first byte of FF or FE may start a UCS2 byte order mark.
169 if( 0xff == c1
|| 0xfe == c1
)
171 rInput
.ReadUChar( c2
);
172 bErr
= rInput
.IsEof() || rInput
.GetError();
// FE FF: UCS2. NOTE(review): the matching bUCS2BSrcEnc assignment for this
// branch is not visible here.
175 if( 0xfe == c1
&& 0xff == c2
)
177 eSrcEnc
= RTL_TEXTENCODING_UCS2
;
// FF FE: UCS2 with bUCS2BSrcEnc cleared.
181 else if( 0xff == c1
&& 0xfe == c2
)
183 eSrcEnc
= RTL_TEXTENCODING_UCS2
;
184 bUCS2BSrcEnc
= false;
189 else if( 0xef == c1
|| 0xbb == c1
) // check for UTF-8 BOM
191 rInput
.ReadUChar( c2
);
192 bErr
= rInput
.IsEof() || rInput
.GetError();
// NOTE(review): besides EF BB this also accepts BB EF as the first two
// bytes, which is not the UTF-8 BOM byte order (EF BB BF) - confirm whether
// that is intended.
195 if( ( 0xef == c1
&& 0xbb == c2
) || ( 0xbb == c1
&& 0xef == c2
) )
198 rInput
.ReadUChar( c3
);
199 bErr
= rInput
.IsEof() || rInput
.GetError();
200 if( !bErr
&& ( 0xbf == c3
) )
202 eSrcEnc
= RTL_TEXTENCODING_UTF8
;
// The detection is attempted only once.
212 bSwitchToUCS2
= false;
// Remember where the character about to be read starts (used by
// RereadLookahead).
215 nNextChPos
= rInput
.Tell();
// UCS2 input: two bytes form one character.
217 if( RTL_TEXTENCODING_UCS2
== eSrcEnc
)
219 sal_Unicode cUC
= USHRT_MAX
;
220 unsigned char c1
, c2
;
222 rInput
.ReadUChar( c1
).ReadUChar( c2
);
// If the first two bytes of the stream are the byte order mark matching
// bUCS2BSrcEnc, skip them by reading the next two bytes.
223 if( 2 == rInput
.Tell() &&
224 !(rInput
.IsEof() || rInput
.GetError()) &&
225 ( (bUCS2BSrcEnc
&& 0xfe == c1
&& 0xff == c2
) ||
226 (!bUCS2BSrcEnc
&& 0xff == c1
&& 0xfe == c2
) ) )
227 rInput
.ReadUChar( c1
).ReadUChar( c2
);
229 bErr
= rInput
.IsEof() || rInput
.GetError();
// Combine the two bytes with c1 as the high byte ...
233 cUC
= (sal_Unicode(c1
) << 8) | c2
;
// ... or with c2 as the high byte. NOTE(review): the condition selecting
// between the two is not visible; presumably bUCS2BSrcEnc - confirm.
235 cUC
= (sal_Unicode(c2
) << 8) | c1
;
// Non-UCS2 input: read byte by byte.
248 sal_Char c1
; // signed, that's what the text converter expects
249 rInput
.ReadChar( c1
);
250 bErr
= rInput
.IsEof() || rInput
.GetError();
254 RTL_TEXTENCODING_DONTKNOW
== eSrcEnc
||
255 RTL_TEXTENCODING_SYMBOL
== eSrcEnc
258 // no conversion shall take place
264 assert(pImplData
&& pImplData
->hConv
&& "no text converter!");
267 sal_uInt32 nInfo
= 0;
// First conversion attempt, using the stored conversion context.
269 nChars
= rtl_convertTextToUnicode(
270 pImplData
->hConv
, pImplData
->hContext
,
272 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
|
273 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
|
274 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
,
276 if( (nInfo
&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
) != 0 )
278 // The conversion wasn't successful because we haven't
279 // read enough characters.
// A real context exists (not the placeholder value 1): keep feeding single
// bytes to the converter until it no longer reports a too-small buffer.
280 if( pImplData
->hContext
!= reinterpret_cast<rtl_TextToUnicodeContext
>(1) )
282 while( (nInfo
&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
) != 0 )
284 rInput
.ReadChar( c1
);
285 bErr
= rInput
.IsEof() || rInput
.GetError();
289 nChars
= rtl_convertTextToUnicode(
290 pImplData
->hConv
, pImplData
->hContext
,
292 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
|
293 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
|
294 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
,
299 if( 1 == nChars
&& 0 == nInfo
)
303 else if( 0 != nChars
|| 0 != nInfo
)
305 DBG_ASSERT( (nInfo
&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
) == 0,
306 "source buffer is to small" );
307 DBG_ASSERT( (nInfo
&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
)) == 0,
308 "there is a conversion error" );
309 DBG_ASSERT( 0 == nChars
,
310 "there is a converted character, but an error" );
311 // There are still errors, but nothing we can
// The character is replaced by a question mark.
313 c
= (sal_Unicode
)'?';
// No real context: collect the bytes in a local buffer and convert the
// whole buffer again after each additional byte (the context argument of
// the call below is 0).
320 sal_Char sBuffer
[10];
323 while( (nInfo
&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
) != 0 &&
326 rInput
.ReadChar( c1
);
327 bErr
= rInput
.IsEof() || rInput
.GetError();
331 sBuffer
[nLen
++] = c1
;
332 nChars
= rtl_convertTextToUnicode(
333 pImplData
->hConv
, 0, sBuffer
, nLen
, &cUC
, 1,
334 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
|
335 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
|
336 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
,
341 if( 1 == nChars
&& 0 == nInfo
)
343 DBG_ASSERT( nCvtBytes
== nLen
,
344 "no all bytes have been converted!" );
349 DBG_ASSERT( (nInfo
&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
) == 0,
350 "source buffer is to small" );
351 DBG_ASSERT( (nInfo
&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
)) == 0,
352 "there is a conversion error" );
353 DBG_ASSERT( 0 == nChars
,
354 "there is a converted character, but an error" );
356 // There are still errors, so we use the first
357 // character and restart after that.
358 c
= (sal_Unicode
)sBuffer
[0];
// Seek back over all buffered bytes except the first one.
359 rInput
.SeekRel( -(nLen
-1) );
365 else if( 1 == nChars
&& 0 == nInfo
)
367 // The conversion was successful
368 DBG_ASSERT( nCvtBytes
== 1,
369 "no all bytes have been converted!" );
372 else if( 0 != nChars
|| 0 != nInfo
)
374 DBG_ASSERT( 0 == nChars
,
375 "there is a converted character, but an error" );
376 DBG_ASSERT( 0 != nInfo
,
377 "there is no converted character and no error" );
378 // #73398#: If the character could not be converted,
379 // because a conversion is not available, do no conversion at all.
// Repeat while nothing was converted and no read error occurred.
387 while( 0 == nChars
&& !bErr
);
// Error path: a pending stream switches the parser to SVPAR_PENDING.
// NOTE(review): the guard around this part (presumably "if( bErr )") is not
// visible here.
391 if( ERRCODE_IO_PENDING
== rInput
.GetError() )
393 eState
= SVPAR_PENDING
;
397 return sal_Unicode(EOF
);
// Delivers the next token id. When nTokenStackPos is 0 a new token is
// scanned with _GetNextToken(); otherwise a token stored in the token stack
// is handed out again.
// NOTE(review): some original lines (declaration of nRet, the advance of
// pTokenStackPos, the "pop" condition and the return statements) are not
// visible in this listing.
410 int SvParser::GetNextToken()
// No tokens waiting on the stack: scan a new one.
414 if( !nTokenStackPos
)
416 aToken
.clear(); // empty token buffer
417 nTokenValue
= -1; // marker for no value read
418 bTokenHasValue
= false;
420 nRet
= _GetNextToken();
// NOTE(review): the statement executed for the pending state is not visible
// here; presumably an early return - confirm.
421 if( SVPAR_PENDING
== eState
)
// Wrap around to the first entry when the position runs past the last one.
426 if( pTokenStackPos
== pTokenStack
+ nTokenStackSize
)
427 pTokenStackPos
= pTokenStack
;
// Take the token data from the current stack entry.
433 nTokenValue
= pTokenStackPos
->nTokenValue
;
434 bTokenHasValue
= pTokenStackPos
->bTokenHasValue
;
435 aToken
= pTokenStackPos
->sToken
;
436 nRet
= pTokenStackPos
->nTokenId
;
438 // no, now push current value on stack
439 else if( SVPAR_WORKING
== eState
)
441 pTokenStackPos
->sToken
= aToken
;
442 pTokenStackPos
->nTokenValue
= nTokenValue
;
443 pTokenStackPos
->bTokenHasValue
= bTokenHasValue
;
444 pTokenStackPos
->nTokenId
= nRet
;
// Any state other than working, accepted or pending is turned into an error.
446 else if( SVPAR_ACCEPTED
!= eState
&& SVPAR_PENDING
!= eState
)
447 eState
= SVPAR_ERROR
; // an error occurred
// Moves the token stack position by nCnt entries (via GetStackPtr), adjusts
// nTokenStackPos accordingly, restores aToken / nTokenValue / bTokenHasValue
// from the entry reached and returns that entry's token id.
452 int SvParser::SkipToken( short nCnt
) // "skip" n Tokens backward
454 pTokenStackPos
= GetStackPtr( nCnt
);
455 short nTmp
= nTokenStackPos
- nCnt
;
// Clamp to the stack size. NOTE(review): the lower-bound check that precedes
// this "else if" is not visible in this listing.
458 else if( nTmp
> nTokenStackSize
)
459 nTmp
= nTokenStackSize
;
460 nTokenStackPos
= sal_uInt8(nTmp
);
// Restore the values of the entry that is now current.
463 aToken
= pTokenStackPos
->sToken
;
464 nTokenValue
= pTokenStackPos
->nTokenValue
;
465 bTokenHasValue
= pTokenStackPos
->bTokenHasValue
;
467 return pTokenStackPos
->nTokenId
;
// Returns a pointer to the token stack entry that lies nCnt entries away from
// the current position, wrapping around the ends of the array.
// NOTE(review): the conditions that select between the two halves below
// (presumably nCnt > 0 and nCnt < 0) and the else keywords are not visible in
// this listing.
470 SvParser::TokenStackType
* SvParser::GetStackPtr( short nCnt
)
// Index of the current position within the array.
472 sal_uInt8 nAktPos
= sal_uInt8(pTokenStackPos
- pTokenStack
);
// Forward: limit the distance to nTokenStackSize-1, then add it, wrapping
// past the end of the array if necessary.
475 if( nCnt
>= nTokenStackSize
)
476 nCnt
= (nTokenStackSize
-1);
477 if( nAktPos
+ nCnt
< nTokenStackSize
)
478 nAktPos
= sal::static_int_cast
< sal_uInt8
>(nAktPos
+ nCnt
);
480 nAktPos
= sal::static_int_cast
< sal_uInt8
>(
481 nAktPos
+ (nCnt
- nTokenStackSize
));
// Backward: limit the distance to -(nTokenStackSize-1), then subtract it,
// wrapping before the start of the array if necessary.
485 if( -nCnt
>= nTokenStackSize
)
486 nCnt
= -nTokenStackSize
+1;
487 if( -nCnt
<= nAktPos
)
488 nAktPos
= sal::static_int_cast
< sal_uInt8
>(nAktPos
+ nCnt
);
490 nAktPos
= sal::static_int_cast
< sal_uInt8
>(
491 nAktPos
+ (nCnt
+ nTokenStackSize
));
493 return pTokenStack
+ nAktPos
;
496 // Is called for each token which is recognised by CallParser.
// The int parameter is unnamed; the function body is not visible in this
// listing.
497 void SvParser::NextToken( int )
502 // to read asynchronously from SvStream
// Returns the nSaveToken stored in the implementation data, or 0 when no
// implementation data exists.
504 int SvParser::GetSaveToken() const
506 return pImplData
? pImplData
->nSaveToken
: 0;
// Stores the current parser status (stream position, token data, line/column
// and lookahead character) together with nToken in the implementation data.
// nToken - token id remembered in pImplData->nToken.
509 void SvParser::SaveState( int nToken
)
511 // save current status
// NOTE(review): the guard around the next two statements is not visible in
// this listing; presumably they run only when pImplData does not exist yet -
// confirm.
514 pImplData
= new SvParser_Impl
;
515 pImplData
->nSaveToken
= 0;
518 pImplData
->nFilePos
= rInput
.Tell();
519 pImplData
->nToken
= nToken
;
521 pImplData
->aToken
= aToken
;
522 pImplData
->nlLineNr
= nlLineNr
;
523 pImplData
->nlLinePos
= nlLinePos
;
524 pImplData
->nTokenValue
= nTokenValue
;
525 pImplData
->bTokenHasValue
= bTokenHasValue
;
526 pImplData
->nNextCh
= nNextCh
;
// Restores the parser status stored by SaveState: token data, line/column
// and lookahead character are copied back, the saved token becomes
// nSaveToken and the stream is positioned at the saved file position.
// NOTE(review): guards and the statement executed for the pending check are
// not visible in this listing.
529 void SvParser::RestoreState()
531 // restore old status
534 if( ERRCODE_IO_PENDING
== rInput
.GetError() )
536 aToken
= pImplData
->aToken
;
537 nlLineNr
= pImplData
->nlLineNr
;
538 nlLinePos
= pImplData
->nlLinePos
;
539 nTokenValue
= pImplData
->nTokenValue
;
540 bTokenHasValue
=pImplData
->bTokenHasValue
;
541 nNextCh
= pImplData
->nNextCh
;
// Make the token passed to SaveState available through GetSaveToken().
543 pImplData
->nSaveToken
= pImplData
->nToken
;
545 rInput
.Seek( pImplData
->nFilePos
);
// Called by the NewDataRead handler with the token stored in the
// implementation data. The int parameter is unnamed; the function body is not
// visible in this listing.
549 void SvParser::Continue( int )
// Merges the given ids into rWhichMap, which is treated as a list of
// [first, last] range pairs that is scanned until a 0 entry is found.
// rWhichMap - range table to update; must contain a terminating 0 entry,
//             since the scan loop stops only there.
// pWhichIds - pointer to the ids to add.
// nWhichIds - number of ids to read from pWhichIds.
// NOTE(review): braces, break statements and the "not found" bookkeeping are
// not visible in this listing.
553 void SvParser::BuildWhichTable( std::vector
<sal_uInt16
> &rWhichMap
,
554 sal_uInt16
*pWhichIds
,
555 sal_uInt16 nWhichIds
)
557 sal_uInt16 aNewRange
[2];
559 for( sal_uInt16 nCnt
= 0; nCnt
< nWhichIds
; ++nCnt
, ++pWhichIds
)
// A single id forms the range [id, id].
562 aNewRange
[0] = aNewRange
[1] = *pWhichIds
;
// Walk over the existing ranges, two entries at a time.
566 for ( sal_uInt16 nOfs
= 0; rWhichMap
[nOfs
]; nOfs
+= 2 )
// The id lies below this range and is not adjacent to it: insert a new range
// in front of it.
568 if( *pWhichIds
< rWhichMap
[nOfs
] - 1 )
571 rWhichMap
.insert( rWhichMap
.begin() + nOfs
, aNewRange
, aNewRange
+ 2 );
575 else if( *pWhichIds
== rWhichMap
[nOfs
] - 1 )
577 // extend range downwards
578 rWhichMap
[nOfs
] = *pWhichIds
;
// The id directly follows the end of this range.
582 else if( *pWhichIds
== rWhichMap
[nOfs
+1] + 1 )
// If the following range starts right after the id, both ranges are joined.
584 if( rWhichMap
[nOfs
+2] != 0 && rWhichMap
[nOfs
+2] == *pWhichIds
+ 1 )
586 // merge with next field
587 rWhichMap
[nOfs
+1] = rWhichMap
[nOfs
+3];
588 rWhichMap
.erase( rWhichMap
.begin() + nOfs
+ 2,
589 rWhichMap
.begin() + nOfs
+ 4 );
592 // extend range upwards
593 rWhichMap
[nOfs
+1] = *pWhichIds
;
// Insert the new range in front of the last element of the vector.
// NOTE(review): presumably executed only when no existing range matched -
// confirm.
602 rWhichMap
.insert( rWhichMap
.begin() + rWhichMap
.size() - 1,
603 aNewRange
, aNewRange
+ 2 );
// Link handler NewDataRead of SvParser.
// NOTE(review): the switch statement, most case labels, braces, break and
// return statements of this handler are not visible in this listing; only the
// statements below can be described.
609 IMPL_LINK_NOARG( SvParser
, NewDataRead
)
614 // if file is loaded we are not allowed to continue
615 // instead should ignore the call.
616 if( IsDownloadingFile() )
619 eState
= SVPAR_WORKING
;
// Resume parsing with the token stored by SaveState.
622 Continue( pImplData
->nToken
);
624 if( ERRCODE_IO_PENDING
== rInput
.GetError() )
// Unless the parser is pending again, the reference is released.
627 if( SVPAR_PENDING
!= eState
)
628 ReleaseRef(); // ready otherwise!
631 case SVPAR_WAITFORDATA
:
632 eState
= SVPAR_WORKING
;
635 case SVPAR_NOTSTARTED
:
640 ReleaseRef(); // ready otherwise!
647 /*========================================================================
649 * SvKeyValueIterator.
651 *======================================================================*/
// List type used by SvKeyValueIterator::Impl: a boost::ptr_vector of
// SvKeyValue.
653 typedef boost::ptr_vector
<SvKeyValue
> SvKeyValueList_Impl
;
// Private data of SvKeyValueIterator.
// NOTE(review): only maList is visible in this listing; the iterator code
// also uses a member mnPos, which is presumably declared here as well.
655 struct SvKeyValueIterator::Impl
// The stored key/value entries.
657 SvKeyValueList_Impl maList
;
// Constructor: creates the private Impl object on the heap.
663 SvKeyValueIterator::SvKeyValueIterator() : mpImpl(new Impl
) {}
// Destructor. The body is not visible in this listing; presumably it deletes
// mpImpl, which the constructor allocates with new - confirm.
665 SvKeyValueIterator::~SvKeyValueIterator()
// Starts an iteration: sets the position to the number of stored entries and
// returns the result of GetNext(). Because GetNext() decrements the position
// before reading, the first entry delivered is the last one in the list.
// rKeyVal - receives the entry (filled in by GetNext).
670 bool SvKeyValueIterator::GetFirst (SvKeyValue
&rKeyVal
)
672 mpImpl
->mnPos
= mpImpl
->maList
.size();
673 return GetNext (rKeyVal
);
// Delivers the next entry, walking from the end of the list towards its
// start: while the position is above 0 it is decremented and the entry at
// that index is copied into rKeyVal.
// NOTE(review): the return statements are not visible in this listing;
// presumably true after copying an entry and false otherwise - confirm.
676 bool SvKeyValueIterator::GetNext (SvKeyValue
&rKeyVal
)
678 if (mpImpl
->mnPos
> 0)
680 rKeyVal
= mpImpl
->maList
[--mpImpl
->mnPos
];
// Appends a heap-allocated copy of rKeyVal to the end of the list.
690 void SvKeyValueIterator::Append (const SvKeyValue
&rKeyVal
)
692 mpImpl
->maList
.push_back(new SvKeyValue(rKeyVal
));
695 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */