Update ooo320-m1
[ooovba.git] / svtools / source / svrtf / svparser.cxx
blob8c031e8f9b845ec748c37b81143a79a7a5e70885
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: svparser.cxx,v $
10 * $Revision: 1.17 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_svtools.hxx"
33 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */
35 #include <stdio.h>
36 #include <svtools/svparser.hxx>
37 #include <tools/stream.hxx>
38 #include <tools/debug.hxx>
39 #define _SVSTDARR_USHORTS
40 #include <svtools/svstdarr.hxx>
41 #include <rtl/textcvt.h>
42 #include <rtl/tencinfo.h>
44 #define SVPAR_CSM_
46 #define SVPAR_CSM_ANSI 0x0001U
47 #define SVPAR_CSM_UTF8 0x0002U
48 #define SVPAR_CSM_UCS2B 0x0004U
49 #define SVPAR_CSM_UCS2L 0x0008U
50 #define SVPAR_CSM_SWITCH 0x8000U
52 // Struktur, um sich die akt. Daten zumerken
53 struct SvParser_Impl
55 String aToken; // gescanntes Token
56 ULONG nFilePos; // akt. Position im Stream
57 ULONG nlLineNr; // akt. Zeilen Nummer
58 ULONG nlLinePos; // akt. Spalten Nummer
59 long nTokenValue; // zusaetzlicher Wert (RTF)
60 BOOL bTokenHasValue; // indicates whether nTokenValue is valid
61 int nToken; // akt. Token
62 sal_Unicode nNextCh; // akt. Zeichen
64 int nSaveToken; // das Token vom Continue
66 rtl_TextToUnicodeConverter hConv;
67 rtl_TextToUnicodeContext hContext;
69 #ifndef PRODUCT
70 SvFileStream aOut;
71 #endif
73 SvParser_Impl() :
74 nSaveToken(0), hConv( 0 ), hContext( (rtl_TextToUnicodeContext)1 )
82 // Konstruktor
83 SvParser::SvParser( SvStream& rIn, BYTE nStackSize )
84 : rInput( rIn )
85 , nlLineNr( 1 )
86 , nlLinePos( 1 )
87 , pImplData( 0 )
88 , nTokenValue( 0 )
89 , bTokenHasValue( false )
90 , eState( SVPAR_NOTSTARTED )
91 , eSrcEnc( RTL_TEXTENCODING_DONTKNOW )
92 , bDownloadingFile( FALSE )
93 , nTokenStackSize( nStackSize )
94 , nTokenStackPos( 0 )
96 bUCS2BSrcEnc = bSwitchToUCS2 = FALSE;
97 eState = SVPAR_NOTSTARTED;
98 if( nTokenStackSize < 3 )
99 nTokenStackSize = 3;
100 pTokenStack = new TokenStackType[ nTokenStackSize ];
101 pTokenStackPos = pTokenStack;
103 #ifndef PRODUCT
105 // wenn die Datei schon existiert, dann Anhaengen:
106 if( !pImplData )
107 pImplData = new SvParser_Impl;
108 pImplData->aOut.Open( String::CreateFromAscii( "\\parser.dmp" ),
109 STREAM_STD_WRITE | STREAM_NOCREATE );
110 if( pImplData->aOut.GetError() || !pImplData->aOut.IsOpen() )
111 pImplData->aOut.Close();
112 else
114 pImplData->aOut.Seek( STREAM_SEEK_TO_END );
115 pImplData->aOut << "\x0c\n\n >>>>>>>>>>>>>>> Dump Start <<<<<<<<<<<<<<<\n";
117 #endif
120 SvParser::~SvParser()
122 #ifndef PRODUCT
123 if( pImplData->aOut.IsOpen() )
124 pImplData->aOut << "\n\n >>>>>>>>>>>>>>> Dump Ende <<<<<<<<<<<<<<<\n";
125 pImplData->aOut.Close();
126 #endif
128 if( pImplData && pImplData->hConv )
130 rtl_destroyTextToUnicodeContext( pImplData->hConv,
131 pImplData->hContext );
132 rtl_destroyTextToUnicodeConverter( pImplData->hConv );
135 delete pImplData;
137 delete [] pTokenStack;
140 void SvParser::ClearTxtConvContext()
142 if( pImplData && pImplData->hConv )
143 rtl_resetTextToUnicodeContext( pImplData->hConv, pImplData->hContext );
146 void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc )
149 if( eEnc != eSrcEnc )
151 if( pImplData && pImplData->hConv )
153 rtl_destroyTextToUnicodeContext( pImplData->hConv,
154 pImplData->hContext );
155 rtl_destroyTextToUnicodeConverter( pImplData->hConv );
156 pImplData->hConv = 0;
157 pImplData->hContext = (rtl_TextToUnicodeContext )1;
160 if( rtl_isOctetTextEncoding(eEnc) ||
161 RTL_TEXTENCODING_UCS2 == eEnc )
163 eSrcEnc = eEnc;
164 if( !pImplData )
165 pImplData = new SvParser_Impl;
166 pImplData->hConv = rtl_createTextToUnicodeConverter( eSrcEnc );
167 DBG_ASSERT( pImplData->hConv,
168 "SvParser::SetSrcEncoding: no converter for source encoding" );
169 if( !pImplData->hConv )
170 eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
171 else
172 pImplData->hContext =
173 rtl_createTextToUnicodeContext( pImplData->hConv );
175 else
177 DBG_ASSERT( !this,
178 "SvParser::SetSrcEncoding: invalid source encoding" );
179 eSrcEnc = RTL_TEXTENCODING_DONTKNOW;
184 void SvParser::RereadLookahead()
186 rInput.Seek(nNextChPos);
187 nNextCh = GetNextChar();
190 sal_Unicode SvParser::GetNextChar()
192 sal_Unicode c = 0U;
194 // When reading muliple bytes, we don't have to care about the file
195 // position when we run inti the pending state. The file position is
196 // maintained by SaveState/RestoreState.
197 BOOL bErr;
198 if( bSwitchToUCS2 && 0 == rInput.Tell() )
200 sal_uChar c1, c2;
201 BOOL bSeekBack = TRUE;
203 rInput >> c1;
204 bErr = rInput.IsEof() || rInput.GetError();
205 if( !bErr )
207 if( 0xff == c1 || 0xfe == c1 )
209 rInput >> c2;
210 bErr = rInput.IsEof() || rInput.GetError();
211 if( !bErr )
213 if( 0xfe == c1 && 0xff == c2 )
215 eSrcEnc = RTL_TEXTENCODING_UCS2;
216 bUCS2BSrcEnc = TRUE;
217 bSeekBack = FALSE;
219 else if( 0xff == c1 && 0xfe == c2 )
221 eSrcEnc = RTL_TEXTENCODING_UCS2;
222 bUCS2BSrcEnc = FALSE;
223 bSeekBack = FALSE;
228 if( bSeekBack )
229 rInput.Seek( 0 );
231 bSwitchToUCS2 = FALSE;
234 nNextChPos = rInput.Tell();
236 if( RTL_TEXTENCODING_UCS2 == eSrcEnc )
238 sal_Unicode cUC = USHRT_MAX;
239 sal_uChar c1, c2;
241 rInput >> c1 >> c2;
242 if( 2 == rInput.Tell() &&
243 !(rInput.IsEof() || rInput.GetError()) &&
244 ( (bUCS2BSrcEnc && 0xfe == c1 && 0xff == c2) ||
245 (!bUCS2BSrcEnc && 0xff == c1 && 0xfe == c2) ) )
246 rInput >> c1 >> c2;
248 bErr = rInput.IsEof() || rInput.GetError();
249 if( !bErr )
251 if( bUCS2BSrcEnc )
252 cUC = (sal_Unicode(c1) << 8) | c2;
253 else
254 cUC = (sal_Unicode(c2) << 8) | c1;
257 if( !bErr )
259 c = cUC;
262 else
264 sal_Size nChars = 0;
267 sal_Char c1; // signed, that's the text converter expects
268 rInput >> c1;
269 bErr = rInput.IsEof() || rInput.GetError();
270 if( !bErr )
272 if (
273 RTL_TEXTENCODING_DONTKNOW == eSrcEnc ||
274 RTL_TEXTENCODING_SYMBOL == eSrcEnc
277 // no convserion shall take place
278 c = (sal_Unicode)c1;
279 nChars = 1;
281 else
283 DBG_ASSERT( pImplData && pImplData->hConv,
284 "no text converter!" );
286 sal_Unicode cUC;
287 sal_uInt32 nInfo = 0;
288 sal_Size nCvtBytes;
289 nChars = rtl_convertTextToUnicode(
290 pImplData->hConv, pImplData->hContext,
291 &c1, 1, &cUC, 1,
292 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
293 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
294 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
295 &nInfo, &nCvtBytes);
296 if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 )
298 // The conversion wasn't successfull because we haven't
299 // read enough characters.
300 if( pImplData->hContext != (rtl_TextToUnicodeContext)1 )
302 while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 )
304 rInput >> c1;
305 bErr = rInput.IsEof() || rInput.GetError();
306 if( bErr )
307 break;
309 nChars = rtl_convertTextToUnicode(
310 pImplData->hConv, pImplData->hContext,
311 &c1, 1, &cUC, 1,
312 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
313 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
314 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
315 &nInfo, &nCvtBytes);
317 if( !bErr )
319 if( 1 == nChars && 0 == nInfo )
321 c = cUC;
323 else if( 0 != nChars || 0 != nInfo )
325 DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0,
326 "source buffer is to small" );
327 DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL)) == 0,
328 "there is a conversion error" );
329 DBG_ASSERT( 0 == nChars,
330 "there is a converted character, but an error" );
331 // There are still errors, but nothing we can
332 // do
333 c = (sal_Unicode)'?';
334 nChars = 1;
338 else
340 sal_Char sBuffer[10];
341 sBuffer[0] = c1;
342 sal_uInt16 nLen = 1;
343 while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 &&
344 nLen < 10 )
346 rInput >> c1;
347 bErr = rInput.IsEof() || rInput.GetError();
348 if( bErr )
349 break;
351 sBuffer[nLen++] = c1;
352 nChars = rtl_convertTextToUnicode(
353 pImplData->hConv, 0, sBuffer, nLen, &cUC, 1,
354 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
355 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
356 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
357 &nInfo, &nCvtBytes);
359 if( !bErr )
361 if( 1 == nChars && 0 == nInfo )
363 DBG_ASSERT( nCvtBytes == nLen,
364 "no all bytes have been converted!" );
365 c = cUC;
367 else
369 DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0,
370 "source buffer is to small" );
371 DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL)) == 0,
372 "there is a conversion error" );
373 DBG_ASSERT( 0 == nChars,
374 "there is a converted character, but an error" );
376 // There are still errors, so we use the first
377 // character and restart after that.
378 c = (sal_Unicode)sBuffer[0];
379 rInput.SeekRel( -(nLen-1) );
380 nChars = 1;
385 else if( 1 == nChars && 0 == nInfo )
387 // The conversion was successfull
388 DBG_ASSERT( nCvtBytes == 1,
389 "no all bytes have been converted!" );
390 c = cUC;
392 else if( 0 != nChars || 0 != nInfo )
394 DBG_ASSERT( 0 == nChars,
395 "there is a converted character, but an error" );
396 DBG_ASSERT( 0 != nInfo,
397 "there is no converted character and no error" );
398 // #73398#: If the character could not be converted,
399 // because a conversion is not available, do no conversion at all.
400 c = (sal_Unicode)c1;
401 nChars = 1;
407 while( 0 == nChars && !bErr );
409 if( bErr )
411 if( ERRCODE_IO_PENDING == rInput.GetError() )
413 eState = SVPAR_PENDING;
414 return c;
416 else
417 return sal_Unicode(EOF);
420 #ifndef PRODUCT
421 if( pImplData->aOut.IsOpen() )
422 pImplData->aOut << ByteString::ConvertFromUnicode( c,
423 RTL_TEXTENCODING_MS_1251 );
424 #endif
426 if( c == '\n' )
428 IncLineNr();
429 SetLinePos( 1L );
431 else
432 IncLinePos();
433 return c;
436 int SvParser::GetNextToken()
438 int nRet = 0;
440 if( !nTokenStackPos )
442 aToken.Erase(); // Token-Buffer loeschen
443 nTokenValue = -1; // Kennzeichen fuer kein Value gelesen
444 bTokenHasValue = false;
446 nRet = _GetNextToken();
447 if( SVPAR_PENDING == eState )
448 return nRet;
451 ++pTokenStackPos;
452 if( pTokenStackPos == pTokenStack + nTokenStackSize )
453 pTokenStackPos = pTokenStack;
455 // vom Stack holen ??
456 if( nTokenStackPos )
458 --nTokenStackPos;
459 nTokenValue = pTokenStackPos->nTokenValue;
460 bTokenHasValue = pTokenStackPos->bTokenHasValue;
461 aToken = pTokenStackPos->sToken;
462 nRet = pTokenStackPos->nTokenId;
464 // nein, dann das aktuelle auf den Stack
465 else if( SVPAR_WORKING == eState )
467 pTokenStackPos->sToken = aToken;
468 pTokenStackPos->nTokenValue = nTokenValue;
469 pTokenStackPos->bTokenHasValue = bTokenHasValue;
470 pTokenStackPos->nTokenId = nRet;
472 else if( SVPAR_ACCEPTED != eState && SVPAR_PENDING != eState )
473 eState = SVPAR_ERROR; // irgend ein Fehler
475 return nRet;
478 int SvParser::SkipToken( short nCnt ) // n Tokens zurueck "skippen"
480 pTokenStackPos = GetStackPtr( nCnt );
481 short nTmp = nTokenStackPos - nCnt;
482 if( nTmp < 0 )
483 nTmp = 0;
484 else if( nTmp > nTokenStackSize )
485 nTmp = nTokenStackSize;
486 nTokenStackPos = BYTE(nTmp);
488 // und die Werte zurueck
489 aToken = pTokenStackPos->sToken;
490 nTokenValue = pTokenStackPos->nTokenValue;
491 bTokenHasValue = pTokenStackPos->bTokenHasValue;
493 return pTokenStackPos->nTokenId;
496 SvParser::TokenStackType* SvParser::GetStackPtr( short nCnt )
498 BYTE nAktPos = BYTE(pTokenStackPos - pTokenStack );
499 if( nCnt > 0 )
501 if( nCnt >= nTokenStackSize )
502 nCnt = (nTokenStackSize-1);
503 if( nAktPos + nCnt < nTokenStackSize )
504 nAktPos = sal::static_int_cast< BYTE >(nAktPos + nCnt);
505 else
506 nAktPos = sal::static_int_cast< BYTE >(
507 nAktPos + (nCnt - nTokenStackSize));
509 else if( nCnt < 0 )
511 if( -nCnt >= nTokenStackSize )
512 nCnt = -nTokenStackSize+1;
513 if( -nCnt <= nAktPos )
514 nAktPos = sal::static_int_cast< BYTE >(nAktPos + nCnt);
515 else
516 nAktPos = sal::static_int_cast< BYTE >(
517 nAktPos + (nCnt + nTokenStackSize));
519 return pTokenStack + nAktPos;
522 // wird fuer jedes Token gerufen, das in CallParser erkannt wird
523 void SvParser::NextToken( int )
528 // fuers asynchrone lesen aus dem SvStream
530 int SvParser::GetSaveToken() const
532 return pImplData ? pImplData->nSaveToken : 0;
535 void SvParser::SaveState( int nToken )
537 // aktuellen Status merken
538 if( !pImplData )
540 pImplData = new SvParser_Impl;
541 pImplData->nSaveToken = 0;
544 pImplData->nFilePos = rInput.Tell();
545 pImplData->nToken = nToken;
547 pImplData->aToken = aToken;
548 pImplData->nlLineNr = nlLineNr;
549 pImplData->nlLinePos = nlLinePos;
550 pImplData->nTokenValue= nTokenValue;
551 pImplData->bTokenHasValue = bTokenHasValue;
552 pImplData->nNextCh = nNextCh;
555 void SvParser::RestoreState()
557 // alten Status wieder zurueck setzen
558 if( pImplData )
560 if( ERRCODE_IO_PENDING == rInput.GetError() )
561 rInput.ResetError();
562 aToken = pImplData->aToken;
563 nlLineNr = pImplData->nlLineNr;
564 nlLinePos = pImplData->nlLinePos;
565 nTokenValue= pImplData->nTokenValue;
566 bTokenHasValue=pImplData->bTokenHasValue;
567 nNextCh = pImplData->nNextCh;
569 pImplData->nSaveToken = pImplData->nToken;
571 rInput.Seek( pImplData->nFilePos );
575 void SvParser::Continue( int )
579 void SvParser::BuildWhichTbl( SvUShorts &rWhichMap,
580 USHORT *pWhichIds,
581 USHORT nWhichIds )
583 USHORT aNewRange[2];
585 for( USHORT nCnt = 0; nCnt < nWhichIds; ++nCnt, ++pWhichIds )
586 if( *pWhichIds )
588 aNewRange[0] = aNewRange[1] = *pWhichIds;
589 BOOL bIns = TRUE;
591 // Position suchen
592 for ( USHORT nOfs = 0; rWhichMap[nOfs]; nOfs += 2 )
594 if( *pWhichIds < rWhichMap[nOfs] - 1 )
596 // neuen Range davor
597 rWhichMap.Insert( aNewRange, 2, nOfs );
598 bIns = FALSE;
599 break;
601 else if( *pWhichIds == rWhichMap[nOfs] - 1 )
603 // diesen Range nach unten erweitern
604 rWhichMap[nOfs] = *pWhichIds;
605 bIns = FALSE;
606 break;
608 else if( *pWhichIds == rWhichMap[nOfs+1] + 1 )
610 if( rWhichMap[nOfs+2] != 0 && rWhichMap[nOfs+2] == *pWhichIds + 1 )
612 // mit dem naechsten Bereich mergen
613 rWhichMap[nOfs+1] = rWhichMap[nOfs+3];
614 rWhichMap.Remove( nOfs+2, 2 );
616 else
617 // diesen Range nach oben erweitern
618 rWhichMap[nOfs+1] = *pWhichIds;
619 bIns = FALSE;
620 break;
624 // einen Range hinten anhaengen
625 if( bIns )
626 rWhichMap.Insert( aNewRange, 2, rWhichMap.Count()-1 );
631 IMPL_STATIC_LINK( SvParser, NewDataRead, void*, EMPTYARG )
633 switch( pThis->eState )
635 case SVPAR_PENDING:
636 // Wenn gerade ein File geladen wird duerfen wir nicht weiterlaufen,
637 // sondern muessen den Aufruf ignorieren.
638 if( pThis->IsDownloadingFile() )
639 break;
641 pThis->eState = SVPAR_WORKING;
642 pThis->RestoreState();
644 pThis->Continue( pThis->pImplData->nToken );
646 if( ERRCODE_IO_PENDING == pThis->rInput.GetError() )
647 pThis->rInput.ResetError();
649 if( SVPAR_PENDING != pThis->eState )
650 pThis->ReleaseRef(); // ansonsten sind wir fertig!
651 break;
653 case SVPAR_WAITFORDATA:
654 pThis->eState = SVPAR_WORKING;
655 break;
657 case SVPAR_NOTSTARTED:
658 case SVPAR_WORKING:
659 break;
661 default:
662 pThis->ReleaseRef(); // ansonsten sind wir fertig!
663 break;
666 return 0;
669 /*========================================================================
671 * SvKeyValueIterator.
673 *======================================================================*/
674 SV_DECL_PTRARR_DEL(SvKeyValueList_Impl, SvKeyValue*, 0, 4)
675 SV_IMPL_PTRARR(SvKeyValueList_Impl, SvKeyValue*);
678 * SvKeyValueIterator.
680 SvKeyValueIterator::SvKeyValueIterator (void)
681 : m_pList (new SvKeyValueList_Impl),
682 m_nPos (0)
687 * ~SvKeyValueIterator.
689 SvKeyValueIterator::~SvKeyValueIterator (void)
691 delete m_pList;
695 * GetFirst.
697 BOOL SvKeyValueIterator::GetFirst (SvKeyValue &rKeyVal)
699 m_nPos = m_pList->Count();
700 return GetNext (rKeyVal);
704 * GetNext.
706 BOOL SvKeyValueIterator::GetNext (SvKeyValue &rKeyVal)
708 if (m_nPos > 0)
710 rKeyVal = *m_pList->GetObject(--m_nPos);
711 return TRUE;
713 else
715 // Nothing to do.
716 return FALSE;
721 * Append.
723 void SvKeyValueIterator::Append (const SvKeyValue &rKeyVal)
725 SvKeyValue *pKeyVal = new SvKeyValue (rKeyVal);
726 m_pList->C40_INSERT(SvKeyValue, pKeyVal, m_pList->Count());
729 /* vi:set tabstop=4 shiftwidth=4 expandtab: */