1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: svparser.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_svtools.hxx"
33 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil -*- */
36 #include <svtools/svparser.hxx>
37 #include <tools/stream.hxx>
38 #include <tools/debug.hxx>
39 #define _SVSTDARR_USHORTS
40 #include <svtools/svstdarr.hxx>
41 #include <rtl/textcvt.h>
42 #include <rtl/tencinfo.h>
// Bit-flag constants; each value occupies a distinct bit.
// NOTE(review): not referenced in the visible part of this file; presumably
// character-set mode flags (ANSI, UTF-8, UCS-2 big/little endian, switch) —
// confirm against the users of these macros.
46 #define SVPAR_CSM_ANSI 0x0001U
47 #define SVPAR_CSM_UTF8 0x0002U
48 #define SVPAR_CSM_UCS2B 0x0004U
49 #define SVPAR_CSM_UCS2L 0x0008U
50 #define SVPAR_CSM_SWITCH 0x8000U
52 // Structure used to remember the current data
// NOTE(review): the declaration line of the structure is not visible here;
// judging by `new SvParser_Impl` and the `pImplData->...` accesses further
// down, these are the members of SvParser_Impl — confirm.
55 String aToken
; // scanned token
56 ULONG nFilePos
; // current position in the stream
57 ULONG nlLineNr
; // current line number
58 ULONG nlLinePos
; // current column number
59 long nTokenValue
; // additional value (RTF)
60 BOOL bTokenHasValue
; // indicates whether nTokenValue is valid
61 int nToken
; // current token
62 sal_Unicode nNextCh
; // current character
64 int nSaveToken
; // the token from Continue
66 rtl_TextToUnicodeConverter hConv
;
67 rtl_TextToUnicodeContext hContext
;
// Member initializers: no saved token, no converter (hConv 0) and hContext set
// to the placeholder value 1.
74 nSaveToken(0), hConv( 0 ), hContext( (rtl_TextToUnicodeContext
)1 )
// Constructs the parser: sets the initial state (SVPAR_NOTSTARTED) and source
// encoding (RTL_TEXTENCODING_DONTKNOW), allocates the token stack array and
// tries to open the dump stream.
// nStackSize is stored in nTokenStackSize, which is the number of
// TokenStackType entries allocated for pTokenStack.
// NOTE(review): parts of the initializer list and the braces are not visible
// in this excerpt; rIn is presumably bound to rInput — confirm.
83 SvParser::SvParser( SvStream
& rIn
, BYTE nStackSize
)
89 , bTokenHasValue( false )
90 , eState( SVPAR_NOTSTARTED
)
91 , eSrcEnc( RTL_TEXTENCODING_DONTKNOW
)
92 , bDownloadingFile( FALSE
)
93 , nTokenStackSize( nStackSize
)
96 bUCS2BSrcEnc
= bSwitchToUCS2
= FALSE
;
97 eState
= SVPAR_NOTSTARTED
;
// NOTE(review): the statement guarded by this check is not visible here;
// presumably it enforces a minimum stack size of 3 — confirm.
98 if( nTokenStackSize
< 3 )
100 pTokenStack
= new TokenStackType
[ nTokenStackSize
];
// The current stack position starts at the first entry.
101 pTokenStackPos
= pTokenStack
;
105 // if the file already exists, append to it:
107 pImplData
= new SvParser_Impl
;
// Opened with STREAM_NOCREATE, matching the comment above: the dump is only
// written when the file is already there.
108 pImplData
->aOut
.Open( String::CreateFromAscii( "\\parser.dmp" ),
109 STREAM_STD_WRITE
| STREAM_NOCREATE
);
// Opening failed: close the stream so that later IsOpen() checks skip the dump.
110 if( pImplData
->aOut
.GetError() || !pImplData
->aOut
.IsOpen() )
111 pImplData
->aOut
.Close();
// NOTE(review): presumably the else branch (the `else` is not visible here):
// position at the end of the file and write the start marker.
114 pImplData
->aOut
.Seek( STREAM_SEEK_TO_END
);
115 pImplData
->aOut
<< "\x0c\n\n >>>>>>>>>>>>>>> Dump Start <<<<<<<<<<<<<<<\n";
// Destructor: writes the end marker to the dump stream and closes it (if it is
// open), destroys the text converter together with its context (if a converter
// exists) and frees the token stack array.
120 SvParser::~SvParser()
123 if( pImplData
->aOut
.IsOpen() )
124 pImplData
->aOut
<< "\n\n >>>>>>>>>>>>>>> Dump Ende <<<<<<<<<<<<<<<\n";
125 pImplData
->aOut
.Close();
// Only release the converter when one was actually created (hConv non-zero).
128 if( pImplData
&& pImplData
->hConv
)
// The context is destroyed before the converter it belongs to.
130 rtl_destroyTextToUnicodeContext( pImplData
->hConv
,
131 pImplData
->hContext
);
132 rtl_destroyTextToUnicodeConverter( pImplData
->hConv
);
137 delete [] pTokenStack
;
// Resets the text-to-unicode conversion context of the current converter.
// Does nothing when no implementation data or no converter exists.
140 void SvParser::ClearTxtConvContext()
142 if( pImplData
&& pImplData
->hConv
)
143 rtl_resetTextToUnicodeContext( pImplData
->hConv
, pImplData
->hContext
);
// Changes the source encoding to eEnc.
// When eEnc differs from the current eSrcEnc, an existing converter and its
// context are destroyed first. For octet encodings and RTL_TEXTENCODING_UCS2 a
// new converter (and context) is created; if no converter could be created,
// eSrcEnc is set to RTL_TEXTENCODING_DONTKNOW.
// NOTE(review): several lines (braces, else branches) are not visible in this
// excerpt; presumably eSrcEnc is assigned eEnc before the converter is
// created — confirm the exact branch structure against the complete file.
146 void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc
)
149 if( eEnc
!= eSrcEnc
)
// Release the converter that belongs to the previous encoding.
151 if( pImplData
&& pImplData
->hConv
)
153 rtl_destroyTextToUnicodeContext( pImplData
->hConv
,
154 pImplData
->hContext
);
155 rtl_destroyTextToUnicodeConverter( pImplData
->hConv
);
// Back to the "no converter" state: hConv 0 and the placeholder context 1.
156 pImplData
->hConv
= 0;
157 pImplData
->hContext
= (rtl_TextToUnicodeContext
)1;
// Only octet encodings and UCS-2 get a converter.
160 if( rtl_isOctetTextEncoding(eEnc
) ||
161 RTL_TEXTENCODING_UCS2
== eEnc
)
165 pImplData
= new SvParser_Impl
;
166 pImplData
->hConv
= rtl_createTextToUnicodeConverter( eSrcEnc
);
167 DBG_ASSERT( pImplData
->hConv
,
168 "SvParser::SetSrcEncoding: no converter for source encoding" );
// Without a converter the encoding is treated as unknown.
169 if( !pImplData
->hConv
)
170 eSrcEnc
= RTL_TEXTENCODING_DONTKNOW
;
// NOTE(review): presumably the else branch of the check above — the context is
// created for the new converter.
172 pImplData
->hContext
=
173 rtl_createTextToUnicodeContext( pImplData
->hConv
);
// NOTE(review): presumably the branch for encodings that are neither octet
// encodings nor UCS-2; the first line of this assertion is not visible here.
178 "SvParser::SetSrcEncoding: invalid source encoding" );
179 eSrcEnc
= RTL_TEXTENCODING_DONTKNOW
;
// Seeks the input stream back to nNextChPos (the position GetNextChar records
// before it reads a character) and reads the lookahead character nNextCh again.
184 void SvParser::RereadLookahead()
186 rInput
.Seek(nNextChPos
);
187 nNextCh
= GetNextChar();
// Reads the next character from rInput, converts it to Unicode according to
// eSrcEnc and returns it.
// Visible behaviour:
//  - at stream position 0 with bSwitchToUCS2 set, the first bytes are compared
//    with a byte order mark (0xfe 0xff or 0xff 0xfe), which switches eSrcEnc
//    to RTL_TEXTENCODING_UCS2;
//  - the stream position before the read is remembered in nNextChPos;
//  - UCS-2 input is assembled from two bytes;
//  - other encodings go through rtl_convertTextToUnicode, reading further
//    bytes while RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL is reported;
//  - when the stream reports ERRCODE_IO_PENDING, eState becomes SVPAR_PENDING.
// NOTE(review): many lines of this function (braces, stream reads, else
// branches, the enclosing loop header) are not visible in this excerpt; the
// notes in this block only describe the visible statements.
190 sal_Unicode
SvParser::GetNextChar()
194 // When reading multiple bytes, we don't have to care about the file
195 // position when we run into the pending state. The file position is
196 // maintained by SaveState/RestoreState.
// Byte order mark detection, only at the very beginning of the stream.
198 if( bSwitchToUCS2
&& 0 == rInput
.Tell() )
201 BOOL bSeekBack
= TRUE
;
204 bErr
= rInput
.IsEof() || rInput
.GetError();
// Both possible first bytes of a UCS-2 byte order mark.
207 if( 0xff == c1
|| 0xfe == c1
)
210 bErr
= rInput
.IsEof() || rInput
.GetError();
// Byte sequence 0xfe 0xff.
213 if( 0xfe == c1
&& 0xff == c2
)
215 eSrcEnc
= RTL_TEXTENCODING_UCS2
;
// Byte sequence 0xff 0xfe: UCS-2 with bUCS2BSrcEnc cleared.
219 else if( 0xff == c1
&& 0xfe == c2
)
221 eSrcEnc
= RTL_TEXTENCODING_UCS2
;
222 bUCS2BSrcEnc
= FALSE
;
231 bSwitchToUCS2
= FALSE
;
// Remember where the character about to be read starts.
234 nNextChPos
= rInput
.Tell();
236 if( RTL_TEXTENCODING_UCS2
== eSrcEnc
)
238 sal_Unicode cUC
= USHRT_MAX
;
// The two bytes just read are the byte order mark matching bUCS2BSrcEnc and
// sit at the start of the stream (position 2 after reading them).
// NOTE(review): presumably the mark is skipped by reading two more bytes; the
// reads are not visible here.
242 if( 2 == rInput
.Tell() &&
243 !(rInput
.IsEof() || rInput
.GetError()) &&
244 ( (bUCS2BSrcEnc
&& 0xfe == c1
&& 0xff == c2
) ||
245 (!bUCS2BSrcEnc
&& 0xff == c1
&& 0xfe == c2
) ) )
248 bErr
= rInput
.IsEof() || rInput
.GetError();
// First byte is the high byte.
// NOTE(review): presumably selected when bUCS2BSrcEnc is set — confirm.
252 cUC
= (sal_Unicode(c1
) << 8) | c2
;
// Second byte is the high byte.
254 cUC
= (sal_Unicode(c2
) << 8) | c1
;
267 sal_Char c1
; // signed, that's what the text converter expects
269 bErr
= rInput
.IsEof() || rInput
.GetError();
273 RTL_TEXTENCODING_DONTKNOW
== eSrcEnc
||
274 RTL_TEXTENCODING_SYMBOL
== eSrcEnc
277 // no conversion shall take place
283 DBG_ASSERT( pImplData
&& pImplData
->hConv
,
284 "no text converter!" );
287 sal_uInt32 nInfo
= 0;
// First conversion attempt; undefined and invalid input is reported as an
// error through the flags below.
289 nChars
= rtl_convertTextToUnicode(
290 pImplData
->hConv
, pImplData
->hContext
,
292 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
|
293 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
|
294 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
,
296 if( (nInfo
&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
) != 0 )
298 // The conversion wasn't successful because we haven't
299 // read enough characters.
// A context other than the placeholder value 1 exists: keep feeding bytes
// through the context until the converter no longer asks for more.
300 if( pImplData
->hContext
!= (rtl_TextToUnicodeContext
)1 )
302 while( (nInfo
&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
) != 0 )
305 bErr
= rInput
.IsEof() || rInput
.GetError();
309 nChars
= rtl_convertTextToUnicode(
310 pImplData
->hConv
, pImplData
->hContext
,
312 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
|
313 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
|
314 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
,
319 if( 1 == nChars
&& 0 == nInfo
)
323 else if( 0 != nChars
|| 0 != nInfo
)
325 DBG_ASSERT( (nInfo
&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
) == 0,
326 "source buffer is to small" );
327 DBG_ASSERT( (nInfo
&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
)) == 0,
328 "there is a conversion error" );
329 DBG_ASSERT( 0 == nChars
,
330 "there is a converted character, but an error" );
331 // There are still errors, but nothing we can
// The unconvertible input is replaced by a question mark.
333 c
= (sal_Unicode
)'?';
// No usable context: collect the bytes in a local buffer and convert the whole
// buffer without a context (0 is passed as context below).
340 sal_Char sBuffer
[10];
343 while( (nInfo
&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
) != 0 &&
347 bErr
= rInput
.IsEof() || rInput
.GetError();
351 sBuffer
[nLen
++] = c1
;
352 nChars
= rtl_convertTextToUnicode(
353 pImplData
->hConv
, 0, sBuffer
, nLen
, &cUC
, 1,
354 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
|
355 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
|
356 RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR
,
361 if( 1 == nChars
&& 0 == nInfo
)
363 DBG_ASSERT( nCvtBytes
== nLen
,
364 "no all bytes have been converted!" );
369 DBG_ASSERT( (nInfo
&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
) == 0,
370 "source buffer is to small" );
371 DBG_ASSERT( (nInfo
&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
)) == 0,
372 "there is a conversion error" );
373 DBG_ASSERT( 0 == nChars
,
374 "there is a converted character, but an error" );
376 // There are still errors, so we use the first
377 // character and restart after that.
378 c
= (sal_Unicode
)sBuffer
[0];
// Step back over all buffered bytes except the first one.
379 rInput
.SeekRel( -(nLen
-1) );
385 else if( 1 == nChars
&& 0 == nInfo
)
387 // The conversion was successful
388 DBG_ASSERT( nCvtBytes
== 1,
389 "no all bytes have been converted!" );
392 else if( 0 != nChars
|| 0 != nInfo
)
394 DBG_ASSERT( 0 == nChars
,
395 "there is a converted character, but an error" );
396 DBG_ASSERT( 0 != nInfo
,
397 "there is no converted character and no error" );
398 // #73398#: If the character could not be converted,
399 // because a conversion is not available, do no conversion at all.
// Repeat until a character was produced or a stream error occurred.
407 while( 0 == nChars
&& !bErr
);
// More data may arrive later: remember that in the parser state.
411 if( ERRCODE_IO_PENDING
== rInput
.GetError() )
413 eState
= SVPAR_PENDING
;
// NOTE(review): the condition enclosing this return is not visible here;
// presumably the error path — confirm.
417 return sal_Unicode(EOF
);
// Write the character to the dump stream, if one is open.
421 if( pImplData
->aOut
.IsOpen() )
422 pImplData
->aOut
<< ByteString::ConvertFromUnicode( c
,
423 RTL_TEXTENCODING_MS_1251
);
// Delivers the next token id and fills aToken, nTokenValue and bTokenHasValue.
// When nTokenStackPos is 0 a fresh token is scanned with _GetNextToken().
// pTokenStackPos wraps around to the first entry after the last one.
// A token that is already on the stack is copied from its stack entry; a
// freshly scanned token is stored in the stack entry while the state is
// SVPAR_WORKING. Any state other than working, accepted or pending is turned
// into SVPAR_ERROR.
// NOTE(review): the advancing of pTokenStackPos, the return statements and the
// condition guarding the "fetch from the stack" branch are not visible in this
// excerpt.
436 int SvParser::GetNextToken()
440 if( !nTokenStackPos
)
442 aToken
.Erase(); // clear the token buffer
443 nTokenValue
= -1; // marker for "no value read"
444 bTokenHasValue
= false;
446 nRet
= _GetNextToken();
447 if( SVPAR_PENDING
== eState
)
// Wrap around at the end of the stack array.
452 if( pTokenStackPos
== pTokenStack
+ nTokenStackSize
)
453 pTokenStackPos
= pTokenStack
;
455 // fetch from the stack ??
459 nTokenValue
= pTokenStackPos
->nTokenValue
;
460 bTokenHasValue
= pTokenStackPos
->bTokenHasValue
;
461 aToken
= pTokenStackPos
->sToken
;
462 nRet
= pTokenStackPos
->nTokenId
;
464 // no, then put the current one on the stack
465 else if( SVPAR_WORKING
== eState
)
467 pTokenStackPos
->sToken
= aToken
;
468 pTokenStackPos
->nTokenValue
= nTokenValue
;
469 pTokenStackPos
->bTokenHasValue
= bTokenHasValue
;
470 pTokenStackPos
->nTokenId
= nRet
;
472 else if( SVPAR_ACCEPTED
!= eState
&& SVPAR_PENDING
!= eState
)
473 eState
= SVPAR_ERROR
; // some error
// Moves the current token stack position by nCnt entries (via GetStackPtr),
// adjusts nTokenStackPos by -nCnt (limited to at most nTokenStackSize),
// reloads aToken, nTokenValue and bTokenHasValue from the stack entry reached
// and returns that entry's token id.
// NOTE(review): the lower bound check preceding the `else if` is not visible
// in this excerpt.
478 int SvParser::SkipToken( short nCnt
) // skip back n tokens
480 pTokenStackPos
= GetStackPtr( nCnt
);
481 short nTmp
= nTokenStackPos
- nCnt
;
484 else if( nTmp
> nTokenStackSize
)
485 nTmp
= nTokenStackSize
;
486 nTokenStackPos
= BYTE(nTmp
);
488 // and restore the values
489 aToken
= pTokenStackPos
->sToken
;
490 nTokenValue
= pTokenStackPos
->nTokenValue
;
491 bTokenHasValue
= pTokenStackPos
->bTokenHasValue
;
493 return pTokenStackPos
->nTokenId
;
// Returns the address of the token stack entry that lies nCnt positions away
// from pTokenStackPos, wrapping around both ends of the pTokenStack array.
// The magnitude of nCnt is limited to nTokenStackSize-1.
// NOTE(review): the conditions that separate the positive and the negative
// nCnt case (and the else keywords) are not visible in this excerpt.
496 SvParser::TokenStackType
* SvParser::GetStackPtr( short nCnt
)
// Index of the current position inside the stack array.
498 BYTE nAktPos
= BYTE(pTokenStackPos
- pTokenStack
);
// Forward direction: limit the distance, then add it, wrapping past the end.
501 if( nCnt
>= nTokenStackSize
)
502 nCnt
= (nTokenStackSize
-1);
503 if( nAktPos
+ nCnt
< nTokenStackSize
)
504 nAktPos
= sal::static_int_cast
< BYTE
>(nAktPos
+ nCnt
);
506 nAktPos
= sal::static_int_cast
< BYTE
>(
507 nAktPos
+ (nCnt
- nTokenStackSize
));
// Backward direction: limit the distance, then subtract it, wrapping past the
// start.
511 if( -nCnt
>= nTokenStackSize
)
512 nCnt
= -nTokenStackSize
+1;
513 if( -nCnt
<= nAktPos
)
514 nAktPos
= sal::static_int_cast
< BYTE
>(nAktPos
+ nCnt
);
516 nAktPos
= sal::static_int_cast
< BYTE
>(
517 nAktPos
+ (nCnt
+ nTokenStackSize
));
519 return pTokenStack
+ nAktPos
;
522 // called for every token that is recognized in CallParser
523 void SvParser::NextToken( int )
528 // for asynchronous reading from the SvStream
// Returns the saved token (pImplData->nSaveToken), or 0 when no implementation
// data exists.
530 int SvParser::GetSaveToken() const
532 return pImplData
? pImplData
->nSaveToken
: 0;
// Stores the current parser position in pImplData: the stream position, the
// given token nToken, the token text, line and column number, the token value
// with its validity flag and the lookahead character.
// NOTE(review): the condition guarding the allocation of pImplData is not
// visible in this excerpt; presumably it is only created when still missing —
// confirm.
535 void SvParser::SaveState( int nToken
)
537 // remember the current state
540 pImplData
= new SvParser_Impl
;
541 pImplData
->nSaveToken
= 0;
544 pImplData
->nFilePos
= rInput
.Tell();
545 pImplData
->nToken
= nToken
;
547 pImplData
->aToken
= aToken
;
548 pImplData
->nlLineNr
= nlLineNr
;
549 pImplData
->nlLinePos
= nlLinePos
;
550 pImplData
->nTokenValue
= nTokenValue
;
551 pImplData
->bTokenHasValue
= bTokenHasValue
;
552 pImplData
->nNextCh
= nNextCh
;
// Restores the values stored in pImplData (see the assignments below): token
// text, line and column number, token value with its validity flag and the
// lookahead character. The stored token becomes nSaveToken and the input
// stream is positioned at the stored file position.
// NOTE(review): the statement guarded by the ERRCODE_IO_PENDING check and a
// possible null check of pImplData are not visible in this excerpt.
555 void SvParser::RestoreState()
557 // restore the previous state
560 if( ERRCODE_IO_PENDING
== rInput
.GetError() )
562 aToken
= pImplData
->aToken
;
563 nlLineNr
= pImplData
->nlLineNr
;
564 nlLinePos
= pImplData
->nlLinePos
;
565 nTokenValue
= pImplData
->nTokenValue
;
566 bTokenHasValue
=pImplData
->bTokenHasValue
;
567 nNextCh
= pImplData
->nNextCh
;
// Make the stored token available through GetSaveToken().
569 pImplData
->nSaveToken
= pImplData
->nToken
;
571 rInput
.Seek( pImplData
->nFilePos
);
575 void SvParser::Continue( int )
// Merges the ids read from pWhichIds (nWhichIds entries) into rWhichMap.
// rWhichMap is treated as a sequence of [first, last] range pairs followed by
// a terminating 0 entry: the inner loop advances in steps of two until it
// finds a 0, and new ranges are appended in front of the last element.
// For every id the first matching case applies: insert a new single-id range
// in front of a range that starts more than one above the id, extend a range
// downwards, extend a range upwards (merging it with the following range when
// they become adjacent), or append a new range at the end.
// NOTE(review): the remaining parameters of the signature, the breaks out of
// the inner loop and the condition guarding the append are not visible in this
// excerpt.
579 void SvParser::BuildWhichTbl( SvUShorts
&rWhichMap
,
585 for( USHORT nCnt
= 0; nCnt
< nWhichIds
; ++nCnt
, ++pWhichIds
)
// A new range initially consists of the single id.
588 aNewRange
[0] = aNewRange
[1] = *pWhichIds
;
592 for ( USHORT nOfs
= 0; rWhichMap
[nOfs
]; nOfs
+= 2 )
// The id lies below this range and is not adjacent to it: insert in front.
594 if( *pWhichIds
< rWhichMap
[nOfs
] - 1 )
597 rWhichMap
.Insert( aNewRange
, 2, nOfs
);
601 else if( *pWhichIds
== rWhichMap
[nOfs
] - 1 )
603 // extend this range downwards
604 rWhichMap
[nOfs
] = *pWhichIds
;
608 else if( *pWhichIds
== rWhichMap
[nOfs
+1] + 1 )
// The next range starts directly above the id.
610 if( rWhichMap
[nOfs
+2] != 0 && rWhichMap
[nOfs
+2] == *pWhichIds
+ 1 )
612 // merge with the next range
613 rWhichMap
[nOfs
+1] = rWhichMap
[nOfs
+3];
614 rWhichMap
.Remove( nOfs
+2, 2 );
617 // extend this range upwards
618 rWhichMap
[nOfs
+1] = *pWhichIds
;
624 // append a range at the end
626 rWhichMap
.Insert( aNewRange
, 2, rWhichMap
.Count()-1 );
// Static link handler; dispatches on the state of the parser pThis.
// Visible behaviour: in the first branch the state is set to SVPAR_WORKING,
// the saved state is restored and Continue() is called with the saved token;
// afterwards a pending stream error is reset and, unless the parser is pending
// again, a reference is released. SVPAR_WAITFORDATA is turned into
// SVPAR_WORKING.
// NOTE(review): the first case label, the breaks, the return values and the
// label in front of the last ReleaseRef() are not visible in this excerpt.
631 IMPL_STATIC_LINK( SvParser
, NewDataRead
, void*, EMPTYARG
)
633 switch( pThis
->eState
)
636 // While a file is being downloaded we must not continue,
637 // but have to ignore the call.
638 if( pThis
->IsDownloadingFile() )
641 pThis
->eState
= SVPAR_WORKING
;
642 pThis
->RestoreState();
644 pThis
->Continue( pThis
->pImplData
->nToken
);
646 if( ERRCODE_IO_PENDING
== pThis
->rInput
.GetError() )
647 pThis
->rInput
.ResetError();
649 if( SVPAR_PENDING
!= pThis
->eState
)
650 pThis
->ReleaseRef(); // otherwise we are done!
653 case SVPAR_WAITFORDATA
:
654 pThis
->eState
= SVPAR_WORKING
;
657 case SVPAR_NOTSTARTED
:
662 pThis
->ReleaseRef(); // otherwise we are done!
669 /*========================================================================
671 * SvKeyValueIterator.
673 *======================================================================*/
// Declares and implements SvKeyValueList_Impl, the list type holding
// SvKeyValue pointers that SvKeyValueIterator allocates as m_pList.
// NOTE(review): the numeric arguments (0, 4) are presumably the initial size
// and the growth step — confirm against the SV_DECL_PTRARR_DEL macro.
674 SV_DECL_PTRARR_DEL(SvKeyValueList_Impl
, SvKeyValue
*, 0, 4)
675 SV_IMPL_PTRARR(SvKeyValueList_Impl
, SvKeyValue
*);
678 * SvKeyValueIterator.
// Constructor: allocates the internal list m_pList.
// NOTE(review): the rest of the initializer list is not visible in this
// excerpt.
680 SvKeyValueIterator::SvKeyValueIterator (void)
681 : m_pList (new SvKeyValueList_Impl
),
687 * ~SvKeyValueIterator.
689 SvKeyValueIterator::~SvKeyValueIterator (void)
// Starts an iteration: sets m_nPos to the number of list entries and returns
// the result of GetNext(rKeyVal). Because GetNext decrements m_nPos before it
// reads, the last entry of the list is delivered first.
697 BOOL
SvKeyValueIterator::GetFirst (SvKeyValue
&rKeyVal
)
699 m_nPos
= m_pList
->Count();
700 return GetNext (rKeyVal
);
// Decrements m_nPos and copies the list entry at the new position into
// rKeyVal.
// NOTE(review): the bounds check and the return statements are not visible in
// this excerpt.
706 BOOL
SvKeyValueIterator::GetNext (SvKeyValue
&rKeyVal
)
710 rKeyVal
= *m_pList
->GetObject(--m_nPos
);
// Adds a heap-allocated copy of rKeyVal to the list, inserted at index
// Count(), i.e. behind the current last entry.
723 void SvKeyValueIterator::Append (const SvKeyValue
&rKeyVal
)
725 SvKeyValue
*pKeyVal
= new SvKeyValue (rKeyVal
);
726 m_pList
->C40_INSERT(SvKeyValue
, pKeyVal
, m_pList
->Count());
729 /* vi:set tabstop=4 shiftwidth=4 expandtab: */