fdo#74697 Add Bluez 5 support for impress remote.
[LibreOffice.git] / svtools / source / svrtf / parrtf.cxx
blobf064a56e525ba2c890226d6c40c2c1fc6cd872db
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <stdio.h> // for EOF
22 #include <rtl/tencinfo.h>
23 #include <tools/stream.hxx>
24 #include <tools/debug.hxx>
25 #include <svtools/rtftoken.h>
26 #include <svtools/rtfkeywd.hxx>
27 #include <svtools/parrtf.hxx>
28 #include <comphelper/string.hxx>
// Upper bound on the number of characters ScanText() collects into one
// text token before handing it back to the caller.
const int MAX_STRING_LEN = 1024;
// Chunk size used while collecting the letters of a control word.
const int MAX_TOKEN_LEN = 128;

// ASCII-only character classification used by the tokenizer.
#define RTF_ISDIGIT( c ) comphelper::string::isdigitAscii(c)
#define RTF_ISALPHA( c ) comphelper::string::isalphaAscii(c)
36 SvRTFParser::SvRTFParser( SvStream& rIn, sal_uInt8 nStackSize )
37 : SvParser( rIn, nStackSize ),
38 eUNICodeSet( RTL_TEXTENCODING_MS_1252 ), // default ist ANSI-CodeSet
39 nUCharOverread( 1 )
41 // default ist ANSI-CodeSet
42 SetSrcEncoding( RTL_TEXTENCODING_MS_1252 );
43 bRTF_InTextRead = false;
46 SvRTFParser::~SvRTFParser()
53 int SvRTFParser::_GetNextToken()
55 int nRet = 0;
56 do {
57 int bNextCh = true;
58 switch( nNextCh )
60 case '\\':
62 // Steuerzeichen
63 switch( nNextCh = GetNextChar() )
65 case '{':
66 case '}':
67 case '\\':
68 case '+': // habe ich in einem RTF-File gefunden
69 case '~': // nonbreaking space
70 case '-': // optional hyphen
71 case '_': // nonbreaking hyphen
72 case '\'': // HexValue
73 nNextCh = '\\';
74 rInput.SeekRel( -1 );
75 ScanText();
76 nRet = RTF_TEXTTOKEN;
77 bNextCh = 0 == nNextCh;
78 break;
80 case '*': // ignoreflag
81 nRet = RTF_IGNOREFLAG;
82 break;
83 case ':': // subentry in an index entry
84 nRet = RTF_SUBENTRYINDEX;
85 break;
86 case '|': // formula-charakter
87 nRet = RTF_FORMULA;
88 break;
90 case 0x0a:
91 case 0x0d:
92 nRet = RTF_PAR;
93 break;
95 default:
96 if( RTF_ISALPHA( nNextCh ) )
98 aToken = '\\';
100 String aStrBuffer;
101 sal_Unicode* pStr = aStrBuffer.AllocBuffer(
102 MAX_TOKEN_LEN );
103 xub_StrLen nStrLen = 0;
104 do {
105 *(pStr + nStrLen++) = nNextCh;
106 if( MAX_TOKEN_LEN == nStrLen )
108 aToken += aStrBuffer;
109 aToken.GetBufferAccess(); // make unique string!
110 nStrLen = 0;
112 nNextCh = GetNextChar();
113 } while( RTF_ISALPHA( nNextCh ) );
114 if( nStrLen )
116 aStrBuffer.ReleaseBufferAccess( nStrLen );
117 aToken += aStrBuffer;
121 // Minus fuer numerischen Parameter
122 int bNegValue = false;
123 if( '-' == nNextCh )
125 bNegValue = true;
126 nNextCh = GetNextChar();
129 // evt. Numerischer Parameter
130 if( RTF_ISDIGIT( nNextCh ) )
132 nTokenValue = 0;
133 do {
134 nTokenValue *= 10;
135 nTokenValue += nNextCh - '0';
136 nNextCh = GetNextChar();
137 } while( RTF_ISDIGIT( nNextCh ) );
138 if( bNegValue )
139 nTokenValue = -nTokenValue;
140 bTokenHasValue=true;
142 else if( bNegValue ) // das Minus wieder zurueck
144 nNextCh = '-';
145 rInput.SeekRel( -1 );
147 if( ' ' == nNextCh ) // Blank gehoert zum Token!
148 nNextCh = GetNextChar();
150 // suche das Token in der Tabelle:
151 if( 0 == (nRet = GetRTFToken( aToken )) )
152 // Unknown Control
153 nRet = RTF_UNKNOWNCONTROL;
155 // bug 76812 - unicode token handled as normal text
156 bNextCh = false;
157 switch( nRet )
159 case RTF_UC:
160 if( 0 <= nTokenValue )
162 nUCharOverread = (sal_uInt8)nTokenValue;
163 //cmc: other ifdef breaks #i3584
164 aParserStates.top().
165 nUCharOverread = nUCharOverread;
167 aToken.Erase(); // #i47831# erase token to prevent the token from beeing treated as text
168 // read next token
169 nRet = 0;
170 break;
172 case RTF_UPR:
173 if (!_inSkipGroup) {
174 // UPR - overread the group with the ansi
175 // information
176 while( '{' != _GetNextToken() )
178 SkipGroup();
179 _GetNextToken(); // overread the last bracket
180 nRet = 0;
182 break;
184 case RTF_U:
185 if( !bRTF_InTextRead )
187 nRet = RTF_TEXTTOKEN;
188 aToken = (sal_Unicode)nTokenValue;
190 // overread the next n "RTF" characters. This
191 // can be also \{, \}, \'88
192 for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
194 sal_Unicode cAnsi = nNextCh;
195 while( 0xD == cAnsi )
196 cAnsi = GetNextChar();
197 while( 0xA == cAnsi )
198 cAnsi = GetNextChar();
200 if( '\\' == cAnsi &&
201 '\'' == ( cAnsi = GetNextChar() ))
202 // HexValue ueberlesen
203 cAnsi = GetHexValue();
204 nNextCh = GetNextChar();
206 ScanText();
207 bNextCh = 0 == nNextCh;
209 break;
212 else if( SVPAR_PENDING != eState )
214 // Bug 34631 - "\ " ueberlesen - Blank als Zeichen
215 // eState = SVPAR_ERROR;
216 bNextCh = false;
218 break;
221 break;
223 case sal_Unicode(EOF):
224 eState = SVPAR_ACCEPTED;
225 nRet = nNextCh;
226 break;
228 case '{':
230 if( 0 <= nOpenBrakets )
232 RtfParserState_Impl aState( nUCharOverread, GetSrcEncoding() );
233 aParserStates.push( aState );
235 ++nOpenBrakets;
236 DBG_ASSERT(
237 static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
238 "ParserStateStack unequal to bracket count" );
239 nRet = nNextCh;
241 break;
243 case '}':
244 --nOpenBrakets;
245 if( 0 <= nOpenBrakets )
247 aParserStates.pop();
248 if( !aParserStates.empty() )
250 const RtfParserState_Impl& rRPS =
251 aParserStates.top();
252 nUCharOverread = rRPS.nUCharOverread;
253 SetSrcEncoding( rRPS.eCodeSet );
255 else
257 nUCharOverread = 1;
258 SetSrcEncoding( GetCodeSet() );
261 DBG_ASSERT(
262 static_cast<size_t>(nOpenBrakets) == aParserStates.size(),
263 "ParserStateStack unequal to bracket count" );
264 nRet = nNextCh;
265 break;
267 case 0x0d:
268 case 0x0a:
269 break;
271 default:
272 // es folgt normaler Text
273 ScanText();
274 nRet = RTF_TEXTTOKEN;
275 bNextCh = 0 == nNextCh;
276 break;
279 if( bNextCh )
280 nNextCh = GetNextChar();
282 } while( !nRet && SVPAR_WORKING == eState );
283 return nRet;
287 sal_Unicode SvRTFParser::GetHexValue()
289 // Hex-Wert sammeln
290 register int n;
291 register sal_Unicode nHexVal = 0;
293 for( n = 0; n < 2; ++n )
295 nHexVal *= 16;
296 nNextCh = GetNextChar();
297 if( nNextCh >= '0' && nNextCh <= '9' )
298 nHexVal += (nNextCh - 48);
299 else if( nNextCh >= 'a' && nNextCh <= 'f' )
300 nHexVal += (nNextCh - 87);
301 else if( nNextCh >= 'A' && nNextCh <= 'F' )
302 nHexVal += (nNextCh - 55);
304 return nHexVal;
307 void SvRTFParser::ScanText( const sal_Unicode cBreak )
309 String aStrBuffer;
310 int bWeiter = true;
311 while( bWeiter && IsParserWorking() && aStrBuffer.Len() < MAX_STRING_LEN)
313 int bNextCh = true;
314 switch( nNextCh )
316 case '\\':
318 switch (nNextCh = GetNextChar())
320 case '\'':
323 OStringBuffer aByteString;
324 while (1)
326 char c = (char)GetHexValue();
328 * Note: \'00 is a valid internal character in a
329 * string in RTF. OStringBuffer supports
330 * appending nulls fine
332 aByteString.append(c);
334 bool bBreak = false;
335 sal_Char nSlash = '\\';
336 while (!bBreak)
338 wchar_t __next=GetNextChar();
339 if (__next>0xFF) // fix for #i43933# and #i35653#
341 if (aByteString.getLength())
342 aStrBuffer.Append(String(OStringToOUString(aByteString.makeStringAndClear(), GetSrcEncoding())));
343 aStrBuffer.Append((sal_Unicode)__next);
345 continue;
347 nSlash = (sal_Char)__next;
348 while (nSlash == 0xD || nSlash == 0xA)
349 nSlash = (sal_Char)GetNextChar();
351 switch (nSlash)
353 case '{':
354 case '}':
355 case '\\':
356 bBreak = true;
357 break;
358 default:
359 aByteString.append(nSlash);
360 break;
364 nNextCh = GetNextChar();
366 if (nSlash != '\\' || nNextCh != '\'')
368 rInput.SeekRel(-1);
369 nNextCh = nSlash;
370 break;
374 bNextCh = false;
376 if (aByteString.getLength())
377 aStrBuffer.Append(String(OStringToOUString(aByteString.makeStringAndClear(), GetSrcEncoding())));
379 break;
380 case '\\':
381 case '}':
382 case '{':
383 case '+': // habe ich in einem RTF-File gefunden
384 aStrBuffer.Append(nNextCh);
385 break;
386 case '~': // nonbreaking space
387 aStrBuffer.Append(static_cast< sal_Unicode >(0xA0));
388 break;
389 case '-': // optional hyphen
390 aStrBuffer.Append(static_cast< sal_Unicode >(0xAD));
391 break;
392 case '_': // nonbreaking hyphen
393 aStrBuffer.Append(static_cast< sal_Unicode >(0x2011));
394 break;
396 case 'u':
397 // UNI-Code Zeichen lesen
399 nNextCh = GetNextChar();
400 rInput.SeekRel( -2 );
402 if( '-' == nNextCh || RTF_ISDIGIT( nNextCh ) )
404 bRTF_InTextRead = true;
406 String sSave( aToken );
407 nNextCh = '\\';
408 #ifdef DBG_UTIL
409 int nToken =
410 #endif
411 _GetNextToken();
412 DBG_ASSERT( RTF_U == nToken, "doch kein UNI-Code Zeichen" );
413 // dont convert symbol chars
414 aStrBuffer.Append(
415 static_cast< sal_Unicode >(nTokenValue));
417 // overread the next n "RTF" characters. This
418 // can be also \{, \}, \'88
419 for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
421 sal_Unicode cAnsi = nNextCh;
422 while( 0xD == cAnsi )
423 cAnsi = GetNextChar();
424 while( 0xA == cAnsi )
425 cAnsi = GetNextChar();
427 if( '\\' == cAnsi &&
428 '\'' == ( cAnsi = GetNextChar() ))
429 // HexValue ueberlesen
430 cAnsi = GetHexValue();
431 nNextCh = GetNextChar();
433 bNextCh = false;
434 aToken = sSave;
435 bRTF_InTextRead = false;
437 else
439 nNextCh = '\\';
440 bWeiter = false; // Abbrechen, String zusammen
443 break;
445 default:
446 rInput.SeekRel( -1 );
447 nNextCh = '\\';
448 bWeiter = false; // Abbrechen, String zusammen
449 break;
452 break;
454 case sal_Unicode(EOF):
455 eState = SVPAR_ERROR;
456 // weiter
457 case '{':
458 case '}':
459 bWeiter = false;
460 break;
462 case 0x0a:
463 case 0x0d:
464 break;
466 default:
467 if( nNextCh == cBreak || aStrBuffer.Len() >= MAX_STRING_LEN)
468 bWeiter = false;
469 else
471 do {
472 // alle anderen Zeichen kommen in den Text
473 aStrBuffer.Append(nNextCh);
475 if (sal_Unicode(EOF) == (nNextCh = GetNextChar()))
477 if (aStrBuffer.Len())
478 aToken += aStrBuffer;
479 return;
481 } while
483 (RTF_ISALPHA(nNextCh) || RTF_ISDIGIT(nNextCh)) &&
484 (aStrBuffer.Len() < MAX_STRING_LEN)
486 bNextCh = false;
490 if( bWeiter && bNextCh )
491 nNextCh = GetNextChar();
494 if (aStrBuffer.Len())
495 aToken += aStrBuffer;
499 short SvRTFParser::_inSkipGroup=0;
501 void SvRTFParser::SkipGroup()
503 short nBrackets=1;
504 if (_inSkipGroup>0)
505 return;
506 _inSkipGroup++;
507 //#i16185# fecking \bin keyword
510 switch (nNextCh)
512 case '{':
513 ++nBrackets;
514 break;
515 case '}':
516 if (!--nBrackets) {
517 _inSkipGroup--;
518 return;
520 break;
522 int nToken = _GetNextToken();
523 if (nToken == RTF_BIN)
525 rInput.SeekRel(-1);
526 rInput.SeekRel(nTokenValue);
527 nNextCh = GetNextChar();
529 while (nNextCh==0xa || nNextCh==0xd)
531 nNextCh = GetNextChar();
533 } while (sal_Unicode(EOF) != nNextCh && IsParserWorking());
535 if( SVPAR_PENDING != eState && '}' != nNextCh )
536 eState = SVPAR_ERROR;
537 _inSkipGroup--;
540 void SvRTFParser::ReadUnknownData() { SkipGroup(); }
541 void SvRTFParser::ReadBitmapData() { SkipGroup(); }
542 void SvRTFParser::ReadOLEData() { SkipGroup(); }
545 SvParserState SvRTFParser::CallParser()
547 sal_Char cFirstCh;
548 nNextChPos = rInput.Tell();
549 rInput >> cFirstCh; nNextCh = cFirstCh;
550 eState = SVPAR_WORKING;
551 nOpenBrakets = 0;
552 SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
553 eUNICodeSet = RTL_TEXTENCODING_MS_1252; // default ist ANSI-CodeSet
555 // die 1. beiden Token muessen '{' und \\rtf sein !!
556 if( '{' == GetNextToken() && RTF_RTF == GetNextToken() )
558 AddRef();
559 Continue( 0 );
560 if( SVPAR_PENDING != eState )
561 ReleaseRef(); // dann brauchen wir den Parser nicht mehr!
563 else
564 eState = SVPAR_ERROR;
566 return eState;
569 void SvRTFParser::Continue( int nToken )
571 // DBG_ASSERT( SVPAR_CS_DONTKNOW == GetCharSet(),
572 // "Zeichensatz wurde geaendert." );
574 if( !nToken )
575 nToken = GetNextToken();
577 while( IsParserWorking() )
579 SaveState( nToken );
580 switch( nToken )
582 case '}':
583 if( nOpenBrakets )
584 goto NEXTTOKEN;
585 eState = SVPAR_ACCEPTED;
586 break;
588 case '{':
589 // eine unbekannte Gruppe ?
591 if( RTF_IGNOREFLAG != GetNextToken() )
592 nToken = SkipToken( -1 );
593 else if( RTF_UNKNOWNCONTROL != GetNextToken() )
594 nToken = SkipToken( -2 );
595 else
597 // gleich herausfiltern
598 ReadUnknownData();
599 nToken = GetNextToken();
600 if( '}' != nToken )
601 eState = SVPAR_ERROR;
602 break; // auf zum naechsten Token!!
605 goto NEXTTOKEN;
607 case RTF_UNKNOWNCONTROL:
608 break; // unbekannte Token ueberspringen
609 case RTF_NEXTTYPE:
610 case RTF_ANSITYPE:
611 SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_MS_1252 );
612 break;
613 case RTF_MACTYPE:
614 SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_APPLE_ROMAN );
615 break;
616 case RTF_PCTYPE:
617 SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_437 );
618 break;
619 case RTF_PCATYPE:
620 SetSrcEncoding( eCodeSet = RTL_TEXTENCODING_IBM_850 );
621 break;
622 case RTF_ANSICPG:
623 eCodeSet = rtl_getTextEncodingFromWindowsCodePage(nTokenValue);
624 SetSrcEncoding(eCodeSet);
625 break;
626 default:
627 NEXTTOKEN:
628 NextToken( nToken );
629 break;
631 if( IsParserWorking() )
632 SaveState( 0 ); // bis hierhin abgearbeitet,
633 // weiter mit neuem Token!
634 nToken = GetNextToken();
636 if( SVPAR_ACCEPTED == eState && 0 < nOpenBrakets )
637 eState = SVPAR_ERROR;
640 void SvRTFParser::SetEncoding( rtl_TextEncoding eEnc )
642 if (eEnc == RTL_TEXTENCODING_DONTKNOW)
643 eEnc = GetCodeSet();
645 if (!aParserStates.empty())
646 aParserStates.top().eCodeSet = eEnc;
647 SetSrcEncoding(eEnc);
650 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */