1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: scanner.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_basic.hxx"
41 #include <math.h> // atof()
43 #include <rtl/math.hxx>
44 #include <vcl/svapp.hxx>
45 #include <unotools/charclass.hxx>
47 #include <runtime.hxx>
// Constructs a scanner over the source text rBuf, which is stored in aBuf.
// Of the initialisation, only two assignments are visible here: the scan type
// starts as SbxVARIANT and the "previous line extends a comment" flag is cleared.
// NOTE(review): the use of the StarBASIC pointer p is not visible in this excerpt.
49 SbiScanner::SbiScanner( const ::rtl::OUString
& rBuf
, StarBASIC
* p
) : aBuf( rBuf
)
54 eScanType
= SbxVARIANT
;
72 bPrevLineExtentsComment
= FALSE
;
77 SbiScanner::~SbiScanner()
80 void SbiScanner::LockColumn()
86 void SbiScanner::UnlockColumn()
// Reports the compiler error `code` for the current scan position.
// The error is forwarded to pBasic->CError() together with aError, the current
// line nLine and the column range nc..nCol2; the result is kept in bRes.
92 void SbiScanner::GenError( SbError code
)
94 if( GetSbData()->bBlockCompilerError
)
99 if( !bError
&& bErrors
)
102 // Report only one error per statement
106 // If EXPECTED or UNEXPECTED is raised, it always refers to
107 // the last token, so take over Col1
// Start column of the report: the saved column while the column is locked
// (nColLock non-zero), otherwise the start column of the current token.
108 USHORT nc
= nColLock
? nSavedCol1
: nCol1
;
112 case SbERR_UNEXPECTED
:
113 case SbERR_SYMBOL_EXPECTED
:
114 case SbERR_LABEL_EXPECTED
:
// Make sure the end column is not in front of the start column.
116 if( nc
> nCol2
) nCol2
= nc
;
119 bRes
= pBasic
->CError( code
, aError
, nLine
, nc
, nCol2
);
122 ( code
== SbERR_NO_MEMORY
|| code
== SbERR_PROG_TOO_LARGE
);
128 // Returns TRUE if a colon follows immediately.
129 // Used by SbiTokenizer::MayBeLabel() to recognise a label
131 BOOL
SbiScanner::DoesColonFollow()
133 if( pLine
&& *pLine
== ':' )
// The colon is consumed: both the read pointer and the column advance.
135 pLine
++; nCol
++; return TRUE
;
140 // Test for a legal type suffix
// The position of c within the suffix list "%&!#@ $" is added to SbxINTEGER
// to form the returned SbxDataType.
// NOTE(review): the result for a character that is not a suffix is not visible
// in this excerpt; callers compare the result against SbxVARIANT.
142 static SbxDataType
GetSuffixType( sal_Unicode c
)
// The blank in the list is never accepted as a suffix (see c != ' ' below);
// presumably it only keeps '$' at the index matching its data type - confirm.
144 static String aSuffixesStr
= String::CreateFromAscii( "%&!#@ $" );
147 sal_uInt32 n
= aSuffixesStr
.Search( c
);
148 if( STRING_NOTFOUND
!= n
&& c
!= ' ' )
149 return SbxDataType( (USHORT
) n
+ SbxINTEGER
);
154 // Reads the next symbol into the variables aSym, nVal and eType
155 // The return value is FALSE on EOF or on errors
//
// Overview of what is visible in this function:
//  - a new line is cut out of aBuf up to the next '\n' or '\r' and scanning
//    restarts at column 0,
//  - leading blanks, tabs and form feeds are skipped and recorded in bSpaces,
//  - the symbol is then classified as identifier, decimal number,
//    '&'-prefixed number (base 16 or base 8), quoted string / bracketed
//    symbol, invalid character (>= 0x7F) or operator,
//  - comments (' or REM) and line continuation with '_' are handled last.
158 BOOL
SbiScanner::NextSym()
160 // Remember for the EOLN case
161 USHORT nOldLine
= nLine
;
162 USHORT nOldCol1
= nCol1
;
163 USHORT nOldCol2
= nCol2
;
164 sal_Unicode buf
[ BUF_SIZE
], *p
= buf
;
167 eScanType
= SbxVARIANT
;
170 bNumber
= bSpaces
= FALSE
;
176 INT32 nLen
= aBuf
.getLength();
177 if( nBufPos
>= nLen
)
179 const sal_Unicode
* p2
= aBuf
.getStr();
// Look for the end of the current line ('\n' or '\r').
181 while( ( n
< nLen
) && ( *p2
!= '\n' ) && ( *p2
!= '\r' ) )
183 aLine
= aBuf
.copy( nBufPos
, n
- nBufPos
);
// A "\r\n" pair is recognised as a unit here.
186 if( *p2
== '\r' && *( p2
+1 ) == '\n' )
192 pLine
= aLine
.getStr();
194 nCol
= nCol1
= nCol2
= nOldCol1
= nOldCol2
= 0;
// Skip leading white space and remember that there was some.
199 while( *pLine
&& (( *pLine
== ' ' ) || ( *pLine
== '\t' ) || ( *pLine
== '\f' )) )
200 pLine
++, nCol
++, bSpaces
= TRUE
;
208 if( bPrevLineExtentsComment
)
209 goto PrevLineCommentLbl
;
218 // Symbol? Then copy the characters.
219 if( BasicSimpleCharClass::isAlpha( *pLine
, bCompatible
) || *pLine
== '_' )
221 // If nothing follows the '_', it is an end of line!
222 if( *pLine
== '_' && !*(pLine
+1) )
227 for ( ; (BasicSimpleCharClass::isAlphaNumeric( *pLine
, bCompatible
) || ( *pLine
== '_' ) ); pLine
++ )
229 aSym
= aLine
.copy( n
, nCol
- n
);
230 // Replace a trailing '_' by a space if the end of the line follows
231 // (otherwise it would wrongly be taken as a line continuation)
232 if( !bUsedForHilite
&& !*pLine
&& *(pLine
-1) == '_' )
234 aSym
.GetBufferAccess(); // #109693 force copy if necessary
235 *((sal_Unicode
*)(pLine
-1)) = ' '; // cast because of const
238 // Do not test the exclamation mark if
239 // another symbol follows it
240 else if( *pLine
!= '!' || !BasicSimpleCharClass::isAlpha( pLine
[ 1 ], bCompatible
) )
242 SbxDataType t
= GetSuffixType( *pLine
);
243 if( t
!= SbxVARIANT
)
252 // Number? Then read and convert it.
253 else if( BasicSimpleCharClass::isDigit( *pLine
& 0xFF )
254 || ( *pLine
== '.' && BasicSimpleCharClass::isDigit( *(pLine
+1) & 0xFF ) ) )
260 eScanType
= SbxDOUBLE
;
261 BOOL bBufOverflow
= FALSE
;
// NOTE(review): strchr() converts its int argument to char, so a sal_Unicode
// above 0xFF is presumably compared by its low byte only - confirm.
262 while( strchr( "0123456789.DEde", *pLine
) && *pLine
)
264 // AB 4.1.1996: Buffer full? -> keep scanning without storing
265 if( (p
-buf
) == (BUF_SIZE
-1) )
271 // Decimal point or exponent?
276 pLine
++; nCol
++; continue;
278 else *p
++ = *pLine
++, nCol
++;
280 else if( strchr( "DdEe", *pLine
) )
284 pLine
++; nCol
++; continue;
286 // if( toupper( *pLine ) == 'D' )
287 // eScanType = SbxDOUBLE;
// The exponent marker is always stored as 'E', whatever was written.
288 *p
++ = 'E'; pLine
++; nCol
++;
289 // Sign after the exponent?
294 *p
++ = *pLine
++, nCol
++;
298 *p
++ = *pLine
++, nCol
++;
299 if( comma
&& !exp
) ncdig
++;
304 aSym
= p
; bNumber
= TRUE
;
305 // More than one decimal point or exponent?
306 if( comma
> 1 || exp
> 1 )
308 GenError( SbERR_BAD_CHAR_IN_NUMBER
); }
310 // #57844 Use the localised function
311 nVal
= rtl_math_uStringToDouble( buf
, buf
+(p
-buf
), '.', ',', NULL
, NULL
);
312 // OLD: nVal = atof( buf );
317 if( nVal
>= SbxMININT
&& nVal
<= SbxMAXINT
)
318 eScanType
= SbxINTEGER
;
320 if( nVal
>= SbxMINLNG
&& nVal
<= SbxMAXLNG
)
324 GenError( SbERR_MATH_OVERFLOW
);
325 // too many digits for SINGLE?
326 // if (ndig > 15 || ncdig > 6)
327 // eScanType = SbxDOUBLE;
329 // if( nVal > SbxMAXSNG || nVal < SbxMINSNG )
330 // eScanType = SbxDOUBLE;
333 SbxDataType t
= GetSuffixType( *pLine
);
334 if( t
!= SbxVARIANT
)
342 // Hex/octal number? Read and convert it:
343 else if( *pLine
== '&' )
// Digit sets: cmp1 for base 16 (the default below), cmp2 for base 8.
346 sal_Unicode cmp1
[] = { '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F', 0 };
347 sal_Unicode cmp2
[] = { '0', '1', '2', '3', '4', '5', '6', '7', 0 };
348 sal_Unicode
*cmp
= cmp1
;
349 //char *cmp = "0123456789ABCDEF";
350 sal_Unicode base
= 16;
351 sal_Unicode ndig
= 8;
352 sal_Unicode xch
= *pLine
++ & 0xFF; nCol
++;
353 switch( toupper( xch
) )
356 cmp
= cmp2
; base
= 8; ndig
= 11; break;
357 //cmp = "01234567"; base = 8; ndig = 11; break;
361 // Is treated as an operator
362 pLine
--; nCol
--; nCol1
= nCol
-1; aSym
= '&'; return SYMBOL
;
367 BOOL bBufOverflow
= FALSE
;
368 while( BasicSimpleCharClass::isAlphaNumeric( *pLine
& 0xFF, bCompatible
) )
370 sal_Unicode ch
= sal::static_int_cast
< sal_Unicode
>(
371 toupper( *pLine
& 0xFF ) );
373 // AB 4.1.1996: Buffer full, keep scanning without storing
374 if( (p
-buf
) == (BUF_SIZE
-1) )
376 else if( String( cmp
).Search( ch
) != STRING_NOTFOUND
)
377 //else if( strchr( cmp, ch ) )
382 GenError( SbERR_BAD_CHAR_IN_NUMBER
);
// Convert the collected digits to a value in the selected base.
386 for( p
= buf
; *p
; p
++ )
388 i
= (*p
& 0xFF) - '0';
390 l
= ( l
* base
) + i
;
393 GenError( SbERR_MATH_OVERFLOW
); break;
// A trailing '&' after the number is consumed as well.
396 if( *pLine
== '&' ) pLine
++, nCol
++;
398 eScanType
= ( l
>= SbxMININT
&& l
<= SbxMAXINT
) ? SbxINTEGER
: SbxLONG
;
400 GenError( SbERR_MATH_OVERFLOW
);
404 else if( *pLine
== '"' || *pLine
== '[' )
406 sal_Unicode cSep
= *pLine
;
408 bSymbol
= TRUE
, cSep
= ']';
413 while( *pLine
&& ( *pLine
!= cSep
) );
417 if( *pLine
!= cSep
|| cSep
== ']' ) break;
418 } else aError
= cSep
, GenError( SbERR_EXPECTED
);
420 // If VBA Interop then don't eat the [] chars
421 if ( cSep
== ']' && bVBASupportOn
)
422 aSym
= aLine
.copy( n
- 1, nCol
- n
+ 1);
424 aSym
= aLine
.copy( n
, nCol
- n
- 1 );
425 // Remove doubled string delimiters
431 nIdx
= aSym
.Search( s
, nIdx
);
432 if( nIdx
== STRING_NOTFOUND
)
434 aSym
.Erase( nIdx
, 1 );
439 eScanType
= ( cSep
== '#' ) ? SbxDATE
: SbxSTRING
;
441 // invalid characters:
442 else if( ( *pLine
& 0xFF ) >= 0x7F )
444 GenError( SbERR_SYNTAX
); pLine
++; nCol
++;
// Two-character operators: "<>", "<=", ">=" and ":=".
452 case '<': if( *pLine
== '>' || *pLine
== '=' ) n
= 2; break;
453 case '>': if( *pLine
== '=' ) n
= 2; break;
454 case ':': if( *pLine
== '=' ) n
= 2; break;
456 aSym
= aLine
.copy( nCol
, n
);
457 pLine
+= n
-1; nCol
= nCol
+ n
;
// Comment handling: a continued comment from the previous line, a leading
// apostrophe or the keyword REM (but not inside a string) all yield "REM".
464 if( bPrevLineExtentsComment
|| (eScanType
!= SbxSTRING
&&
465 ( aSym
.GetBuffer()[0] == '\'' || aSym
.EqualsIgnoreCaseAscii( "REM" ) ) ) )
467 bPrevLineExtentsComment
= FALSE
;
468 aSym
= String::CreateFromAscii( "REM" );
469 USHORT nLen
= String( pLine
).Len();
// In compatible mode a comment ending in " _" continues on the next line.
// NOTE(review): if nLen is 0 or 1, pLine[ nLen - 1 ] / pLine[ nLen - 2 ] index
// before the start of the remaining text - confirm whether a guard is needed.
470 if( bCompatible
&& pLine
[ nLen
- 1 ] == '_' && pLine
[ nLen
- 2 ] == ' ' )
471 bPrevLineExtentsComment
= TRUE
;
472 nCol2
= nCol2
+ nLen
;
477 // Otherwise end of line: but check for '_' to see whether the
478 // line is continued!
480 if( nCol
&& *--pLine
== '_' )
// The continuation is handled by scanning the following line recursively.
483 bool bRes
= NextSym();
484 if( bVBASupportOn
&& aSym
.GetBuffer()[0] == '.' )
488 // ^^^ <- spaces is legal in MSO VBA
489 OSL_TRACE("*** resetting bSpaces***");
// Definition of the static LetterTable member aLetterTable of BasicSimpleCharClass.
506 LetterTable
BasicSimpleCharClass::aLetterTable
;
// Builds the lookup table IsLetterTab for the character codes 0..255.
// All entries are first cleared; afterwards the accented letters in the range
// 0xC0..0xFF are marked as letters. 0xD7 (multiplication sign) and 0xF7
// (division sign) are not set below and therefore stay false.
508 LetterTable::LetterTable( void )
510 for( int i
= 0 ; i
< 256 ; ++i
)
511 IsLetterTab
[i
] = false;
513 IsLetterTab
[0xC0] = true; // À , CAPITAL LETTER A WITH GRAVE ACCENT
514 IsLetterTab
[0xC1] = true; // Á , CAPITAL LETTER A WITH ACUTE ACCENT
515 IsLetterTab
[0xC2] = true; // Â , CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
516 IsLetterTab
[0xC3] = true; // Ã , CAPITAL LETTER A WITH TILDE
517 IsLetterTab
[0xC4] = true; // Ä , CAPITAL LETTER A WITH DIAERESIS
518 IsLetterTab
[0xC5] = true; // Å , CAPITAL LETTER A WITH RING ABOVE
519 IsLetterTab
[0xC6] = true; // Æ , CAPITAL LIGATURE AE
520 IsLetterTab
[0xC7] = true; // Ç , CAPITAL LETTER C WITH CEDILLA
521 IsLetterTab
[0xC8] = true; // È , CAPITAL LETTER E WITH GRAVE ACCENT
522 IsLetterTab
[0xC9] = true; // É , CAPITAL LETTER E WITH ACUTE ACCENT
523 IsLetterTab
[0xCA] = true; // Ê , CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
524 IsLetterTab
[0xCB] = true; // Ë , CAPITAL LETTER E WITH DIAERESIS
525 IsLetterTab
[0xCC] = true; // Ì , CAPITAL LETTER I WITH GRAVE ACCENT
526 IsLetterTab
[0xCD] = true; // Í , CAPITAL LETTER I WITH ACUTE ACCENT
527 IsLetterTab
[0xCE] = true; // Î , CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
528 IsLetterTab
[0xCF] = true; // Ï , CAPITAL LETTER I WITH DIAERESIS
529 IsLetterTab
[0xD0] = true; // Ð , CAPITAL LETTER ETH
530 IsLetterTab
[0xD1] = true; // Ñ , CAPITAL LETTER N WITH TILDE
531 IsLetterTab
[0xD2] = true; // Ò , CAPITAL LETTER O WITH GRAVE ACCENT
532 IsLetterTab
[0xD3] = true; // Ó , CAPITAL LETTER O WITH ACUTE ACCENT
533 IsLetterTab
[0xD4] = true; // Ô , CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
534 IsLetterTab
[0xD5] = true; // Õ , CAPITAL LETTER O WITH TILDE
535 IsLetterTab
[0xD6] = true; // Ö , CAPITAL LETTER O WITH DIAERESIS
536 IsLetterTab
[0xD8] = true; // Ø , CAPITAL LETTER O WITH STROKE
537 IsLetterTab
[0xD9] = true; // Ù , CAPITAL LETTER U WITH GRAVE ACCENT
538 IsLetterTab
[0xDA] = true; // Ú , CAPITAL LETTER U WITH ACUTE ACCENT
539 IsLetterTab
[0xDB] = true; // Û , CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
540 IsLetterTab
[0xDC] = true; // Ü , CAPITAL LETTER U WITH DIAERESIS
541 IsLetterTab
[0xDD] = true; // Ý , CAPITAL LETTER Y WITH ACUTE ACCENT
542 IsLetterTab
[0xDE] = true; // Þ , CAPITAL LETTER THORN
543 IsLetterTab
[0xDF] = true; // ß , SMALL LETTER SHARP S
544 IsLetterTab
[0xE0] = true; // à , SMALL LETTER A WITH GRAVE ACCENT
545 IsLetterTab
[0xE1] = true; // á , SMALL LETTER A WITH ACUTE ACCENT
546 IsLetterTab
[0xE2] = true; // â , SMALL LETTER A WITH CIRCUMFLEX ACCENT
547 IsLetterTab
[0xE3] = true; // ã , SMALL LETTER A WITH TILDE
548 IsLetterTab
[0xE4] = true; // ä , SMALL LETTER A WITH DIAERESIS
549 IsLetterTab
[0xE5] = true; // å , SMALL LETTER A WITH RING ABOVE
550 IsLetterTab
[0xE6] = true; // æ , SMALL LIGATURE AE
551 IsLetterTab
[0xE7] = true; // ç , SMALL LETTER C WITH CEDILLA
552 IsLetterTab
[0xE8] = true; // è , SMALL LETTER E WITH GRAVE ACCENT
553 IsLetterTab
[0xE9] = true; // é , SMALL LETTER E WITH ACUTE ACCENT
554 IsLetterTab
[0xEA] = true; // ê , SMALL LETTER E WITH CIRCUMFLEX ACCENT
555 IsLetterTab
[0xEB] = true; // ë , SMALL LETTER E WITH DIAERESIS
556 IsLetterTab
[0xEC] = true; // ì , SMALL LETTER I WITH GRAVE ACCENT
557 IsLetterTab
[0xED] = true; // í , SMALL LETTER I WITH ACUTE ACCENT
558 IsLetterTab
[0xEE] = true; // î , SMALL LETTER I WITH CIRCUMFLEX ACCENT
559 IsLetterTab
[0xEF] = true; // ï , SMALL LETTER I WITH DIAERESIS
560 IsLetterTab
[0xF0] = true; // ð , SMALL LETTER ETH
561 IsLetterTab
[0xF1] = true; // ñ , SMALL LETTER N WITH TILDE
562 IsLetterTab
[0xF2] = true; // ò , SMALL LETTER O WITH GRAVE ACCENT
563 IsLetterTab
[0xF3] = true; // ó , SMALL LETTER O WITH ACUTE ACCENT
564 IsLetterTab
[0xF4] = true; // ô , SMALL LETTER O WITH CIRCUMFLEX ACCENT
565 IsLetterTab
[0xF5] = true; // õ , SMALL LETTER O WITH TILDE
566 IsLetterTab
[0xF6] = true; // ö , SMALL LETTER O WITH DIAERESIS
567 IsLetterTab
[0xF8] = true; // ø , SMALL LETTER O WITH STROKE
568 IsLetterTab
[0xF9] = true; // ù , SMALL LETTER U WITH GRAVE ACCENT
569 IsLetterTab
[0xFA] = true; // ú , SMALL LETTER U WITH ACUTE ACCENT
570 IsLetterTab
[0xFB] = true; // û , SMALL LETTER U WITH CIRCUMFLEX ACCENT
571 IsLetterTab
[0xFC] = true; // ü , SMALL LETTER U WITH DIAERESIS
572 IsLetterTab
[0xFD] = true; // ý , SMALL LETTER Y WITH ACUTE ACCENT
573 IsLetterTab
[0xFE] = true; // þ , SMALL LETTER THORN
574 IsLetterTab
[0xFF] = true; // ÿ , SMALL LETTER Y WITH DIAERESIS
// Letter test for a Unicode character, answered by a CharClass instance that
// is created for the application locale.
// NOTE(review): the declaration of aStr and the return statement are not
// visible in this excerpt; presumably aStr holds c and bRet is returned - confirm.
577 bool LetterTable::isLetterUnicode( sal_Unicode c
)
// The CharClass is created on the first call and kept in a function-local
// static pointer; no matching delete is visible here.
579 static CharClass
* pCharClass
= NULL
;
580 if( pCharClass
== NULL
)
581 pCharClass
= new CharClass( Application::GetSettings().GetLocale() );
583 bool bRet
= pCharClass
->isLetter( aStr
, 0 );