Update ooo320-m1
[ooovba.git] / basic / source / comp / scanner.cxx
blob07033cd7eafc6879743d9d6cb00d4d32d32d93bb
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: scanner.cxx,v $
10 * $Revision: 1.25 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_basic.hxx"
34 #include "sbcomp.hxx"
35 #include <stdio.h>
36 #include <string.h>
37 #include <ctype.h>
38 #if defined UNX
39 #include <stdlib.h>
40 #else
41 #include <math.h> // atof()
42 #endif
43 #include <rtl/math.hxx>
44 #include <vcl/svapp.hxx>
45 #include <unotools/charclass.hxx>
47 #include <runtime.hxx>
49 SbiScanner::SbiScanner( const ::rtl::OUString& rBuf, StarBASIC* p ) : aBuf( rBuf )
51 pBasic = p;
52 pLine = NULL;
53 nVal = 0;
54 eScanType = SbxVARIANT;
55 nErrors = 0;
56 nBufPos = 0;
57 nCurCol1 = 0;
58 nSavedCol1 = 0;
59 nColLock = 0;
60 nLine = 0;
61 nCol1 = 0;
62 nCol2 = 0;
63 nCol = 0;
64 bError =
65 bAbort =
66 bSpaces =
67 bNumber =
68 bSymbol =
69 bUsedForHilite =
70 bCompatible =
71 bVBASupportOn =
72 bPrevLineExtentsComment = FALSE;
73 bHash =
74 bErrors = TRUE;
77 SbiScanner::~SbiScanner()
80 void SbiScanner::LockColumn()
82 if( !nColLock++ )
83 nSavedCol1 = nCol1;
86 void SbiScanner::UnlockColumn()
88 if( nColLock )
89 nColLock--;
92 void SbiScanner::GenError( SbError code )
94 if( GetSbData()->bBlockCompilerError )
96 bAbort = TRUE;
97 return;
99 if( !bError && bErrors )
101 BOOL bRes = TRUE;
102 // Nur einen Fehler pro Statement reporten
103 bError = TRUE;
104 if( pBasic )
106 // Falls EXPECTED oder UNEXPECTED kommen sollte, bezieht es sich
107 // immer auf das letzte Token, also die Col1 uebernehmen
108 USHORT nc = nColLock ? nSavedCol1 : nCol1;
109 switch( code )
111 case SbERR_EXPECTED:
112 case SbERR_UNEXPECTED:
113 case SbERR_SYMBOL_EXPECTED:
114 case SbERR_LABEL_EXPECTED:
115 nc = nCol1;
116 if( nc > nCol2 ) nCol2 = nc;
117 break;
119 bRes = pBasic->CError( code, aError, nLine, nc, nCol2 );
121 bAbort |= !bRes |
122 ( code == SbERR_NO_MEMORY || code == SbERR_PROG_TOO_LARGE );
124 if( bErrors )
125 nErrors++;
128 // Falls sofort ein Doppelpunkt folgt, wird TRUE zurueckgeliefert.
129 // Wird von SbiTokenizer::MayBeLabel() verwendet, um einen Label zu erkennen
131 BOOL SbiScanner::DoesColonFollow()
133 if( pLine && *pLine == ':' )
135 pLine++; nCol++; return TRUE;
137 else return FALSE;
140 // Testen auf ein legales Suffix
142 static SbxDataType GetSuffixType( sal_Unicode c )
144 static String aSuffixesStr = String::CreateFromAscii( "%&!#@ $" );
145 if( c )
147 sal_uInt32 n = aSuffixesStr.Search( c );
148 if( STRING_NOTFOUND != n && c != ' ' )
149 return SbxDataType( (USHORT) n + SbxINTEGER );
151 return SbxVARIANT;
// NOTE(review): each line of this listing still starts with the line number it
// had in the original file, and the numbering has gaps where lines are absent
// (presumably blank lines and lines that held only a brace - confirm against
// the real file before relying on the block structure).
154 // Read the next symbol into the variables aSym, nVal and eScanType.
155 // Return value is FALSE on EOF or errors
// NOTE(review): the only `return FALSE` visible below is the end-of-buffer
// check; the error paths call GenError() and keep going - confirm the
// "or errors" part of the line above.
//
// Outline, in the order the steps appear below:
//   1. remember the old line/column values and reset the per-symbol state
//   2. if no line is loaded (pLine == NULL), cut the next line out of aBuf
//   3. skip blanks, tabs and form feeds (sets bSpaces)
//   4. an exhausted line jumps to `eoln`; a pending continued comment jumps
//      straight to the comment handling
//   5. a leading '#' is consumed and recorded in bHash
//   6. classify what follows: identifier, decimal number, &H/&O number,
//      quoted string or [bracketed] symbol, invalid character, or an
//      operator of one or two characters
//   7. comment detection: the rest of the line becomes a "REM" symbol
//   8. `eoln`: handle a trailing '_' continuation, otherwise deliver '\n'
//
// BUF_SIZE is the capacity of the local digit buffer used for numbers.
156 #define BUF_SIZE 80
158 BOOL SbiScanner::NextSym()
160 // Remember these for the end-of-line case
161 USHORT nOldLine = nLine;
162 USHORT nOldCol1 = nCol1;
163 USHORT nOldCol2 = nCol2;
164 sal_Unicode buf[ BUF_SIZE ], *p = buf;
165 bHash = FALSE;
167 eScanType = SbxVARIANT;
168 aSym.Erase();
169 bSymbol =
170 bNumber = bSpaces = FALSE;
172 // Need to fetch a new line?
173 if( !pLine )
175 INT32 n = nBufPos;
176 INT32 nLen = aBuf.getLength();
// Whole buffer consumed: this is the EOF return.
177 if( nBufPos >= nLen )
178 return FALSE;
179 const sal_Unicode* p2 = aBuf.getStr();
180 p2 += n;
// Advance to the next '\n' or '\r', or to the end of the buffer.
181 while( ( n < nLen ) && ( *p2 != '\n' ) && ( *p2 != '\r' ) )
182 p2++, n++;
183 aLine = aBuf.copy( nBufPos, n - nBufPos );
// Step over the line terminator: two characters for "\r\n", otherwise one.
184 if( n < nLen )
186 if( *p2 == '\r' && *( p2+1 ) == '\n' )
187 n += 2;
188 else
189 n++;
191 nBufPos = n;
192 pLine = aLine.getStr();
193 nOldLine = ++nLine;
194 nCol = nCol1 = nCol2 = nOldCol1 = nOldCol2 = 0;
195 nColLock = 0;
198 // Skip leading blanks:
199 while( *pLine && (( *pLine == ' ' ) || ( *pLine == '\t' ) || ( *pLine == '\f' )) )
200 pLine++, nCol++, bSpaces = TRUE;
// nCol1 marks the column where the symbol starts.
202 nCol1 = nCol;
204 // Nothing but blanks left?
205 if( !*pLine )
206 goto eoln;
// A comment on the previous line ended in " _" (see the comment handling
// further down), so the rest of this line is treated as comment as well.
208 if( bPrevLineExtentsComment )
209 goto PrevLineCommentLbl;
// A leading '#' is consumed and only recorded in bHash.
211 if( *pLine == '#' )
213 pLine++;
214 nCol++;
215 bHash = TRUE;
218 // Symbol? Then take over its characters.
219 if( BasicSimpleCharClass::isAlpha( *pLine, bCompatible ) || *pLine == '_' )
221 // If nothing follows the '_', this is the end of the line!
222 if( *pLine == '_' && !*(pLine+1) )
223 { pLine++;
224 goto eoln; }
225 bSymbol = TRUE;
// n is the start column of the identifier within aLine.
226 short n = nCol;
227 for ( ; (BasicSimpleCharClass::isAlphaNumeric( *pLine, bCompatible ) || ( *pLine == '_' ) ); pLine++ )
228 nCol++;
229 aSym = aLine.copy( n, nCol - n );
230 // Replace a trailing '_' with a space if the end of line follows
231 // (otherwise it would be taken for a line continuation)
232 if( !bUsedForHilite && !*pLine && *(pLine-1) == '_' )
234 aSym.GetBufferAccess(); // #109693 force copy if necessary
// Writes into the line buffer in place, through a cast that removes const.
235 *((sal_Unicode*)(pLine-1)) = ' '; // cast needed because of const
237 // Type suffix?
238 // Do not test the exclamation mark if
239 // another symbol follows it
240 else if( *pLine != '!' || !BasicSimpleCharClass::isAlpha( pLine[ 1 ], bCompatible ) )
242 SbxDataType t = GetSuffixType( *pLine );
243 if( t != SbxVARIANT )
245 eScanType = t;
246 pLine++;
247 nCol++;
252 // Number? Then read and convert it.
253 else if( BasicSimpleCharClass::isDigit( *pLine & 0xFF )
254 || ( *pLine == '.' && BasicSimpleCharClass::isDigit( *(pLine+1) & 0xFF ) ) )
// exp / comma count exponent markers and decimal points; ndig / ncdig count
// digits (ncdig only those after the decimal point).
256 short exp = 0;
257 short comma = 0;
258 short ndig = 0;
259 short ncdig = 0;
260 eScanType = SbxDOUBLE;
261 BOOL bBufOverflow = FALSE;
// The `&& *pLine` is needed because strchr() also matches the terminating 0.
// NOTE(review): strchr() takes an int that is converted to char, so a
// sal_Unicode above 0xFF is truncated before the lookup - confirm that this
// cannot make a non-ASCII character pass as part of a number.
262 while( strchr( "0123456789.DEde", *pLine ) && *pLine )
264 // AB 4.1.1996: buffer full? -> keep scanning without storing
265 if( (p-buf) == (BUF_SIZE-1) )
267 bBufOverflow = TRUE;
268 pLine++, nCol++;
269 continue;
271 // Decimal point or exponent?
272 if( *pLine == '.' )
// A second decimal point is skipped here and reported after the loop.
274 if( ++comma > 1 )
276 pLine++; nCol++; continue;
278 else *p++ = *pLine++, nCol++;
280 else if( strchr( "DdEe", *pLine ) )
// A second exponent marker is skipped here and reported after the loop.
282 if (++exp > 1)
284 pLine++; nCol++; continue;
286 // if( toupper( *pLine ) == 'D' )
287 // eScanType = SbxDOUBLE;
// Both the 'D' and the 'E' spelling are stored as 'E' in the buffer.
288 *p++ = 'E'; pLine++; nCol++;
289 // Sign after the exponent?
// A '+' is consumed but not stored; a '-' is copied into the buffer.
290 if( *pLine == '+' )
291 pLine++, nCol++;
292 else
293 if( *pLine == '-' )
294 *p++ = *pLine++, nCol++;
296 else
298 *p++ = *pLine++, nCol++;
299 if( comma && !exp ) ncdig++;
301 if (!exp) ndig++;
303 *p = 0;
// NOTE(review): p points at the terminator written on the previous line, so
// this presumably assigns an empty string rather than the digits in buf -
// confirm whether `buf` was intended.
304 aSym = p; bNumber = TRUE;
305 // Decimal point or exponent present more than once?
306 if( comma > 1 || exp > 1 )
307 { aError = '.';
308 GenError( SbERR_BAD_CHAR_IN_NUMBER ); }
310 // #57844 use the localized function
// Converts [buf, p) using '.' as decimal separator and ',' as group separator.
311 nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', NULL, NULL );
312 // OLD: nVal = atof( buf );
314 ndig = ndig - comma;
// Without decimal point and exponent the literal is narrowed to INTEGER or
// LONG when the value fits; otherwise it stays DOUBLE.
315 if( !comma && !exp )
317 if( nVal >= SbxMININT && nVal <= SbxMAXINT )
318 eScanType = SbxINTEGER;
319 else
320 if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG )
321 eScanType = SbxLONG;
323 if( bBufOverflow )
324 GenError( SbERR_MATH_OVERFLOW );
325 // too many digits for SINGLE?
326 // if (ndig > 15 || ncdig > 6)
327 // eScanType = SbxDOUBLE;
328 // else
329 // if( nVal > SbxMAXSNG || nVal < SbxMINSNG )
330 // eScanType = SbxDOUBLE;
332 // Type suffix?
// An explicit suffix overrides the type derived from the value above.
333 SbxDataType t = GetSuffixType( *pLine );
334 if( t != SbxVARIANT )
336 eScanType = t;
337 pLine++;
338 nCol++;
342 // Hex/octal number? Read and convert:
343 else if( *pLine == '&' )
345 pLine++; nCol++;
// cmp1 / cmp2 are the digit sets accepted for hex and octal respectively.
346 sal_Unicode cmp1[] = { '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F', 0 };
347 sal_Unicode cmp2[] = { '0', '1', '2', '3', '4', '5', '6', '7', 0 };
348 sal_Unicode *cmp = cmp1;
349 //char *cmp = "0123456789ABCDEF";
// Defaults are for hex: base 16 and ndig 8; the 'O' case below switches to
// base 8 and ndig 11.
350 sal_Unicode base = 16;
351 sal_Unicode ndig = 8;
352 sal_Unicode xch = *pLine++ & 0xFF; nCol++;
353 switch( toupper( xch ) )
355 case 'O':
356 cmp = cmp2; base = 8; ndig = 11; break;
357 //cmp = "01234567"; base = 8; ndig = 11; break;
358 case 'H':
359 break;
360 default :
361 // Treated as an operator
// The character after '&' is pushed back and "&" is returned as the symbol.
// NOTE(review): this returns SYMBOL from a function declared BOOL; SYMBOL is
// not defined in this listing - confirm it is intended as a true value.
362 pLine--; nCol--; nCol1 = nCol-1; aSym = '&'; return SYMBOL;
364 bNumber = TRUE;
365 long l = 0;
366 int i;
367 BOOL bBufOverflow = FALSE;
// Collect the digits, upper-cased, into buf; any other alphanumeric
// character is reported as a bad character and dropped.
368 while( BasicSimpleCharClass::isAlphaNumeric( *pLine & 0xFF, bCompatible ) )
370 sal_Unicode ch = sal::static_int_cast< sal_Unicode >(
371 toupper( *pLine & 0xFF ) );
372 pLine++; nCol++;
373 // AB 4.1.1996: buffer full, keep scanning without storing
374 if( (p-buf) == (BUF_SIZE-1) )
375 bBufOverflow = TRUE;
376 else if( String( cmp ).Search( ch ) != STRING_NOTFOUND )
377 //else if( strchr( cmp, ch ) )
378 *p++ = ch;
379 else
381 aError = ch;
382 GenError( SbERR_BAD_CHAR_IN_NUMBER );
385 *p = 0;
// Fold the collected digits into l.
386 for( p = buf; *p; p++ )
// '0'..'9' give 0..9; 'A'..'F' come out as 17..22 and the -7 maps them to
// 10..15.
388 i = (*p & 0xFF) - '0';
389 if( i > 9 ) i -= 7;
390 l = ( l * base ) + i;
// ndig counts down once per digit; overflow is reported on the digit that
// finds it already at zero (the 9th hex digit or the 12th octal digit), after
// that digit has been folded into l.
391 if( !ndig-- )
393 GenError( SbERR_MATH_OVERFLOW ); break;
// A trailing '&' directly after the digits is consumed.
396 if( *pLine == '&' ) pLine++, nCol++;
397 nVal = (double) l;
398 eScanType = ( l >= SbxMININT && l <= SbxMAXINT ) ? SbxINTEGER : SbxLONG;
399 if( bBufOverflow )
400 GenError( SbERR_MATH_OVERFLOW );
403 // Strings:
// Also handles [bracketed] symbols: '[' sets bSymbol and closes with ']'.
404 else if( *pLine == '"' || *pLine == '[' )
406 sal_Unicode cSep = *pLine;
407 if( cSep == '[' )
408 bSymbol = TRUE, cSep = ']';
// n is the column of the first character after the opening delimiter.
409 short n = nCol+1;
410 while( *pLine )
// Run forward to the next delimiter or to the end of the line.
412 do pLine++, nCol++;
413 while( *pLine && ( *pLine != cSep ) );
414 if( *pLine == cSep )
416 pLine++; nCol++;
// A doubled delimiter inside a quoted string keeps the scan going; ']' always
// ends it.
417 if( *pLine != cSep || cSep == ']' ) break;
// End of line without a closing delimiter: report the delimiter as expected.
418 } else aError = cSep, GenError( SbERR_EXPECTED );
420 // If VBA interop is on, don't eat the [] chars
421 if ( cSep == ']' && bVBASupportOn )
422 aSym = aLine.copy( n - 1, nCol - n + 1);
423 else
424 aSym = aLine.copy( n, nCol - n - 1 );
425 // Remove doubled string delimiters
426 String s( cSep );
427 s += cSep;
428 USHORT nIdx = 0;
// NOTE(review): the lines that open this loop are absent from the listing;
// the `while( true )` below closes it, so presumably it is a do/while.
// Each pass erases one character of the next doubled delimiter found.
431 nIdx = aSym.Search( s, nIdx );
432 if( nIdx == STRING_NOTFOUND )
433 break;
434 aSym.Erase( nIdx, 1 );
435 nIdx++;
437 while( true );
// NOTE(review): cSep is either '"' or ']' in this branch, so the
// `cSep == '#'` test can never be true and the result is always SbxSTRING.
438 if( cSep != ']' )
439 eScanType = ( cSep == '#' ) ? SbxDATE : SbxSTRING;
441 // invalid characters:
// Only the low byte of the character is tested against 0x7F.
442 else if( ( *pLine & 0xFF ) >= 0x7F )
444 GenError( SbERR_SYNTAX ); pLine++; nCol++;
446 // other groups:
447 else
// n is the operator length: 2 for "<>", "<=", ">=" and ":=", otherwise 1.
449 short n = 1;
450 switch( *pLine++ )
452 case '<': if( *pLine == '>' || *pLine == '=' ) n = 2; break;
453 case '>': if( *pLine == '=' ) n = 2; break;
454 case ':': if( *pLine == '=' ) n = 2; break;
456 aSym = aLine.copy( nCol, n );
457 pLine += n-1; nCol = nCol + n;
// nCol2 marks the last column of the symbol.
460 nCol2 = nCol-1;
462 PrevLineCommentLbl:
463 // Comment?
// Taken for a symbol starting with an apostrophe or spelled REM (any case),
// unless it was scanned as a string, and for a comment continued from the
// previous line.
464 if( bPrevLineExtentsComment || (eScanType != SbxSTRING &&
465 ( aSym.GetBuffer()[0] == '\'' || aSym.EqualsIgnoreCaseAscii( "REM" ) ) ) )
467 bPrevLineExtentsComment = FALSE;
468 aSym = String::CreateFromAscii( "REM" );
// nLen is the length of the rest of the line.
469 USHORT nLen = String( pLine ).Len();
// In compatible mode a comment ending in " _" continues on the next line.
// NOTE(review): nLen can be 0 or 1 here, in which case the two index
// expressions below address characters in front of pLine - confirm that this
// is always still inside the line buffer.
470 if( bCompatible && pLine[ nLen - 1 ] == '_' && pLine[ nLen - 2 ] == ' ' )
471 bPrevLineExtentsComment = TRUE;
472 nCol2 = nCol2 + nLen;
// The rest of the line is consumed; the next call loads a new line.
473 pLine = NULL;
475 return TRUE;
477 // Otherwise end of line: but check for '_' to see whether
478 // the line continues!
479 eoln:
// The nCol test keeps pLine from being moved back on a line where nothing
// was consumed.
480 if( nCol && *--pLine == '_' )
// Continuation: drop this line and scan the first symbol of the next one.
482 pLine = NULL;
483 bool bRes = NextSym();
484 if( bVBASupportOn && aSym.GetBuffer()[0] == '.' )
486 // object _
487 // .Method
488 // ^^^ <- spaces is legal in MSO VBA
489 OSL_TRACE("*** resetting bSpaces***");
490 bSpaces = FALSE;
492 return bRes;
494 else
// Plain end of line: restore the positions saved on entry and deliver '\n'
// as the symbol.
496 pLine = NULL;
497 nLine = nOldLine;
498 nCol1 = nOldCol1;
499 nCol2 = nOldCol2;
500 aSym = '\n';
501 nColLock = 0;
502 return TRUE;
506 LetterTable BasicSimpleCharClass::aLetterTable;
508 LetterTable::LetterTable( void )
510 for( int i = 0 ; i < 256 ; ++i )
511 IsLetterTab[i] = false;
513 IsLetterTab[0xC0] = true; // À , CAPITAL LETTER A WITH GRAVE ACCENT
514 IsLetterTab[0xC1] = true; // Á , CAPITAL LETTER A WITH ACUTE ACCENT
515 IsLetterTab[0xC2] = true; // Â , CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
516 IsLetterTab[0xC3] = true; // Ã , CAPITAL LETTER A WITH TILDE
517 IsLetterTab[0xC4] = true; // Ä , CAPITAL LETTER A WITH DIAERESIS
518 IsLetterTab[0xC5] = true; // Å , CAPITAL LETTER A WITH RING ABOVE
519 IsLetterTab[0xC6] = true; // Æ , CAPITAL LIGATURE AE
520 IsLetterTab[0xC7] = true; // Ç , CAPITAL LETTER C WITH CEDILLA
521 IsLetterTab[0xC8] = true; // È , CAPITAL LETTER E WITH GRAVE ACCENT
522 IsLetterTab[0xC9] = true; // É , CAPITAL LETTER E WITH ACUTE ACCENT
523 IsLetterTab[0xCA] = true; // Ê , CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
524 IsLetterTab[0xCB] = true; // Ë , CAPITAL LETTER E WITH DIAERESIS
525 IsLetterTab[0xCC] = true; // Ì , CAPITAL LETTER I WITH GRAVE ACCENT
526 IsLetterTab[0xCD] = true; // Í , CAPITAL LETTER I WITH ACUTE ACCENT
527 IsLetterTab[0xCE] = true; // Î , CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
528 IsLetterTab[0xCF] = true; // Ï , CAPITAL LETTER I WITH DIAERESIS
529 IsLetterTab[0xD0] = true; // Ð , CAPITAL LETTER ETH
530 IsLetterTab[0xD1] = true; // Ñ , CAPITAL LETTER N WITH TILDE
531 IsLetterTab[0xD2] = true; // Ò , CAPITAL LETTER O WITH GRAVE ACCENT
532 IsLetterTab[0xD3] = true; // Ó , CAPITAL LETTER O WITH ACUTE ACCENT
533 IsLetterTab[0xD4] = true; // Ô , CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
534 IsLetterTab[0xD5] = true; // Õ , CAPITAL LETTER O WITH TILDE
535 IsLetterTab[0xD6] = true; // Ö , CAPITAL LETTER O WITH DIAERESIS
536 IsLetterTab[0xD8] = true; // Ø , CAPITAL LETTER O WITH STROKE
537 IsLetterTab[0xD9] = true; // Ù , CAPITAL LETTER U WITH GRAVE ACCENT
538 IsLetterTab[0xDA] = true; // Ú , CAPITAL LETTER U WITH ACUTE ACCENT
539 IsLetterTab[0xDB] = true; // Û , CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
540 IsLetterTab[0xDC] = true; // Ü , CAPITAL LETTER U WITH DIAERESIS
541 IsLetterTab[0xDD] = true; // Ý , CAPITAL LETTER Y WITH ACUTE ACCENT
542 IsLetterTab[0xDE] = true; // Þ , CAPITAL LETTER THORN
543 IsLetterTab[0xDF] = true; // ß , SMALL LETTER SHARP S
544 IsLetterTab[0xE0] = true; // à , SMALL LETTER A WITH GRAVE ACCENT
545 IsLetterTab[0xE1] = true; // á , SMALL LETTER A WITH ACUTE ACCENT
546 IsLetterTab[0xE2] = true; // â , SMALL LETTER A WITH CIRCUMFLEX ACCENT
547 IsLetterTab[0xE3] = true; // ã , SMALL LETTER A WITH TILDE
548 IsLetterTab[0xE4] = true; // ä , SMALL LETTER A WITH DIAERESIS
549 IsLetterTab[0xE5] = true; // å , SMALL LETTER A WITH RING ABOVE
550 IsLetterTab[0xE6] = true; // æ , SMALL LIGATURE AE
551 IsLetterTab[0xE7] = true; // ç , SMALL LETTER C WITH CEDILLA
552 IsLetterTab[0xE8] = true; // è , SMALL LETTER E WITH GRAVE ACCENT
553 IsLetterTab[0xE9] = true; // é , SMALL LETTER E WITH ACUTE ACCENT
554 IsLetterTab[0xEA] = true; // ê , SMALL LETTER E WITH CIRCUMFLEX ACCENT
555 IsLetterTab[0xEB] = true; // ë , SMALL LETTER E WITH DIAERESIS
556 IsLetterTab[0xEC] = true; // ì , SMALL LETTER I WITH GRAVE ACCENT
557 IsLetterTab[0xED] = true; // í , SMALL LETTER I WITH ACUTE ACCENT
558 IsLetterTab[0xEE] = true; // î , SMALL LETTER I WITH CIRCUMFLEX ACCENT
559 IsLetterTab[0xEF] = true; // ï , SMALL LETTER I WITH DIAERESIS
560 IsLetterTab[0xF0] = true; // ð , SMALL LETTER ETH
561 IsLetterTab[0xF1] = true; // ñ , SMALL LETTER N WITH TILDE
562 IsLetterTab[0xF2] = true; // ò , SMALL LETTER O WITH GRAVE ACCENT
563 IsLetterTab[0xF3] = true; // ó , SMALL LETTER O WITH ACUTE ACCENT
564 IsLetterTab[0xF4] = true; // ô , SMALL LETTER O WITH CIRCUMFLEX ACCENT
565 IsLetterTab[0xF5] = true; // õ , SMALL LETTER O WITH TILDE
566 IsLetterTab[0xF6] = true; // ö , SMALL LETTER O WITH DIAERESIS
567 IsLetterTab[0xF8] = true; // ø , SMALL LETTER O WITH OBLIQUE BAR
568 IsLetterTab[0xF9] = true; // ù , SMALL LETTER U WITH GRAVE ACCENT
569 IsLetterTab[0xFA] = true; // ú , SMALL LETTER U WITH ACUTE ACCENT
570 IsLetterTab[0xFB] = true; // û , SMALL LETTER U WITH CIRCUMFLEX ACCENT
571 IsLetterTab[0xFC] = true; // ü , SMALL LETTER U WITH DIAERESIS
572 IsLetterTab[0xFD] = true; // ý , SMALL LETTER Y WITH ACUTE ACCENT
573 IsLetterTab[0xFE] = true; // þ , SMALL LETTER THORN
574 IsLetterTab[0xFF] = true; // ÿ , SMALL LETTER Y WITH DIAERESIS
577 bool LetterTable::isLetterUnicode( sal_Unicode c )
579 static CharClass* pCharClass = NULL;
580 if( pCharClass == NULL )
581 pCharClass = new CharClass( Application::GetSettings().GetLocale() );
582 String aStr( c );
583 bool bRet = pCharClass->isLetter( aStr, 0 );
584 return bRet;