Update ooo320-m1
[ooovba.git] / basic / source / comp / token.cxx
blob469f537b816ecbbb75b0e3ea0cd26d6b6413414e
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: token.cxx,v $
10 * $Revision: 1.23 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_basic.hxx"
34 #include <ctype.h>
35 #include "sbcomp.hxx"
37 struct TokenTable { SbiToken t; const char *s; };
39 static short nToken; // Anzahl der Tokens
41 static TokenTable* pTokTable;
43 static TokenTable aTokTable_Basic [] = { // Token-Tabelle:
45 { CAT, "&" },
46 { MUL, "*" },
47 { PLUS, "+" },
48 { MINUS, "-" },
49 { DIV, "/" },
50 { EOS, ":" },
51 { ASSIGN, ":=" },
52 { LT, "<" },
53 { LE, "<=" },
54 { NE, "<>" },
55 { EQ, "=" },
56 { GT, ">" },
57 { GE, ">=" },
58 { ACCESS, "Access" },
59 { ALIAS, "Alias" },
60 { AND, "And" },
61 { ANY, "Any" },
62 { APPEND, "Append" },
63 { AS, "As" },
64 { ATTRIBUTE,"Attribute" },
65 { BASE, "Base" },
66 { BINARY, "Binary" },
67 { TBOOLEAN, "Boolean" },
68 { BYREF, "ByRef", },
69 { TBYTE, "Byte", },
70 { BYVAL, "ByVal", },
71 { CALL, "Call" },
72 { CASE, "Case" },
73 { _CDECL_, "Cdecl" },
74 { CLASSMODULE, "ClassModule" },
75 { CLOSE, "Close" },
76 { COMPARE, "Compare" },
77 { COMPATIBLE,"Compatible" },
78 { _CONST_, "Const" },
79 { TCURRENCY,"Currency" },
80 { TDATE, "Date" },
81 { DECLARE, "Declare" },
82 { DEFBOOL, "DefBool" },
83 { DEFCUR, "DefCur" },
84 { DEFDATE, "DefDate" },
85 { DEFDBL, "DefDbl" },
86 { DEFERR, "DefErr" },
87 { DEFINT, "DefInt" },
88 { DEFLNG, "DefLng" },
89 { DEFOBJ, "DefObj" },
90 { DEFSNG, "DefSng" },
91 { DEFSTR, "DefStr" },
92 { DEFVAR, "DefVar" },
93 { DIM, "Dim" },
94 { DO, "Do" },
95 { TDOUBLE, "Double" },
96 { EACH, "Each" },
97 { ELSE, "Else" },
98 { ELSEIF, "ElseIf" },
99 { END, "End" },
100 { ENDENUM, "End Enum" },
101 { ENDFUNC, "End Function" },
102 { ENDIF, "End If" },
103 { ENDPROPERTY, "End Property" },
104 { ENDSELECT,"End Select" },
105 { ENDSUB, "End Sub" },
106 { ENDTYPE, "End Type" },
107 { ENDIF, "EndIf" },
108 { ENUM, "Enum" },
109 { EQV, "Eqv" },
110 { ERASE, "Erase" },
111 { _ERROR_, "Error" },
112 { EXIT, "Exit" },
113 { EXPLICIT, "Explicit" },
114 { FOR, "For" },
115 { FUNCTION, "Function" },
116 { GET, "Get" },
117 { GLOBAL, "Global" },
118 { GOSUB, "GoSub" },
119 { GOTO, "GoTo" },
120 { IF, "If" },
121 { IMP, "Imp" },
122 { IMPLEMENTS, "Implements" },
123 { _IN_, "In" },
124 { INPUT, "Input" }, // auch INPUT #
125 { TINTEGER, "Integer" },
126 { IS, "Is" },
127 { LET, "Let" },
128 { LIB, "Lib" },
129 { LIKE, "Like" },
130 { LINE, "Line" },
131 { LINEINPUT,"Line Input" },
132 { LOCAL, "Local" },
133 { LOCK, "Lock" },
134 { TLONG, "Long" },
135 { LOOP, "Loop" },
136 { LPRINT, "LPrint" },
137 { LSET, "LSet" }, // JSM
138 { MOD, "Mod" },
139 { NAME, "Name" },
140 { NEW, "New" },
141 { NEXT, "Next" },
142 { NOT, "Not" },
143 { TOBJECT, "Object" },
144 { ON, "On" },
145 { OPEN, "Open" },
146 { OPTION, "Option" },
147 { _OPTIONAL_, "Optional" },
148 { OR, "Or" },
149 { OUTPUT, "Output" },
150 { PARAMARRAY, "ParamArray" },
151 { PRESERVE, "Preserve" },
152 { PRINT, "Print" },
153 { PRIVATE, "Private" },
154 { PROPERTY, "Property" },
155 { PUBLIC, "Public" },
156 { RANDOM, "Random" },
157 { READ, "Read" },
158 { REDIM, "ReDim" },
159 { REM, "Rem" },
160 { RESUME, "Resume" },
161 { RETURN, "Return" },
162 { RSET, "RSet" }, // JSM
163 { SELECT, "Select" },
164 { SET, "Set" },
165 #ifdef SHARED
166 #undef SHARED
167 #define tmpSHARED
168 #endif
169 { SHARED, "Shared" },
170 #ifdef tmpSHARED
171 #define SHARED
172 #undef tmpSHARED
173 #endif
174 { TSINGLE, "Single" },
175 { STATIC, "Static" },
176 { STEP, "Step" },
177 { STOP, "Stop" },
178 { TSTRING, "String" },
179 { SUB, "Sub" },
180 { STOP, "System" },
181 { TEXT, "Text" },
182 { THEN, "Then" },
183 { TO, "To", },
184 { TYPE, "Type" },
185 { TYPEOF, "TypeOf" },
186 { UNTIL, "Until" },
187 { TVARIANT, "Variant" },
188 { VBASUPPORT, "VbaSupport" },
189 { WEND, "Wend" },
190 { WHILE, "While" },
191 { WITH, "With" },
192 { WRITE, "Write" }, // auch WRITE #
193 { XOR, "Xor" },
194 { NIL, "" }
198 TokenTable aTokTable_Java [] = { // Token-Tabelle:
200 { JS_LOG_NOT, "!" },
201 { JS_NE, "!=" },
202 { JS_MOD, "%" },
203 { JS_ASS_MOD, "%=" },
204 { JS_BIT_AND, "&" },
205 { JS_LOG_AND, "&&" },
206 { JS_ASS_AND, "&=" },
207 { JS_LPAREN, "(" },
208 { JS_RPAREN, ")" },
209 { JS_MUL, "*" },
210 { JS_ASS_MUL, "*=" },
211 { JS_PLUS, "+" },
212 { JS_INC, "++" },
213 { JS_ASS_PLUS, "+=" },
214 { JS_COMMA, "," },
215 { JS_MINUS, "-" },
216 { JS_DEC, "--" },
217 { JS_ASS_MINUS, "-=" },
218 { JS_DIV, "/" },
219 { JS_ASS_DIV, "/=" },
220 { JS_COND_SEL, ":" },
221 { JS_LT, "<" },
222 { JS_LSHIFT, "<<" },
223 { JS_ASS_LSHIFT,"<<=" },
224 { JS_LE, "<=" },
225 { JS_NE, "<>" },
226 { JS_ASSIGNMENT,"=" },
227 { JS_EQ, "==" },
228 { JS_GT, ">" },
229 { JS_RSHIFT, ">>" },
230 { JS_ASS_RSHIFT,">>=" },
231 { JS_RSHIFT_Z, ">>>" },
232 { JS_ASS_RSHIFT_Z,">>>=" },
233 { JS_GE, ">=" },
234 { JS_COND_QUEST,"?" },
235 { ACCESS, "Access" },
236 { ALIAS, "Alias" },
237 { AND, "And" },
238 { ANY, "Any" },
239 { APPEND, "Append" },
240 { AS, "As" },
241 { BASE, "Base" },
242 { BINARY, "Binary" },
243 { TBOOLEAN, "Boolean" },
244 { BYVAL, "ByVal", },
245 { CALL, "Call" },
246 { CASE, "Case" },
247 { _CDECL_, "Cdecl" },
248 { CLOSE, "Close" },
249 { COMPARE, "Compare" },
250 { _CONST_, "Const" },
251 { TCURRENCY,"Currency" },
252 { TDATE, "Date" },
253 { DECLARE, "Declare" },
254 { DEFBOOL, "DefBool" },
255 { DEFCUR, "DefCur" },
256 { DEFDATE, "DefDate" },
257 { DEFDBL, "DefDbl" },
258 { DEFERR, "DefErr" },
259 { DEFINT, "DefInt" },
260 { DEFLNG, "DefLng" },
261 { DEFOBJ, "DefObj" },
262 { DEFSNG, "DefSng" },
263 { DEFSTR, "DefStr" },
264 { DEFVAR, "DefVar" },
265 { DIM, "Dim" },
266 { DO, "Do" },
267 { TDOUBLE, "Double" },
268 { EACH, "Each" },
269 { ELSE, "Else" },
270 { ELSEIF, "ElseIf" },
271 { END, "End" },
272 { ENDFUNC, "End Function" },
273 { ENDIF, "End If" },
274 { ENDSELECT,"End Select" },
275 { ENDSUB, "End Sub" },
276 { ENDTYPE, "End Type" },
277 { ENDIF, "EndIf" },
278 { EQV, "Eqv" },
279 { ERASE, "Erase" },
280 { _ERROR_, "Error" },
281 { EXIT, "Exit" },
282 { EXPLICIT, "Explicit" },
283 { FOR, "For" },
284 { FUNCTION, "Function" },
285 { GLOBAL, "Global" },
286 { GOSUB, "GoSub" },
287 { GOTO, "GoTo" },
288 { IF, "If" },
289 { IMP, "Imp" },
290 { _IN_, "In" },
291 { INPUT, "Input" }, // auch INPUT #
292 { TINTEGER, "Integer" },
293 { IS, "Is" },
294 { LET, "Let" },
295 { LIB, "Lib" },
296 { LINE, "Line" },
297 { LINEINPUT,"Line Input" },
298 { LOCAL, "Local" },
299 { LOCK, "Lock" },
300 { TLONG, "Long" },
301 { LOOP, "Loop" },
302 { LPRINT, "LPrint" },
303 { LSET, "LSet" }, // JSM
304 { MOD, "Mod" },
305 { NAME, "Name" },
306 { NEW, "New" },
307 { NEXT, "Next" },
308 { NOT, "Not" },
309 { TOBJECT, "Object" },
310 { ON, "On" },
311 { OPEN, "Open" },
312 { OPTION, "Option" },
313 { _OPTIONAL_, "Optional" },
314 { OR, "Or" },
315 { OUTPUT, "Output" },
316 { PRESERVE, "Preserve" },
317 { PRINT, "Print" },
318 { PRIVATE, "Private" },
319 { PUBLIC, "Public" },
320 { RANDOM, "Random" },
321 { READ, "Read" },
322 { REDIM, "ReDim" },
323 { REM, "Rem" },
324 { RESUME, "Resume" },
325 { RETURN, "Return" },
326 { RSET, "RSet" }, // JSM
327 { SELECT, "Select" },
328 { SET, "Set" },
329 { SHARED, "Shared" },
330 { TSINGLE, "Single" },
331 { STATIC, "Static" },
332 { STEP, "Step" },
333 { STOP, "Stop" },
334 { TSTRING, "String" },
335 { SUB, "Sub" },
336 { STOP, "System" },
337 { TEXT, "Text" },
338 { THEN, "Then" },
339 { TO, "To", },
340 { TYPE, "Type" },
341 { UNTIL, "Until" },
342 { TVARIANT, "Variant" },
343 { WEND, "Wend" },
344 { WHILE, "While" },
345 { WITH, "With" },
346 { WRITE, "Write" }, // auch WRITE #
347 { XOR, "Xor" },
348 { JS_LINDEX, "[" },
349 { JS_RINDEX, "]" },
350 { JS_BIT_XOR, "^" },
351 { JS_ASS_XOR, "^=" },
352 { JS_BIT_OR, "|" },
353 { JS_ASS_OR, "|=" },
354 { JS_LOG_OR, "||" },
355 { JS_BIT_NOT, "~" },
356 { NIL }
360 // Der Konstruktor ermittelt die Laenge der Token-Tabelle.
362 SbiTokenizer::SbiTokenizer( const ::rtl::OUString& rSrc, StarBASIC* pb )
363 : SbiScanner( rSrc, pb )
365 pTokTable = aTokTable_Basic;
366 //if( StarBASIC::GetGlobalLanguageMode() == SB_LANG_JAVASCRIPT )
367 // pTokTable = aTokTable_Java;
368 TokenTable *tp;
369 bEof = bAs = FALSE;
370 eCurTok = NIL;
371 ePush = NIL;
372 bEos = bKeywords = bErrorIsSymbol = TRUE;
373 if( !nToken )
374 for( nToken = 0, tp = pTokTable; tp->t; nToken++, tp++ ) {}
377 SbiTokenizer::~SbiTokenizer()
380 // Wiederablage (Pushback) eines Tokens. (Bis zu 2 Tokens)
382 void SbiTokenizer::Push( SbiToken t )
384 if( ePush != NIL )
385 Error( SbERR_INTERNAL_ERROR, "PUSH" );
386 else ePush = t;
389 void SbiTokenizer::Error( SbError code, const char* pMsg )
391 aError = String::CreateFromAscii( pMsg );
392 Error( code );
395 void SbiTokenizer::Error( SbError code, String aMsg )
397 aError = aMsg;
398 Error( code );
401 void SbiTokenizer::Error( SbError code, SbiToken tok )
403 aError = Symbol( tok );
404 Error( code );
407 // Einlesen des naechsten Tokens, ohne dass das Token geschluckt wird
409 SbiToken SbiTokenizer::Peek()
411 if( ePush == NIL )
413 USHORT nOldLine = nLine;
414 USHORT nOldCol1 = nCol1;
415 USHORT nOldCol2 = nCol2;
416 ePush = Next();
417 nPLine = nLine; nLine = nOldLine;
418 nPCol1 = nCol1; nCol1 = nOldCol1;
419 nPCol2 = nCol2; nCol2 = nOldCol2;
421 return eCurTok = ePush;
424 // Dies ist fuer die Decompilation.
425 // Zahlen und Symbole liefern einen Leerstring zurueck.
427 const String& SbiTokenizer::Symbol( SbiToken t )
429 // Zeichen-Token?
430 if( t < FIRSTKWD )
432 aSym = (char) t;
433 return aSym;
435 switch( t )
437 case NEG : aSym = '-'; return aSym;
438 case EOS : aSym = String::CreateFromAscii( ":/CRLF" ); return aSym;
439 case EOLN : aSym = String::CreateFromAscii( "CRLF" ); return aSym;
440 default: break;
442 TokenTable* tp = pTokTable;
443 for( short i = 0; i < nToken; i++, tp++ )
445 if( tp->t == t )
447 aSym = String::CreateFromAscii( tp->s );
448 return aSym;
451 const sal_Unicode *p = aSym.GetBuffer();
452 if (*p <= ' ') aSym = String::CreateFromAscii( "???" );
453 return aSym;
456 // Einlesen des naechsten Tokens und Ablage desselben
457 // Tokens, die nicht in der Token-Tabelle vorkommen, werden
458 // direkt als Zeichen zurueckgeliefert.
459 // Einige Worte werden gesondert behandelt.
461 SbiToken SbiTokenizer::Next()
463 if (bEof) return EOLN;
464 // Schon eines eingelesen?
465 if( ePush != NIL )
467 eCurTok = ePush;
468 ePush = NIL;
469 nLine = nPLine;
470 nCol1 = nPCol1;
471 nCol2 = nPCol2;
472 bEos = IsEoln( eCurTok );
473 return eCurTok;
475 TokenTable *tp;
477 // Sonst einlesen:
478 if( !NextSym() )
480 bEof = bEos = TRUE;
481 return eCurTok = EOLN;
483 // Zeilenende?
484 if( aSym.GetBuffer()[0] == '\n' )
486 bEos = TRUE; return eCurTok = EOLN;
488 bEos = FALSE;
490 // Zahl?
491 if( bNumber )
492 return eCurTok = NUMBER;
494 // String?
495 else if( ( eScanType == SbxDATE || eScanType == SbxSTRING ) && !bSymbol )
496 return eCurTok = FIXSTRING;
497 // Sonderfaelle von Zeichen, die zwischen "Z" und "a" liegen. ICompare()
498 // wertet die Position dieser Zeichen unterschiedlich aus.
499 else if( aSym.GetBuffer()[0] == '^' )
500 return eCurTok = EXPON;
501 else if( aSym.GetBuffer()[0] == '\\' )
502 return eCurTok = IDIV;
503 else
505 // Mit Typkennung oder ein Symbol und keine Keyword-Erkennung?
506 // Dann kein Token-Test
507 if( eScanType != SbxVARIANT
508 || ( !bKeywords && bSymbol ) )
509 return eCurTok = SYMBOL;
510 // Gueltiges Token?
511 short lb = 0;
512 short ub = nToken-1;
513 short delta;
516 delta = (ub - lb) >> 1;
517 tp = &pTokTable[ lb + delta ];
518 StringCompare res = aSym.CompareIgnoreCaseToAscii( tp->s );
519 // Gefunden?
520 if( res == COMPARE_EQUAL ) goto special;
521 // Groesser? Dann untere Haelfte
522 if( res == COMPARE_LESS )
524 if ((ub - lb) == 2) ub = lb;
525 else ub = ub - delta;
527 // Kleiner? Dann obere Haelfte
528 else
530 if ((ub -lb) == 2) lb = ub;
531 else lb = lb + delta;
533 } while( delta );
534 // Symbol? Wenn nicht >= Token
535 sal_Unicode ch = aSym.GetBuffer()[0];
536 if( !BasicSimpleCharClass::isAlpha( ch, bCompatible ) && !bSymbol )
537 return eCurTok = (SbiToken) (ch & 0x00FF);
538 return eCurTok = SYMBOL;
540 special:
541 // LINE INPUT
542 if( tp->t == LINE )
544 short nC1 = nCol1;
545 String aOldSym = aSym;
546 eCurTok = Peek();
547 if( eCurTok == INPUT )
549 Next();
550 nCol1 = nC1;
551 return eCurTok = LINEINPUT;
553 else
555 aSym = aOldSym;
556 return eCurTok = LINE;
559 // END IF, CASE, SUB, DEF, FUNCTION, TYPE, CLASS, WITH
560 if( tp->t == END )
562 // AB, 15.3.96, Spezialbehandlung fuer END, beim Peek() geht die
563 // aktuelle Zeile verloren, daher alles merken und danach restaurieren
564 USHORT nOldLine = nLine;
565 USHORT nOldCol = nCol;
566 USHORT nOldCol1 = nCol1;
567 USHORT nOldCol2 = nCol2;
568 String aOldSym = aSym;
569 SaveLine(); // pLine im Scanner sichern
571 eCurTok = Peek();
572 switch( eCurTok )
574 case IF: Next(); eCurTok = ENDIF; break;
575 case SELECT: Next(); eCurTok = ENDSELECT; break;
576 case SUB: Next(); eCurTok = ENDSUB; break;
577 case FUNCTION: Next(); eCurTok = ENDFUNC; break;
578 case PROPERTY: Next(); eCurTok = ENDPROPERTY; break;
579 case TYPE: Next(); eCurTok = ENDTYPE; break;
580 case ENUM: Next(); eCurTok = ENDENUM; break;
581 case WITH: Next(); eCurTok = ENDWITH; break;
582 default : eCurTok = END;
584 nCol1 = nOldCol1;
585 if( eCurTok == END )
587 // Alles zuruecksetzen, damit Token nach END ganz neu gelesen wird
588 ePush = NIL;
589 nLine = nOldLine;
590 nCol = nOldCol;
591 nCol2 = nOldCol2;
592 aSym = aOldSym;
593 RestoreLine(); // pLine im Scanner restaurieren
595 return eCurTok;
598 // check whether the keyword has been dim as a variable
599 if( IsSymbol( tp->t ) )
601 return eCurTok = SYMBOL;
604 // Sind Datentypen Keywords?
605 // Nur nach AS, sonst sind es Symbole!
606 // Es gibt ja ERROR(), DATA(), STRING() etc.
607 eCurTok = tp->t;
608 // AS: Datentypen sind Keywords
609 if( tp->t == AS )
610 bAs = TRUE;
611 else
613 if( bAs )
614 bAs = FALSE;
615 else if( eCurTok >= DATATYPE1 && eCurTok <= DATATYPE2 && (bErrorIsSymbol || eCurTok != _ERROR_) )
616 eCurTok = SYMBOL;
619 // CLASSMODULE, PROPERTY, GET, ENUM token only visible in compatible mode
620 SbiToken eTok = tp->t;
621 if( bCompatible )
623 // #129904 Suppress system
624 if( eTok == STOP && aSym.CompareIgnoreCaseToAscii( "system" ) == COMPARE_EQUAL )
625 eCurTok = SYMBOL;
627 else
629 if( eTok == CLASSMODULE ||
630 eTok == IMPLEMENTS ||
631 eTok == PARAMARRAY ||
632 eTok == ENUM ||
633 eTok == PROPERTY ||
634 eTok == GET ||
635 eTok == TYPEOF )
637 eCurTok = SYMBOL;
641 bEos = IsEoln( eCurTok );
642 return eCurTok;
645 #ifdef _MSC_VER
646 #pragma optimize("",off)
647 #endif
649 // Kann das aktuell eingelesene Token ein Label sein?
651 BOOL SbiTokenizer::MayBeLabel( BOOL bNeedsColon )
653 if( eCurTok == SYMBOL )
654 return bNeedsColon ? DoesColonFollow() : TRUE;
655 else
656 return BOOL( eCurTok == NUMBER
657 && eScanType == SbxINTEGER
658 && nVal >= 0 );
661 #ifdef _MSC_VER
662 #pragma optimize("",off)
663 #endif
666 void SbiTokenizer::Hilite( SbTextPortions& rList )
668 bErrors = FALSE;
669 bUsedForHilite = TRUE;
670 SbiToken eLastTok = NIL;
671 for( ;; )
673 Next();
674 if( IsEof() )
675 break;
676 SbTextPortion aRes;
677 aRes.nLine = nLine;
678 aRes.nStart = nCol1;
679 aRes.nEnd = nCol2;
680 switch( eCurTok )
682 case REM:
683 aRes.eType = SB_COMMENT; break;
684 case SYMBOL:
685 aRes.eType = SB_SYMBOL; break;
686 case FIXSTRING:
687 aRes.eType = SB_STRING; break;
688 case NUMBER:
689 aRes.eType = SB_NUMBER; break;
690 default:
691 if( ( eCurTok >= FIRSTKWD && eCurTok <= LASTKWD )
692 || (eCurTok >= _CDECL_ ) )
693 aRes.eType = SB_KEYWORD;
694 else
695 aRes.eType = SB_PUNCTUATION;
697 // Die Folge xxx.Keyword sollte nicht als Kwd geflagt werden
698 if( aRes.eType == SB_KEYWORD
699 && ( eLastTok == DOT|| eLastTok == EXCLAM ) )
700 aRes.eType = SB_SYMBOL;
701 if( eCurTok != EOLN && aRes.nStart <= aRes.nEnd )
702 rList.Insert( aRes, rList.Count() );
703 if( aRes.eType == SB_COMMENT )
704 break;
705 eLastTok = eCurTok;
707 bUsedForHilite = FALSE;