Version 4.0.2.1, tag libreoffice-4.0.2.1
[LibreOffice.git] / basic / source / comp / token.cxx
blob a6054c19415a597f09e0d728dd75c0961117ab04
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include "basiccharclass.hxx"
22 #include "sbcomp.hxx"
24 struct TokenTable { SbiToken t; const char *s; };
26 static short nToken; // number of tokens
28 static TokenTable* pTokTable;
30 static TokenTable aTokTable_Basic [] = {
31 { CAT, "&" },
32 { MUL, "*" },
33 { PLUS, "+" },
34 { MINUS, "-" },
35 { DIV, "/" },
36 { EOS, ":" },
37 { ASSIGN, ":=" },
38 { LT, "<" },
39 { LE, "<=" },
40 { NE, "<>" },
41 { EQ, "=" },
42 { GT, ">" },
43 { GE, ">=" },
44 { ACCESS, "Access" },
45 { ALIAS, "Alias" },
46 { AND, "And" },
47 { ANY, "Any" },
48 { APPEND, "Append" },
49 { AS, "As" },
50 { ATTRIBUTE,"Attribute" },
51 { BASE, "Base" },
52 { BINARY, "Binary" },
53 { TBOOLEAN, "Boolean" },
54 { BYREF, "ByRef", },
55 { TBYTE, "Byte", },
56 { BYVAL, "ByVal", },
57 { CALL, "Call" },
58 { CASE, "Case" },
59 { _CDECL_, "Cdecl" },
60 { CLASSMODULE, "ClassModule" },
61 { CLOSE, "Close" },
62 { COMPARE, "Compare" },
63 { COMPATIBLE,"Compatible" },
64 { _CONST_, "Const" },
65 { TCURRENCY,"Currency" },
66 { TDATE, "Date" },
67 { DECLARE, "Declare" },
68 { DEFBOOL, "DefBool" },
69 { DEFCUR, "DefCur" },
70 { DEFDATE, "DefDate" },
71 { DEFDBL, "DefDbl" },
72 { DEFERR, "DefErr" },
73 { DEFINT, "DefInt" },
74 { DEFLNG, "DefLng" },
75 { DEFOBJ, "DefObj" },
76 { DEFSNG, "DefSng" },
77 { DEFSTR, "DefStr" },
78 { DEFVAR, "DefVar" },
79 { DIM, "Dim" },
80 { DO, "Do" },
81 { TDOUBLE, "Double" },
82 { EACH, "Each" },
83 { ELSE, "Else" },
84 { ELSEIF, "ElseIf" },
85 { END, "End" },
86 { ENDENUM, "End Enum" },
87 { ENDFUNC, "End Function" },
88 { ENDIF, "End If" },
89 { ENDPROPERTY, "End Property" },
90 { ENDSELECT,"End Select" },
91 { ENDSUB, "End Sub" },
92 { ENDTYPE, "End Type" },
93 { ENDIF, "EndIf" },
94 { ENUM, "Enum" },
95 { EQV, "Eqv" },
96 { ERASE, "Erase" },
97 { _ERROR_, "Error" },
98 { EXIT, "Exit" },
99 { EXPLICIT, "Explicit" },
100 { FOR, "For" },
101 { FUNCTION, "Function" },
102 { GET, "Get" },
103 { GLOBAL, "Global" },
104 { GOSUB, "GoSub" },
105 { GOTO, "GoTo" },
106 { IF, "If" },
107 { IMP, "Imp" },
108 { IMPLEMENTS, "Implements" },
109 { _IN_, "In" },
110 { INPUT, "Input" }, // also INPUT #
111 { TINTEGER, "Integer" },
112 { IS, "Is" },
113 { LET, "Let" },
114 { LIB, "Lib" },
115 { LIKE, "Like" },
116 { LINE, "Line" },
117 { LINEINPUT,"Line Input" },
118 { LOCAL, "Local" },
119 { LOCK, "Lock" },
120 { TLONG, "Long" },
121 { LOOP, "Loop" },
122 { LPRINT, "LPrint" },
123 { LSET, "LSet" }, // JSM
124 { MOD, "Mod" },
125 { NAME, "Name" },
126 { NEW, "New" },
127 { NEXT, "Next" },
128 { NOT, "Not" },
129 { TOBJECT, "Object" },
130 { ON, "On" },
131 { OPEN, "Open" },
132 { OPTION, "Option" },
133 { _OPTIONAL_, "Optional" },
134 { OR, "Or" },
135 { OUTPUT, "Output" },
136 { PARAMARRAY, "ParamArray" },
137 { PRESERVE, "Preserve" },
138 { PRINT, "Print" },
139 { PRIVATE, "Private" },
140 { PROPERTY, "Property" },
141 { PUBLIC, "Public" },
142 { RANDOM, "Random" },
143 { READ, "Read" },
144 { REDIM, "ReDim" },
145 { REM, "Rem" },
146 { RESUME, "Resume" },
147 { RETURN, "Return" },
148 { RSET, "RSet" }, // JSM
149 { SELECT, "Select" },
150 { SET, "Set" },
151 #ifdef SHARED
152 #undef SHARED
153 #define tmpSHARED
154 #endif
155 { SHARED, "Shared" },
156 #ifdef tmpSHARED
157 #define SHARED
158 #undef tmpSHARED
159 #endif
160 { TSINGLE, "Single" },
161 { STATIC, "Static" },
162 { STEP, "Step" },
163 { STOP, "Stop" },
164 { TSTRING, "String" },
165 { SUB, "Sub" },
166 { STOP, "System" },
167 { TEXT, "Text" },
168 { THEN, "Then" },
169 { TO, "To", },
170 { TYPE, "Type" },
171 { TYPEOF, "TypeOf" },
172 { UNTIL, "Until" },
173 { TVARIANT, "Variant" },
174 { VBASUPPORT, "VbaSupport" },
175 { WEND, "Wend" },
176 { WHILE, "While" },
177 { WITH, "With" },
178 { WITHEVENTS, "WithEvents" },
179 { WRITE, "Write" }, // also WRITE #
180 { XOR, "Xor" },
181 { NIL, "" }
185 // #i109076
186 TokenLabelInfo::TokenLabelInfo( void )
188 m_pTokenCanBeLabelTab = new bool[VBASUPPORT+1];
189 for( int i = 0 ; i <= VBASUPPORT ; ++i )
191 m_pTokenCanBeLabelTab[i] = false;
193 // Token accepted as label by VBA
194 SbiToken eLabelToken[] = { ACCESS, ALIAS, APPEND, BASE, BINARY, CLASSMODULE,
195 COMPARE, COMPATIBLE, DEFERR, _ERROR_, EXPLICIT, LIB, LINE, LPRINT, NAME,
196 TOBJECT, OUTPUT, PROPERTY, RANDOM, READ, STEP, STOP, TEXT, VBASUPPORT, NIL };
197 SbiToken* pTok = eLabelToken;
198 SbiToken eTok;
199 for( pTok = eLabelToken ; (eTok = *pTok) != NIL ; ++pTok )
201 m_pTokenCanBeLabelTab[eTok] = true;
205 TokenLabelInfo::~TokenLabelInfo()
207 delete[] m_pTokenCanBeLabelTab;
211 // the constructor detects the length of the token table
213 SbiTokenizer::SbiTokenizer( const ::rtl::OUString& rSrc, StarBASIC* pb )
214 : SbiScanner( rSrc, pb )
216 pTokTable = aTokTable_Basic;
217 TokenTable *tp;
218 bEof = bAs = false;
219 eCurTok = NIL;
220 ePush = NIL;
221 bEos = bKeywords = bErrorIsSymbol = true;
222 if( !nToken )
224 for( nToken = 0, tp = pTokTable; tp->t; nToken++, tp++ )
229 SbiTokenizer::~SbiTokenizer()
234 void SbiTokenizer::Push( SbiToken t )
236 if( ePush != NIL )
237 Error( SbERR_INTERNAL_ERROR, "PUSH" );
238 else ePush = t;
241 void SbiTokenizer::Error( SbError code, const char* pMsg )
243 aError = ::rtl::OUString::createFromAscii( pMsg );
244 Error( code );
247 void SbiTokenizer::Error( SbError code, const ::rtl::OUString &aMsg )
249 aError = aMsg;
250 Error( code );
253 void SbiTokenizer::Error( SbError code, SbiToken tok )
255 aError = Symbol( tok );
256 Error( code );
259 // reading in the next token without absorbing it
261 SbiToken SbiTokenizer::Peek()
263 if( ePush == NIL )
265 sal_uInt16 nOldLine = nLine;
266 sal_uInt16 nOldCol1 = nCol1;
267 sal_uInt16 nOldCol2 = nCol2;
268 ePush = Next();
269 nPLine = nLine; nLine = nOldLine;
270 nPCol1 = nCol1; nCol1 = nOldCol1;
271 nPCol2 = nCol2; nCol2 = nOldCol2;
273 return eCurTok = ePush;
276 // For decompilation. Numbers and symbols return an empty string.
278 const ::rtl::OUString& SbiTokenizer::Symbol( SbiToken t )
280 // character token?
281 if( t < FIRSTKWD )
283 aSym = ::rtl::OUString::valueOf(sal::static_int_cast<sal_Unicode>(t));
284 return aSym;
286 switch( t )
288 case NEG :
289 aSym = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("-"));
290 return aSym;
291 case EOS :
292 aSym = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(":/CRLF"));
293 return aSym;
294 case EOLN :
295 aSym = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("CRLF"));
296 return aSym;
297 default:
298 break;
300 TokenTable* tp = pTokTable;
301 for( short i = 0; i < nToken; i++, tp++ )
303 if( tp->t == t )
305 aSym = ::rtl::OStringToOUString(tp->s, RTL_TEXTENCODING_ASCII_US);
306 return aSym;
309 const sal_Unicode *p = aSym.getStr();
310 if (*p <= ' ')
312 aSym = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("???"));
314 return aSym;
317 // Reading in the next token and put it down.
318 // Tokens that don't appear in the token table
319 // are directly returned as a character.
320 // Some words are treated in a special way.
//
// Order of the steps below:
//  1. Once bEof is set, every call returns EOLN.
//  2. A token parked in ePush is handed out first, together with the
//     position kept in nPLine/nPCol1/nPCol2.
//  3. Otherwise NextSym() is called and the result is classified as EOLN,
//     NUMBER, FIXSTRING, EXPON, IDIV or SYMBOL, or looked up in the token
//     table by binary search.
//  4. A table hit continues at the label "special", where context decides
//     whether the keyword keeps its token or is demoted to SYMBOL, and where
//     END is merged with a following keyword into one token.
// The token is stored in eCurTok and returned; most paths also update bEos.
322 SbiToken SbiTokenizer::Next()
324 if (bEof)
326 return EOLN;
328 // have read in one already?
329 if( ePush != NIL )
331 eCurTok = ePush;
332 ePush = NIL;
333 nLine = nPLine;
334 nCol1 = nPCol1;
335 nCol2 = nPCol2;
336 bEos = IsEoln( eCurTok );
337 return eCurTok;
339 TokenTable *tp;
// NextSym() returning false is treated as end of input: from now on bEof
// makes every call return EOLN.
341 if( !NextSym() )
343 bEof = bEos = true;
344 return eCurTok = EOLN;
// a symbol starting with a line feed is the end of the line
347 if( aSym[0] == '\n' )
349 bEos = true; return eCurTok = EOLN;
351 bEos = false;
353 if( bNumber )
355 return eCurTok = NUMBER;
357 else if( ( eScanType == SbxDATE || eScanType == SbxSTRING ) && !bSymbol )
359 return eCurTok = FIXSTRING;
361 // Special cases of characters that are between "Z" and "a". ICompare()
362 // evaluates the position of these characters in different ways.
363 else if( aSym[0] == '^' )
365 return eCurTok = EXPON;
367 else if( aSym[0] == '\\' )
369 return eCurTok = IDIV;
371 else
// No keyword lookup if the scan type is not SbxVARIANT, or if keywords are
// switched off (bKeywords) and the scanner flagged a symbol.
373 if( eScanType != SbxVARIANT
374 || ( !bKeywords && bSymbol ) )
375 return eCurTok = SYMBOL;
376 // valid token?
// Binary search over pTokTable[0 .. nToken-1] with a case-insensitive
// comparison; this relies on the table being ordered the same way.
// lb/ub are the inclusive bounds, the probe is at lb + delta.
377 short lb = 0;
378 short ub = nToken-1;
379 short delta;
382 delta = (ub - lb) >> 1;
383 tp = &pTokTable[ lb + delta ];
384 sal_Int32 res = aSym.compareToIgnoreAsciiCaseAscii( tp->s );
386 if( res == 0 )
// found: tp points at the matching row, continue with the context rules
388 goto special;
390 if( res < 0 )
// With exactly three candidates left the probe was the middle one, so only
// the lower end remains to be checked.
392 if ((ub - lb) == 2)
394 ub = lb;
396 else
398 ub = ub - delta;
401 else
// Same for the upper end when the symbol sorts after the middle one.
403 if ((ub -lb) == 2)
405 lb = ub;
407 else
409 lb = lb + delta;
// delta == 0 means the range had shrunk to the single row just compared.
413 while( delta );
414 // Symbol? if not >= token
// Not a keyword. If the first character is not alphabetic and the scanner
// did not flag a symbol, the low byte of that character is the token value.
415 sal_Unicode ch = aSym[0];
416 if( !theBasicCharClass::get().isAlpha( ch, bCompatible ) && !bSymbol )
418 return eCurTok = (SbiToken) (ch & 0x00FF);
420 return eCurTok = SYMBOL;
422 special:
423 // #i92642
// eCurTok still holds the token of the previous call at this point.
424 bool bStartOfLine = (eCurTok == NIL || eCurTok == REM || eCurTok == EOLN);
// NAME and LINE are keywords only at the start of a line
425 if( !bStartOfLine && (tp->t == NAME || tp->t == LINE) )
427 return eCurTok = SYMBOL;
// TEXT is always returned as a plain symbol from here
429 else if( tp->t == TEXT )
431 return eCurTok = SYMBOL;
433 // maybe we can expand this for other statements that have parameters
434 // that are keywords ( and those keywords are only used within such
435 // statements )
436 // what's happening here is that if we come across 'append' ( and we are
437 // not in the middle of parsing a special statement ( like 'Open')
438 // we just treat keyword 'append' as a normal 'SYMBOL'.
439 // Also we accept Dim APPEND
440 else if ( ( !bInStatement || eCurTok == DIM ) && tp->t == APPEND )
442 return eCurTok = SYMBOL;
444 // #i92642: Special LINE token handling -> SbiParser::Line()
446 // END IF, CASE, SUB, DEF, FUNCTION, TYPE, CLASS, WITH
447 if( tp->t == END )
449 // from 15.3.96, special treatment for END, at Peek() the current
450 // time is lost, so memorize everything and restore after
451 sal_uInt16 nOldLine = nLine;
452 sal_uInt16 nOldCol = nCol;
453 sal_uInt16 nOldCol1 = nCol1;
454 sal_uInt16 nOldCol2 = nCol2;
455 OUString aOldSym = aSym;
456 SaveLine(); // save pLine in the scanner
// Look one token ahead. If it is one of the keywords below, consume it with
// Next() and report the combined END... token instead.
458 eCurTok = Peek();
459 switch( eCurTok )
461 case IF: Next(); eCurTok = ENDIF; break;
462 case SELECT: Next(); eCurTok = ENDSELECT; break;
463 case SUB: Next(); eCurTok = ENDSUB; break;
464 case FUNCTION: Next(); eCurTok = ENDFUNC; break;
465 case PROPERTY: Next(); eCurTok = ENDPROPERTY; break;
466 case TYPE: Next(); eCurTok = ENDTYPE; break;
467 case ENUM: Next(); eCurTok = ENDENUM; break;
468 case WITH: Next(); eCurTok = ENDWITH; break;
469 default : eCurTok = END; break;
// In both cases nCol1 goes back to the value saved before the look-ahead
// (presumably so that the token starts at the column of END).
471 nCol1 = nOldCol1;
472 if( eCurTok == END )
474 // reset everything so that token is read completely newly after END
475 ePush = NIL;
476 nLine = nOldLine;
477 nCol = nOldCol;
478 nCol2 = nOldCol2;
479 aSym = aOldSym;
480 RestoreLine();
// this path returns without updating bEos
482 return eCurTok;
484 // are data types keywords?
485 // there is ERROR(), DATA(), STRING() etc.
486 eCurTok = tp->t;
487 // AS: data types are keywords
488 if( tp->t == AS )
490 bAs = true;
492 else
// The keyword directly after AS keeps its token and clears the flag.
494 if( bAs )
496 bAs = false;
// Elsewhere a token in the range DATATYPE1..DATATYPE2 becomes a plain symbol;
// _ERROR_ is exempt from that while bErrorIsSymbol is false.
498 else if( eCurTok >= DATATYPE1 && eCurTok <= DATATYPE2 && (bErrorIsSymbol || eCurTok != _ERROR_) )
500 eCurTok = SYMBOL;
504 // CLASSMODULE, PROPERTY, GET, ENUM token only visible in compatible mode
505 SbiToken eTok = tp->t;
506 if( bCompatible )
508 // #129904 Suppress system
// STOP has two spellings in the table; only the spelling "system" is demoted
509 if( eTok == STOP && aSym.equalsIgnoreAsciiCaseAsciiL(RTL_CONSTASCII_STRINGPARAM("system")) )
511 eCurTok = SYMBOL;
// GET at the start of a line is a plain symbol
513 if( eTok == GET && bStartOfLine )
515 eCurTok = SYMBOL;
518 else
// outside compatible mode these keywords are always plain symbols
520 if( eTok == CLASSMODULE ||
521 eTok == IMPLEMENTS ||
522 eTok == PARAMARRAY ||
523 eTok == ENUM ||
524 eTok == PROPERTY ||
525 eTok == GET ||
526 eTok == TYPEOF )
528 eCurTok = SYMBOL;
532 bEos = IsEoln( eCurTok );
533 return eCurTok;
536 #ifdef _MSC_VER
537 #pragma optimize("",off)
538 #endif
541 bool SbiTokenizer::MayBeLabel( bool bNeedsColon )
543 if( eCurTok == SYMBOL || m_aTokenLabelInfo.canTokenBeLabel( eCurTok ) )
545 return bNeedsColon ? DoesColonFollow() : true;
547 else
549 return ( eCurTok == NUMBER
550 && eScanType == SbxINTEGER
551 && nVal >= 0 );
555 #ifdef _MSC_VER
556 #pragma optimize("",off)
557 #endif
560 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */