Version 5.4.3.2, tag libreoffice-5.4.3.2
[LibreOffice.git] / basic / source / comp / token.cxx
blobca57eb04dcc25c6183afa73cff3f911e905e9f99
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <array>
23 #include "basiccharclass.hxx"
24 #include "token.hxx"
26 struct TokenTable { SbiToken t; const char *s; };
28 static short nToken; // number of tokens
30 static const TokenTable* pTokTable;
32 static const TokenTable aTokTable_Basic [] = {
33 { CAT, "&" },
34 { MUL, "*" },
35 { PLUS, "+" },
36 { MINUS, "-" },
37 { DIV, "/" },
38 { EOS, ":" },
39 { ASSIGN, ":=" },
40 { LT, "<" },
41 { LE, "<=" },
42 { NE, "<>" },
43 { EQ, "=" },
44 { GT, ">" },
45 { GE, ">=" },
46 { ACCESS, "Access" },
47 { ALIAS, "Alias" },
48 { AND, "And" },
49 { ANY, "Any" },
50 { APPEND, "Append" },
51 { AS, "As" },
52 { ATTRIBUTE,"Attribute" },
53 { BASE, "Base" },
54 { BINARY, "Binary" },
55 { TBOOLEAN, "Boolean" },
56 { BYREF, "ByRef", },
57 { TBYTE, "Byte", },
58 { BYVAL, "ByVal", },
59 { CALL, "Call" },
60 { CASE, "Case" },
61 { CDECL_, "Cdecl" },
62 { CLASSMODULE, "ClassModule" },
63 { CLOSE, "Close" },
64 { COMPARE, "Compare" },
65 { COMPATIBLE,"Compatible" },
66 { CONST_, "Const" },
67 { TCURRENCY,"Currency" },
68 { TDATE, "Date" },
69 { DECLARE, "Declare" },
70 { DEFBOOL, "DefBool" },
71 { DEFCUR, "DefCur" },
72 { DEFDATE, "DefDate" },
73 { DEFDBL, "DefDbl" },
74 { DEFERR, "DefErr" },
75 { DEFINT, "DefInt" },
76 { DEFLNG, "DefLng" },
77 { DEFOBJ, "DefObj" },
78 { DEFSNG, "DefSng" },
79 { DEFSTR, "DefStr" },
80 { DEFVAR, "DefVar" },
81 { DIM, "Dim" },
82 { DO, "Do" },
83 { TDOUBLE, "Double" },
84 { EACH, "Each" },
85 { ELSE, "Else" },
86 { ELSEIF, "ElseIf" },
87 { END, "End" },
88 { ENDENUM, "End Enum" },
89 { ENDFUNC, "End Function" },
90 { ENDIF, "End If" },
91 { ENDPROPERTY, "End Property" },
92 { ENDSELECT,"End Select" },
93 { ENDSUB, "End Sub" },
94 { ENDTYPE, "End Type" },
95 { ENDIF, "EndIf" },
96 { ENUM, "Enum" },
97 { EQV, "Eqv" },
98 { ERASE, "Erase" },
99 { ERROR_, "Error" },
100 { EXIT, "Exit" },
101 { BASIC_EXPLICIT, "Explicit" },
102 { FOR, "For" },
103 { FUNCTION, "Function" },
104 { GET, "Get" },
105 { GLOBAL, "Global" },
106 { GOSUB, "GoSub" },
107 { GOTO, "GoTo" },
108 { IF, "If" },
109 { IMP, "Imp" },
110 { IMPLEMENTS, "Implements" },
111 { IN_, "In" },
112 { INPUT, "Input" }, // also INPUT #
113 { TINTEGER, "Integer" },
114 { IS, "Is" },
115 { LET, "Let" },
116 { LIB, "Lib" },
117 { LIKE, "Like" },
118 { LINE, "Line" },
119 { LINEINPUT,"Line Input" },
120 { LOCAL, "Local" },
121 { LOCK, "Lock" },
122 { TLONG, "Long" },
123 { LOOP, "Loop" },
124 { LPRINT, "LPrint" },
125 { LSET, "LSet" }, // JSM
126 { MOD, "Mod" },
127 { NAME, "Name" },
128 { NEW, "New" },
129 { NEXT, "Next" },
130 { NOT, "Not" },
131 { TOBJECT, "Object" },
132 { ON, "On" },
133 { OPEN, "Open" },
134 { OPTION, "Option" },
135 { OPTIONAL_, "Optional" },
136 { OR, "Or" },
137 { OUTPUT, "Output" },
138 { PARAMARRAY, "ParamArray" },
139 { PRESERVE, "Preserve" },
140 { PRINT, "Print" },
141 { PRIVATE, "Private" },
142 { PROPERTY, "Property" },
143 { PTRSAFE, "PtrSafe" },
144 { PUBLIC, "Public" },
145 { RANDOM, "Random" },
146 { READ, "Read" },
147 { REDIM, "ReDim" },
148 { REM, "Rem" },
149 { RESUME, "Resume" },
150 { RETURN, "Return" },
151 { RSET, "RSet" }, // JSM
152 { SELECT, "Select" },
153 { SET, "Set" },
154 { SHARED, "Shared" },
155 { TSINGLE, "Single" },
156 { STATIC, "Static" },
157 { STEP, "Step" },
158 { STOP, "Stop" },
159 { TSTRING, "String" },
160 { SUB, "Sub" },
161 { STOP, "System" },
162 { TEXT, "Text" },
163 { THEN, "Then" },
164 { TO, "To", },
165 { TYPE, "Type" },
166 { TYPEOF, "TypeOf" },
167 { UNTIL, "Until" },
168 { TVARIANT, "Variant" },
169 { VBASUPPORT, "VbaSupport" },
170 { WEND, "Wend" },
171 { WHILE, "While" },
172 { WITH, "With" },
173 { WITHEVENTS, "WithEvents" },
174 { WRITE, "Write" }, // also WRITE #
175 { XOR, "Xor" },
176 { NIL, "" }
179 // #i109076
180 class TokenLabelInfo
182 std::array<bool,VBASUPPORT+1> m_pTokenCanBeLabelTab;
184 public:
185 TokenLabelInfo();
187 bool canTokenBeLabel( SbiToken eTok )
188 { return m_pTokenCanBeLabelTab[eTok]; }
191 class StaticTokenLabelInfo: public ::rtl::Static< TokenLabelInfo, StaticTokenLabelInfo >{};
193 // #i109076
194 TokenLabelInfo::TokenLabelInfo()
196 m_pTokenCanBeLabelTab.fill(false);
198 // Token accepted as label by VBA
199 static const SbiToken eLabelToken[] = { ACCESS, ALIAS, APPEND, BASE, BINARY, CLASSMODULE,
200 COMPARE, COMPATIBLE, DEFERR, ERROR_, BASIC_EXPLICIT, LIB, LINE, LPRINT, NAME,
201 TOBJECT, OUTPUT, PROPERTY, RANDOM, READ, STEP, STOP, TEXT, VBASUPPORT, NIL };
202 SbiToken eTok;
203 for( const SbiToken* pTok = eLabelToken ; (eTok = *pTok) != NIL ; ++pTok )
205 m_pTokenCanBeLabelTab[eTok] = true;
209 // the constructor detects the length of the token table
211 SbiTokenizer::SbiTokenizer( const OUString& rSrc, StarBASIC* pb )
212 : SbiScanner(rSrc, pb)
213 , eCurTok(NIL)
214 , ePush(NIL)
215 , nPLine(0)
216 , nPCol1(0)
217 , nPCol2(0)
218 , bEof(false)
219 , bEos(true)
220 , bKeywords(true)
221 , bAs(false)
222 , bErrorIsSymbol(true)
224 pTokTable = aTokTable_Basic;
225 if( !nToken )
227 const TokenTable *tp;
228 for( nToken = 0, tp = pTokTable; tp->t; nToken++, tp++ )
233 SbiTokenizer::~SbiTokenizer()
238 void SbiTokenizer::Push( SbiToken t )
240 if( ePush != NIL )
241 Error( ERRCODE_BASIC_INTERNAL_ERROR, "PUSH" );
242 else ePush = t;
245 void SbiTokenizer::Error( SbError code, const OUString &aMsg )
247 aError = aMsg;
248 Error( code );
251 void SbiTokenizer::Error( SbError code, SbiToken tok )
253 aError = Symbol( tok );
254 Error( code );
257 // reading in the next token without absorbing it
259 SbiToken SbiTokenizer::Peek()
261 if( ePush == NIL )
263 sal_uInt16 nOldLine = nLine;
264 sal_uInt16 nOldCol1 = nCol1;
265 sal_uInt16 nOldCol2 = nCol2;
266 ePush = Next();
267 nPLine = nLine; nLine = nOldLine;
268 nPCol1 = nCol1; nCol1 = nOldCol1;
269 nPCol2 = nCol2; nCol2 = nOldCol2;
271 return eCurTok = ePush;
274 // For decompilation. Numbers and symbols return an empty string.
276 const OUString& SbiTokenizer::Symbol( SbiToken t )
278 // character token?
279 if( t < FIRSTKWD )
281 aSym = OUString(sal::static_int_cast<sal_Unicode>(t));
282 return aSym;
284 switch( t )
286 case NEG :
287 aSym = "-";
288 return aSym;
289 case EOS :
290 aSym = ":/CRLF";
291 return aSym;
292 case EOLN :
293 aSym = "CRLF";
294 return aSym;
295 default:
296 break;
298 const TokenTable* tp = pTokTable;
299 for( short i = 0; i < nToken; i++, tp++ )
301 if( tp->t == t )
303 aSym = OStringToOUString(tp->s, RTL_TEXTENCODING_ASCII_US);
304 return aSym;
307 const sal_Unicode *p = aSym.getStr();
308 if (*p <= ' ')
310 aSym = "???";
312 return aSym;
315 // Reading in the next token and put it down.
316 // Tokens that don't appear in the token table
317 // are directly returned as a character.
318 // Some words are treated in a special way.
320 SbiToken SbiTokenizer::Next()
322 if (bEof)
324 return EOLN;
326 // have read in one already?
327 if( ePush != NIL )
329 eCurTok = ePush;
330 ePush = NIL;
331 nLine = nPLine;
332 nCol1 = nPCol1;
333 nCol2 = nPCol2;
334 bEos = IsEoln( eCurTok );
335 return eCurTok;
337 const TokenTable *tp;
339 if( !NextSym() )
341 bEof = bEos = true;
342 return eCurTok = EOLN;
345 if( aSym.startsWith("\n") )
347 bEos = true;
348 return eCurTok = EOLN;
350 bEos = false;
352 if( bNumber )
354 return eCurTok = NUMBER;
356 else if( ( eScanType == SbxDATE || eScanType == SbxSTRING ) && !bSymbol )
358 return eCurTok = FIXSTRING;
360 else if( aSym.isEmpty() )
362 //something went wrong
363 bEof = bEos = true;
364 return eCurTok = EOLN;
366 // Special cases of characters that are between "Z" and "a". ICompare()
367 // evaluates the position of these characters in different ways.
368 else if( aSym[0] == '^' )
370 return eCurTok = EXPON;
372 else if( aSym[0] == '\\' )
374 return eCurTok = IDIV;
376 else
378 if( eScanType != SbxVARIANT
379 || ( !bKeywords && bSymbol ) )
380 return eCurTok = SYMBOL;
381 // valid token?
382 short lb = 0;
383 short ub = nToken-1;
384 short delta;
387 delta = (ub - lb) >> 1;
388 tp = &pTokTable[ lb + delta ];
389 sal_Int32 res = aSym.compareToIgnoreAsciiCaseAscii( tp->s );
391 if( res == 0 )
393 goto special;
395 if( res < 0 )
397 if ((ub - lb) == 2)
399 ub = lb;
401 else
403 ub = ub - delta;
406 else
408 if ((ub -lb) == 2)
410 lb = ub;
412 else
414 lb = lb + delta;
418 while( delta );
419 // Symbol? if not >= token
420 sal_Unicode ch = aSym[0];
421 if( !BasicCharClass::isAlpha( ch, bCompatible ) && !bSymbol )
423 return eCurTok = (SbiToken) (ch & 0x00FF);
425 return eCurTok = SYMBOL;
427 special:
428 // #i92642
429 bool bStartOfLine = (eCurTok == NIL || eCurTok == REM || eCurTok == EOLN ||
430 eCurTok == THEN || eCurTok == ELSE); // single line If
431 if( !bStartOfLine && (tp->t == NAME || tp->t == LINE) )
433 return eCurTok = SYMBOL;
435 else if( tp->t == TEXT )
437 return eCurTok = SYMBOL;
439 // maybe we can expand this for other statements that have parameters
440 // that are keywords ( and those keywords are only used within such
441 // statements )
442 // what's happening here is that if we come across 'append' ( and we are
443 // not in the middle of parsing a special statement ( like 'Open')
444 // we just treat keyword 'append' as a normal 'SYMBOL'.
445 // Also we accept Dim APPEND
446 else if ( ( !bInStatement || eCurTok == DIM ) && tp->t == APPEND )
448 return eCurTok = SYMBOL;
450 // #i92642: Special LINE token handling -> SbiParser::Line()
452 // END IF, CASE, SUB, DEF, FUNCTION, TYPE, CLASS, WITH
453 if( tp->t == END )
455 // from 15.3.96, special treatment for END, at Peek() the current
456 // time is lost, so memorize everything and restore after
457 sal_uInt16 nOldLine = nLine;
458 sal_uInt16 nOldCol = nCol;
459 sal_uInt16 nOldCol1 = nCol1;
460 sal_uInt16 nOldCol2 = nCol2;
461 OUString aOldSym = aSym;
462 SaveLine(); // save pLine in the scanner
464 eCurTok = Peek();
465 switch( eCurTok )
467 case IF: Next(); eCurTok = ENDIF; break;
468 case SELECT: Next(); eCurTok = ENDSELECT; break;
469 case SUB: Next(); eCurTok = ENDSUB; break;
470 case FUNCTION: Next(); eCurTok = ENDFUNC; break;
471 case PROPERTY: Next(); eCurTok = ENDPROPERTY; break;
472 case TYPE: Next(); eCurTok = ENDTYPE; break;
473 case ENUM: Next(); eCurTok = ENDENUM; break;
474 case WITH: Next(); eCurTok = ENDWITH; break;
475 default : eCurTok = END; break;
477 nCol1 = nOldCol1;
478 if( eCurTok == END )
480 // reset everything so that token is read completely newly after END
481 ePush = NIL;
482 nLine = nOldLine;
483 nCol = nOldCol;
484 nCol2 = nOldCol2;
485 aSym = aOldSym;
486 RestoreLine();
488 return eCurTok;
490 // are data types keywords?
491 // there is ERROR(), DATA(), STRING() etc.
492 eCurTok = tp->t;
493 // AS: data types are keywords
494 if( tp->t == AS )
496 bAs = true;
498 else
500 if( bAs )
502 bAs = false;
504 else if( eCurTok >= DATATYPE1 && eCurTok <= DATATYPE2 && (bErrorIsSymbol || eCurTok != ERROR_) )
506 eCurTok = SYMBOL;
510 // CLASSMODULE, PROPERTY, GET, ENUM token only visible in compatible mode
511 SbiToken eTok = tp->t;
512 if( bCompatible )
514 // #129904 Suppress system
515 if( eTok == STOP && aSym.equalsIgnoreAsciiCase("system") )
517 eCurTok = SYMBOL;
519 if( eTok == GET && bStartOfLine )
521 eCurTok = SYMBOL;
524 else
526 if( eTok == CLASSMODULE ||
527 eTok == IMPLEMENTS ||
528 eTok == PARAMARRAY ||
529 eTok == ENUM ||
530 eTok == PROPERTY ||
531 eTok == GET ||
532 eTok == TYPEOF )
534 eCurTok = SYMBOL;
538 bEos = IsEoln( eCurTok );
539 return eCurTok;
542 bool SbiTokenizer::MayBeLabel( bool bNeedsColon )
544 if( eCurTok == SYMBOL || StaticTokenLabelInfo::get().canTokenBeLabel( eCurTok ) )
546 return !bNeedsColon || DoesColonFollow();
548 else
550 return ( eCurTok == NUMBER
551 && eScanType == SbxINTEGER
552 && nVal >= 0 );
557 OUString SbiTokenizer::GetKeywordCase( const OUString& sKeyword )
559 if( !nToken )
561 const TokenTable *tp;
562 for( nToken = 0, tp = pTokTable; tp->t; nToken++, tp++ )
565 const TokenTable* tp = pTokTable;
566 for( short i = 0; i < nToken; i++, tp++ )
568 if( sKeyword.equalsIgnoreAsciiCaseAscii(tp->s) )
569 return OStringToOUString(tp->s, RTL_TEXTENCODING_ASCII_US);
571 return OUString();
574 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */