Version 6.4.0.3, tag libreoffice-6.4.0.3
[LibreOffice.git] / basic / source / comp / token.cxx
blob2e014182785892361a305e06f4915c77ed089bb9
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
21 #include <array>
23 #include <basic/sberrors.hxx>
24 #include <rtl/instance.hxx>
25 #include <sal/macros.h>
26 #include <basiccharclass.hxx>
27 #include <token.hxx>
29 struct TokenTable { SbiToken t; const char *s; };
31 static const TokenTable aTokTable_Basic [] = {
32 { CAT, "&" },
33 { MUL, "*" },
34 { PLUS, "+" },
35 { MINUS, "-" },
36 { DIV, "/" },
37 { EOS, ":" },
38 { ASSIGN, ":=" },
39 { LT, "<" },
40 { LE, "<=" },
41 { NE, "<>" },
42 { EQ, "=" },
43 { GT, ">" },
44 { GE, ">=" },
45 { ACCESS, "Access" },
46 { ALIAS, "Alias" },
47 { AND, "And" },
48 { ANY, "Any" },
49 { APPEND, "Append" },
50 { AS, "As" },
51 { ATTRIBUTE,"Attribute" },
52 { BASE, "Base" },
53 { BINARY, "Binary" },
54 { TBOOLEAN, "Boolean" },
55 { BYREF, "ByRef", },
56 { TBYTE, "Byte", },
57 { BYVAL, "ByVal", },
58 { CALL, "Call" },
59 { CASE, "Case" },
60 { CDECL_, "Cdecl" },
61 { CLASSMODULE, "ClassModule" },
62 { CLOSE, "Close" },
63 { COMPARE, "Compare" },
64 { COMPATIBLE,"Compatible" },
65 { CONST_, "Const" },
66 { TCURRENCY,"Currency" },
67 { TDATE, "Date" },
68 { DECLARE, "Declare" },
69 { DEFBOOL, "DefBool" },
70 { DEFCUR, "DefCur" },
71 { DEFDATE, "DefDate" },
72 { DEFDBL, "DefDbl" },
73 { DEFERR, "DefErr" },
74 { DEFINT, "DefInt" },
75 { DEFLNG, "DefLng" },
76 { DEFOBJ, "DefObj" },
77 { DEFSNG, "DefSng" },
78 { DEFSTR, "DefStr" },
79 { DEFVAR, "DefVar" },
80 { DIM, "Dim" },
81 { DO, "Do" },
82 { TDOUBLE, "Double" },
83 { EACH, "Each" },
84 { ELSE, "Else" },
85 { ELSEIF, "ElseIf" },
86 { END, "End" },
87 { ENDENUM, "End Enum" },
88 { ENDFUNC, "End Function" },
89 { ENDIF, "End If" },
90 { ENDPROPERTY, "End Property" },
91 { ENDSELECT,"End Select" },
92 { ENDSUB, "End Sub" },
93 { ENDTYPE, "End Type" },
94 { ENDIF, "EndIf" },
95 { ENUM, "Enum" },
96 { EQV, "Eqv" },
97 { ERASE, "Erase" },
98 { ERROR_, "Error" },
99 { EXIT, "Exit" },
100 { BASIC_EXPLICIT, "Explicit" },
101 { FOR, "For" },
102 { FUNCTION, "Function" },
103 { GET, "Get" },
104 { GLOBAL, "Global" },
105 { GOSUB, "GoSub" },
106 { GOTO, "GoTo" },
107 { IF, "If" },
108 { IMP, "Imp" },
109 { IMPLEMENTS, "Implements" },
110 { IN_, "In" },
111 { INPUT, "Input" }, // also INPUT #
112 { TINTEGER, "Integer" },
113 { IS, "Is" },
114 { LET, "Let" },
115 { LIB, "Lib" },
116 { LIKE, "Like" },
117 { LINE, "Line" },
118 { LINEINPUT,"Line Input" },
119 { LOCAL, "Local" },
120 { LOCK, "Lock" },
121 { TLONG, "Long" },
122 { LOOP, "Loop" },
123 { LPRINT, "LPrint" },
124 { LSET, "LSet" }, // JSM
125 { MOD, "Mod" },
126 { NAME, "Name" },
127 { NEW, "New" },
128 { NEXT, "Next" },
129 { NOT, "Not" },
130 { TOBJECT, "Object" },
131 { ON, "On" },
132 { OPEN, "Open" },
133 { OPTION, "Option" },
134 { OPTIONAL_, "Optional" },
135 { OR, "Or" },
136 { OUTPUT, "Output" },
137 { PARAMARRAY, "ParamArray" },
138 { PRESERVE, "Preserve" },
139 { PRINT, "Print" },
140 { PRIVATE, "Private" },
141 { PROPERTY, "Property" },
142 { PTRSAFE, "PtrSafe" },
143 { PUBLIC, "Public" },
144 { RANDOM, "Random" },
145 { READ, "Read" },
146 { REDIM, "ReDim" },
147 { REM, "Rem" },
148 { RESUME, "Resume" },
149 { RETURN, "Return" },
150 { RSET, "RSet" }, // JSM
151 { SELECT, "Select" },
152 { SET, "Set" },
153 { SHARED, "Shared" },
154 { TSINGLE, "Single" },
155 { STATIC, "Static" },
156 { STEP, "Step" },
157 { STOP, "Stop" },
158 { TSTRING, "String" },
159 { SUB, "Sub" },
160 { STOP, "System" },
161 { TEXT, "Text" },
162 { THEN, "Then" },
163 { TO, "To", },
164 { TYPE, "Type" },
165 { TYPEOF, "TypeOf" },
166 { UNTIL, "Until" },
167 { TVARIANT, "Variant" },
168 { VBASUPPORT, "VbaSupport" },
169 { WEND, "Wend" },
170 { WHILE, "While" },
171 { WITH, "With" },
172 { WITHEVENTS, "WithEvents" },
173 { WRITE, "Write" }, // also WRITE #
174 { XOR, "Xor" },
177 // #i109076
178 class TokenLabelInfo
180 std::array<bool,VBASUPPORT+1> m_pTokenCanBeLabelTab;
182 public:
183 TokenLabelInfo();
185 bool canTokenBeLabel( SbiToken eTok )
186 { return m_pTokenCanBeLabelTab[eTok]; }
189 class StaticTokenLabelInfo: public ::rtl::Static< TokenLabelInfo, StaticTokenLabelInfo >{};
191 // #i109076
192 TokenLabelInfo::TokenLabelInfo()
194 m_pTokenCanBeLabelTab.fill(false);
196 // Token accepted as label by VBA
197 static const SbiToken eLabelToken[] = { ACCESS, ALIAS, APPEND, BASE, BINARY, CLASSMODULE,
198 COMPARE, COMPATIBLE, DEFERR, ERROR_, BASIC_EXPLICIT, LIB, LINE, LPRINT, NAME,
199 TOBJECT, OUTPUT, PROPERTY, RANDOM, READ, STEP, STOP, TEXT, VBASUPPORT };
200 for( SbiToken eTok : eLabelToken )
202 m_pTokenCanBeLabelTab[eTok] = true;
207 SbiTokenizer::SbiTokenizer( const OUString& rSrc, StarBASIC* pb )
208 : SbiScanner(rSrc, pb)
209 , eCurTok(NIL)
210 , ePush(NIL)
211 , nPLine(0)
212 , nPCol1(0)
213 , nPCol2(0)
214 , bEof(false)
215 , bEos(true)
216 , bAs(false)
217 , bErrorIsSymbol(true)
221 void SbiTokenizer::Push( SbiToken t )
223 if( ePush != NIL )
224 Error( ERRCODE_BASIC_INTERNAL_ERROR, "PUSH" );
225 else ePush = t;
228 void SbiTokenizer::Error( ErrCode code, const OUString &aMsg )
230 aError = aMsg;
231 Error( code );
234 void SbiTokenizer::Error( ErrCode code, SbiToken tok )
236 aError = Symbol( tok );
237 Error( code );
240 // reading in the next token without absorbing it
242 SbiToken SbiTokenizer::Peek()
244 if( ePush == NIL )
246 sal_Int32 nOldLine = nLine;
247 sal_Int32 nOldCol1 = nCol1;
248 sal_Int32 nOldCol2 = nCol2;
249 ePush = Next();
250 nPLine = nLine; nLine = nOldLine;
251 nPCol1 = nCol1; nCol1 = nOldCol1;
252 nPCol2 = nCol2; nCol2 = nOldCol2;
254 return eCurTok = ePush;
257 // For decompilation. Numbers and symbols return an empty string.
259 const OUString& SbiTokenizer::Symbol( SbiToken t )
261 // character token?
262 if( t < FIRSTKWD )
264 aSym = OUString(sal::static_int_cast<sal_Unicode>(t));
265 return aSym;
267 switch( t )
269 case NEG :
270 aSym = "-";
271 return aSym;
272 case EOS :
273 aSym = ":/CRLF";
274 return aSym;
275 case EOLN :
276 aSym = "CRLF";
277 return aSym;
278 default:
279 break;
281 for( auto& rTok : aTokTable_Basic )
283 if( rTok.t == t )
285 aSym = OStringToOUString(rTok.s, RTL_TEXTENCODING_ASCII_US);
286 return aSym;
289 const sal_Unicode *p = aSym.getStr();
290 if (*p <= ' ')
292 aSym = "???";
294 return aSym;
297 // Reading in the next token and put it down.
298 // Tokens that don't appear in the token table
299 // are directly returned as a character.
300 // Some words are treated in a special way.
302 SbiToken SbiTokenizer::Next()
304 if (bEof)
306 return EOLN;
308 // have read in one already?
309 if( ePush != NIL )
311 eCurTok = ePush;
312 ePush = NIL;
313 nLine = nPLine;
314 nCol1 = nPCol1;
315 nCol2 = nPCol2;
316 bEos = IsEoln( eCurTok );
317 return eCurTok;
319 const TokenTable *tp;
321 if( !NextSym() )
323 bEof = bEos = true;
324 return eCurTok = EOLN;
327 if( aSym.startsWith("\n") )
329 bEos = true;
330 return eCurTok = EOLN;
332 bEos = false;
334 if( bNumber )
336 return eCurTok = NUMBER;
338 else if( ( eScanType == SbxDATE || eScanType == SbxSTRING ) && !bSymbol )
340 return eCurTok = FIXSTRING;
342 else if( aSym.isEmpty() )
344 //something went wrong
345 bEof = bEos = true;
346 return eCurTok = EOLN;
348 // Special cases of characters that are between "Z" and "a". ICompare()
349 // evaluates the position of these characters in different ways.
350 else if( aSym[0] == '^' )
352 return eCurTok = EXPON;
354 else if( aSym[0] == '\\' )
356 return eCurTok = IDIV;
358 else
360 if( eScanType != SbxVARIANT )
361 return eCurTok = SYMBOL;
362 // valid token?
363 short lb = 0;
364 short ub = SAL_N_ELEMENTS(aTokTable_Basic)-1;
365 short delta;
368 delta = (ub - lb) >> 1;
369 tp = &aTokTable_Basic[ lb + delta ];
370 sal_Int32 res = aSym.compareToIgnoreAsciiCaseAscii( tp->s );
372 if( res == 0 )
374 goto special;
376 if( res < 0 )
378 if ((ub - lb) == 2)
380 ub = lb;
382 else
384 ub = ub - delta;
387 else
389 if ((ub -lb) == 2)
391 lb = ub;
393 else
395 lb = lb + delta;
399 while( delta );
400 // Symbol? if not >= token
401 sal_Unicode ch = aSym[0];
402 if( !BasicCharClass::isAlpha( ch, bCompatible ) && !bSymbol )
404 return eCurTok = static_cast<SbiToken>(ch & 0x00FF);
406 return eCurTok = SYMBOL;
408 special:
409 // #i92642
410 bool bStartOfLine = (eCurTok == NIL || eCurTok == REM || eCurTok == EOLN ||
411 eCurTok == THEN || eCurTok == ELSE); // single line If
412 if( !bStartOfLine && (tp->t == NAME || tp->t == LINE) )
414 return eCurTok = SYMBOL;
416 else if( tp->t == TEXT )
418 return eCurTok = SYMBOL;
420 // maybe we can expand this for other statements that have parameters
421 // that are keywords ( and those keywords are only used within such
422 // statements )
423 // what's happening here is that if we come across 'append' ( and we are
424 // not in the middle of parsing a special statement ( like 'Open')
425 // we just treat keyword 'append' as a normal 'SYMBOL'.
426 // Also we accept Dim APPEND
427 else if ( ( !bInStatement || eCurTok == DIM ) && tp->t == APPEND )
429 return eCurTok = SYMBOL;
431 // #i92642: Special LINE token handling -> SbiParser::Line()
433 // END IF, CASE, SUB, DEF, FUNCTION, TYPE, CLASS, WITH
434 if( tp->t == END )
436 // from 15.3.96, special treatment for END, at Peek() the current
437 // time is lost, so memorize everything and restore after
438 sal_Int32 nOldLine = nLine;
439 sal_Int32 nOldCol = nCol;
440 sal_Int32 nOldCol1 = nCol1;
441 sal_Int32 nOldCol2 = nCol2;
442 OUString aOldSym = aSym;
443 SaveLine(); // save pLine in the scanner
445 eCurTok = Peek();
446 switch( eCurTok )
448 case IF: Next(); eCurTok = ENDIF; break;
449 case SELECT: Next(); eCurTok = ENDSELECT; break;
450 case SUB: Next(); eCurTok = ENDSUB; break;
451 case FUNCTION: Next(); eCurTok = ENDFUNC; break;
452 case PROPERTY: Next(); eCurTok = ENDPROPERTY; break;
453 case TYPE: Next(); eCurTok = ENDTYPE; break;
454 case ENUM: Next(); eCurTok = ENDENUM; break;
455 case WITH: Next(); eCurTok = ENDWITH; break;
456 default : eCurTok = END; break;
458 nCol1 = nOldCol1;
459 if( eCurTok == END )
461 // reset everything so that token is read completely newly after END
462 ePush = NIL;
463 nLine = nOldLine;
464 nCol = nOldCol;
465 nCol2 = nOldCol2;
466 aSym = aOldSym;
467 RestoreLine();
469 return eCurTok;
471 // are data types keywords?
472 // there is ERROR(), DATA(), STRING() etc.
473 eCurTok = tp->t;
474 // AS: data types are keywords
475 if( tp->t == AS )
477 bAs = true;
479 else
481 if( bAs )
483 bAs = false;
485 else if( eCurTok >= DATATYPE1 && eCurTok <= DATATYPE2 && (bErrorIsSymbol || eCurTok != ERROR_) )
487 eCurTok = SYMBOL;
491 // CLASSMODULE, PROPERTY, GET, ENUM token only visible in compatible mode
492 SbiToken eTok = tp->t;
493 if( bCompatible )
495 // #129904 Suppress system
496 if( eTok == STOP && aSym.equalsIgnoreAsciiCase("system") )
498 eCurTok = SYMBOL;
500 if( eTok == GET && bStartOfLine )
502 eCurTok = SYMBOL;
505 else
507 if( eTok == CLASSMODULE ||
508 eTok == IMPLEMENTS ||
509 eTok == PARAMARRAY ||
510 eTok == ENUM ||
511 eTok == PROPERTY ||
512 eTok == GET ||
513 eTok == TYPEOF )
515 eCurTok = SYMBOL;
519 bEos = IsEoln( eCurTok );
520 return eCurTok;
523 bool SbiTokenizer::MayBeLabel( bool bNeedsColon )
525 if( eCurTok == SYMBOL || StaticTokenLabelInfo::get().canTokenBeLabel( eCurTok ) )
527 return !bNeedsColon || DoesColonFollow();
529 else
531 return ( eCurTok == NUMBER
532 && eScanType == SbxINTEGER
533 && nVal >= 0 );
538 OUString SbiTokenizer::GetKeywordCase( const OUString& sKeyword )
540 for( auto& rTok : aTokTable_Basic )
542 if( sKeyword.equalsIgnoreAsciiCaseAscii(rTok.s) )
543 return OStringToOUString(rTok.s, RTL_TEXTENCODING_ASCII_US);
545 return OUString();
548 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */