nss: upgrade to release 3.73
[LibreOffice.git] / basic / source / comp / token.cxx
blobbf47a1b2aaa8c7a24652a5b3c27f68440c80692a
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
21 #include <array>
23 #include <basic/sberrors.hxx>
24 #include <rtl/instance.hxx>
25 #include <sal/macros.h>
26 #include <basiccharclass.hxx>
27 #include <token.hxx>
29 namespace {
31 struct TokenTable { SbiToken t; const char *s; };
35 const TokenTable aTokTable_Basic [] = {
36 { CAT, "&" },
37 { MUL, "*" },
38 { PLUS, "+" },
39 { MINUS, "-" },
40 { DIV, "/" },
41 { EOS, ":" },
42 { ASSIGN, ":=" },
43 { LT, "<" },
44 { LE, "<=" },
45 { NE, "<>" },
46 { EQ, "=" },
47 { GT, ">" },
48 { GE, ">=" },
49 { ACCESS, "Access" },
50 { ALIAS, "Alias" },
51 { AND, "And" },
52 { ANY, "Any" },
53 { APPEND, "Append" },
54 { AS, "As" },
55 { ATTRIBUTE,"Attribute" },
56 { BASE, "Base" },
57 { BINARY, "Binary" },
58 { TBOOLEAN, "Boolean" },
59 { BYREF, "ByRef", },
60 { TBYTE, "Byte", },
61 { BYVAL, "ByVal", },
62 { CALL, "Call" },
63 { CASE, "Case" },
64 { CDECL_, "Cdecl" },
65 { CLASSMODULE, "ClassModule" },
66 { CLOSE, "Close" },
67 { COMPARE, "Compare" },
68 { COMPATIBLE,"Compatible" },
69 { CONST_, "Const" },
70 { TCURRENCY,"Currency" },
71 { TDATE, "Date" },
72 { DECLARE, "Declare" },
73 { DEFBOOL, "DefBool" },
74 { DEFCUR, "DefCur" },
75 { DEFDATE, "DefDate" },
76 { DEFDBL, "DefDbl" },
77 { DEFERR, "DefErr" },
78 { DEFINT, "DefInt" },
79 { DEFLNG, "DefLng" },
80 { DEFOBJ, "DefObj" },
81 { DEFSNG, "DefSng" },
82 { DEFSTR, "DefStr" },
83 { DEFVAR, "DefVar" },
84 { DIM, "Dim" },
85 { DO, "Do" },
86 { TDOUBLE, "Double" },
87 { EACH, "Each" },
88 { ELSE, "Else" },
89 { ELSEIF, "ElseIf" },
90 { END, "End" },
91 { ENDENUM, "End Enum" },
92 { ENDFUNC, "End Function" },
93 { ENDIF, "End If" },
94 { ENDPROPERTY, "End Property" },
95 { ENDSELECT,"End Select" },
96 { ENDSUB, "End Sub" },
97 { ENDTYPE, "End Type" },
98 { ENDIF, "EndIf" },
99 { ENUM, "Enum" },
100 { EQV, "Eqv" },
101 { ERASE, "Erase" },
102 { ERROR_, "Error" },
103 { EXIT, "Exit" },
104 { BASIC_EXPLICIT, "Explicit" },
105 { FOR, "For" },
106 { FUNCTION, "Function" },
107 { GET, "Get" },
108 { GLOBAL, "Global" },
109 { GOSUB, "GoSub" },
110 { GOTO, "GoTo" },
111 { IF, "If" },
112 { IMP, "Imp" },
113 { IMPLEMENTS, "Implements" },
114 { IN_, "In" },
115 { INPUT, "Input" }, // also INPUT #
116 { TINTEGER, "Integer" },
117 { IS, "Is" },
118 { LET, "Let" },
119 { LIB, "Lib" },
120 { LIKE, "Like" },
121 { LINE, "Line" },
122 { LINEINPUT,"Line Input" },
123 { LOCAL, "Local" },
124 { LOCK, "Lock" },
125 { TLONG, "Long" },
126 { LOOP, "Loop" },
127 { LPRINT, "LPrint" },
128 { LSET, "LSet" }, // JSM
129 { MOD, "Mod" },
130 { NAME, "Name" },
131 { NEW, "New" },
132 { NEXT, "Next" },
133 { NOT, "Not" },
134 { TOBJECT, "Object" },
135 { ON, "On" },
136 { OPEN, "Open" },
137 { OPTION, "Option" },
138 { OPTIONAL_, "Optional" },
139 { OR, "Or" },
140 { OUTPUT, "Output" },
141 { PARAMARRAY, "ParamArray" },
142 { PRESERVE, "Preserve" },
143 { PRINT, "Print" },
144 { PRIVATE, "Private" },
145 { PROPERTY, "Property" },
146 { PTRSAFE, "PtrSafe" },
147 { PUBLIC, "Public" },
148 { RANDOM, "Random" },
149 { READ, "Read" },
150 { REDIM, "ReDim" },
151 { REM, "Rem" },
152 { RESUME, "Resume" },
153 { RETURN, "Return" },
154 { RSET, "RSet" }, // JSM
155 { SELECT, "Select" },
156 { SET, "Set" },
157 { SHARED, "Shared" },
158 { TSINGLE, "Single" },
159 { STATIC, "Static" },
160 { STEP, "Step" },
161 { STOP, "Stop" },
162 { TSTRING, "String" },
163 { SUB, "Sub" },
164 { STOP, "System" },
165 { TEXT, "Text" },
166 { THEN, "Then" },
167 { TO, "To", },
168 { TYPE, "Type" },
169 { TYPEOF, "TypeOf" },
170 { UNTIL, "Until" },
171 { TVARIANT, "Variant" },
172 { VBASUPPORT, "VbaSupport" },
173 { WEND, "Wend" },
174 { WHILE, "While" },
175 { WITH, "With" },
176 { WITHEVENTS, "WithEvents" },
177 { WRITE, "Write" }, // also WRITE #
178 { XOR, "Xor" },
181 namespace {
183 // #i109076
184 class TokenLabelInfo
186 std::array<bool,VBASUPPORT+1> m_pTokenCanBeLabelTab;
188 public:
189 TokenLabelInfo();
191 bool canTokenBeLabel( SbiToken eTok )
192 { return m_pTokenCanBeLabelTab[eTok]; }
195 class StaticTokenLabelInfo: public ::rtl::Static< TokenLabelInfo, StaticTokenLabelInfo >{};
199 // #i109076
200 TokenLabelInfo::TokenLabelInfo()
202 m_pTokenCanBeLabelTab.fill(false);
204 // Token accepted as label by VBA
205 static const SbiToken eLabelToken[] = { ACCESS, ALIAS, APPEND, BASE, BINARY, CLASSMODULE,
206 COMPARE, COMPATIBLE, DEFERR, ERROR_, BASIC_EXPLICIT, LIB, LINE, LPRINT, NAME,
207 TOBJECT, OUTPUT, PROPERTY, RANDOM, READ, STEP, STOP, TEXT, VBASUPPORT };
208 for( SbiToken eTok : eLabelToken )
210 m_pTokenCanBeLabelTab[eTok] = true;
215 SbiTokenizer::SbiTokenizer( const OUString& rSrc, StarBASIC* pb )
216 : SbiScanner(rSrc, pb)
217 , eCurTok(NIL)
218 , ePush(NIL)
219 , nPLine(0)
220 , nPCol1(0)
221 , nPCol2(0)
222 , bEof(false)
223 , bEos(true)
224 , bAs(false)
225 , bErrorIsSymbol(true)
229 void SbiTokenizer::Push( SbiToken t )
231 if( ePush != NIL )
232 Error( ERRCODE_BASIC_INTERNAL_ERROR, "PUSH" );
233 else ePush = t;
236 void SbiTokenizer::Error( ErrCode code, const OUString &aMsg )
238 aError = aMsg;
239 Error( code );
242 void SbiTokenizer::Error( ErrCode code, SbiToken tok )
244 aError = Symbol( tok );
245 Error( code );
248 // reading in the next token without absorbing it
250 SbiToken SbiTokenizer::Peek()
252 if( ePush == NIL )
254 sal_Int32 nOldLine = nLine;
255 sal_Int32 nOldCol1 = nCol1;
256 sal_Int32 nOldCol2 = nCol2;
257 ePush = Next();
258 nPLine = nLine; nLine = nOldLine;
259 nPCol1 = nCol1; nCol1 = nOldCol1;
260 nPCol2 = nCol2; nCol2 = nOldCol2;
262 eCurTok = ePush;
263 return eCurTok;
266 // For decompilation. Numbers and symbols return an empty string.
268 const OUString& SbiTokenizer::Symbol( SbiToken t )
270 // character token?
271 if( t < FIRSTKWD )
273 aSym = OUString(sal::static_int_cast<sal_Unicode>(t));
274 return aSym;
276 switch( t )
278 case NEG :
279 aSym = "-";
280 return aSym;
281 case EOS :
282 aSym = ":/CRLF";
283 return aSym;
284 case EOLN :
285 aSym = "CRLF";
286 return aSym;
287 default:
288 break;
290 for( auto& rTok : aTokTable_Basic )
292 if( rTok.t == t )
294 aSym = OStringToOUString(rTok.s, RTL_TEXTENCODING_ASCII_US);
295 return aSym;
298 const sal_Unicode *p = aSym.getStr();
299 if (*p <= ' ')
301 aSym = "???";
303 return aSym;
306 // Reading in the next token and put it down.
307 // Tokens that don't appear in the token table
308 // are directly returned as a character.
309 // Some words are treated in a special way.
311 SbiToken SbiTokenizer::Next()
313 if (bEof)
315 return EOLN;
317 // have read in one already?
318 if( ePush != NIL )
320 eCurTok = ePush;
321 ePush = NIL;
322 nLine = nPLine;
323 nCol1 = nPCol1;
324 nCol2 = nPCol2;
325 bEos = IsEoln( eCurTok );
326 return eCurTok;
328 const TokenTable *tp;
330 if( !NextSym() )
332 bEof = bEos = true;
333 eCurTok = EOLN;
334 return eCurTok;
337 if( aSym.startsWith("\n") )
339 bEos = true;
340 eCurTok = EOLN;
341 return eCurTok;
343 bEos = false;
345 if( bNumber )
347 eCurTok = NUMBER;
348 return eCurTok;
350 else if( ( eScanType == SbxDATE || eScanType == SbxSTRING ) && !bSymbol )
352 eCurTok = FIXSTRING;
353 return eCurTok;
355 else if( aSym.isEmpty() )
357 //something went wrong
358 bEof = bEos = true;
359 eCurTok = EOLN;
360 return eCurTok;
362 // Special cases of characters that are between "Z" and "a". ICompare()
363 // evaluates the position of these characters in different ways.
364 else if( aSym[0] == '^' )
366 eCurTok = EXPON;
367 return eCurTok;
369 else if( aSym[0] == '\\' )
371 eCurTok = IDIV;
372 return eCurTok;
374 else
376 if( eScanType != SbxVARIANT )
378 eCurTok = SYMBOL;
379 return eCurTok;
381 // valid token?
382 short lb = 0;
383 short ub = SAL_N_ELEMENTS(aTokTable_Basic)-1;
384 short delta;
387 delta = (ub - lb) >> 1;
388 tp = &aTokTable_Basic[ lb + delta ];
389 sal_Int32 res = aSym.compareToIgnoreAsciiCaseAscii( tp->s );
391 if( res == 0 )
393 goto special;
395 if( res < 0 )
397 if ((ub - lb) == 2)
399 ub = lb;
401 else
403 ub = ub - delta;
406 else
408 if ((ub -lb) == 2)
410 lb = ub;
412 else
414 lb = lb + delta;
418 while( delta );
419 // Symbol? if not >= token
420 sal_Unicode ch = aSym[0];
421 if( !BasicCharClass::isAlpha( ch, bCompatible ) && !bSymbol )
423 eCurTok = static_cast<SbiToken>(ch & 0x00FF);
424 return eCurTok;
426 eCurTok = SYMBOL;
427 return eCurTok;
429 special:
430 // #i92642
431 bool bStartOfLine = (eCurTok == NIL || eCurTok == REM || eCurTok == EOLN ||
432 eCurTok == THEN || eCurTok == ELSE); // single line If
433 if( !bStartOfLine && (tp->t == NAME || tp->t == LINE) )
435 eCurTok = SYMBOL;
436 return eCurTok;
438 else if( tp->t == TEXT )
440 eCurTok = SYMBOL;
441 return eCurTok;
443 // maybe we can expand this for other statements that have parameters
444 // that are keywords ( and those keywords are only used within such
445 // statements )
446 // what's happening here is that if we come across 'append' ( and we are
447 // not in the middle of parsing a special statement ( like 'Open')
448 // we just treat keyword 'append' as a normal 'SYMBOL'.
449 // Also we accept Dim APPEND
450 else if ( ( !bInStatement || eCurTok == DIM ) && tp->t == APPEND )
452 eCurTok = SYMBOL;
453 return eCurTok;
455 // #i92642: Special LINE token handling -> SbiParser::Line()
457 // END IF, CASE, SUB, DEF, FUNCTION, TYPE, CLASS, WITH
458 if( tp->t == END )
460 // from 15.3.96, special treatment for END, at Peek() the current
461 // time is lost, so memorize everything and restore after
462 sal_Int32 nOldLine = nLine;
463 sal_Int32 nOldCol = nCol;
464 sal_Int32 nOldCol1 = nCol1;
465 sal_Int32 nOldCol2 = nCol2;
466 OUString aOldSym = aSym;
467 SaveLine(); // save pLine in the scanner
469 eCurTok = Peek();
470 switch( eCurTok )
472 case IF: Next(); eCurTok = ENDIF; break;
473 case SELECT: Next(); eCurTok = ENDSELECT; break;
474 case SUB: Next(); eCurTok = ENDSUB; break;
475 case FUNCTION: Next(); eCurTok = ENDFUNC; break;
476 case PROPERTY: Next(); eCurTok = ENDPROPERTY; break;
477 case TYPE: Next(); eCurTok = ENDTYPE; break;
478 case ENUM: Next(); eCurTok = ENDENUM; break;
479 case WITH: Next(); eCurTok = ENDWITH; break;
480 default : eCurTok = END; break;
482 nCol1 = nOldCol1;
483 if( eCurTok == END )
485 // reset everything so that token is read completely newly after END
486 ePush = NIL;
487 nLine = nOldLine;
488 nCol = nOldCol;
489 nCol2 = nOldCol2;
490 aSym = aOldSym;
491 RestoreLine();
493 return eCurTok;
495 // are data types keywords?
496 // there is ERROR(), DATA(), STRING() etc.
497 eCurTok = tp->t;
498 // AS: data types are keywords
499 if( tp->t == AS )
501 bAs = true;
503 else
505 if( bAs )
507 bAs = false;
509 else if( eCurTok >= DATATYPE1 && eCurTok <= DATATYPE2 && (bErrorIsSymbol || eCurTok != ERROR_) )
511 eCurTok = SYMBOL;
515 // CLASSMODULE, PROPERTY, GET, ENUM token only visible in compatible mode
516 SbiToken eTok = tp->t;
517 if( bCompatible )
519 // #129904 Suppress system
520 if( eTok == STOP && aSym.equalsIgnoreAsciiCase("system") )
522 eCurTok = SYMBOL;
524 if( eTok == GET && bStartOfLine )
526 eCurTok = SYMBOL;
529 else
531 if( eTok == CLASSMODULE ||
532 eTok == IMPLEMENTS ||
533 eTok == PARAMARRAY ||
534 eTok == ENUM ||
535 eTok == PROPERTY ||
536 eTok == GET ||
537 eTok == TYPEOF )
539 eCurTok = SYMBOL;
543 bEos = IsEoln( eCurTok );
544 return eCurTok;
547 bool SbiTokenizer::MayBeLabel( bool bNeedsColon )
549 if( eCurTok == SYMBOL || StaticTokenLabelInfo::get().canTokenBeLabel( eCurTok ) )
551 return !bNeedsColon || DoesColonFollow();
553 else
555 return ( eCurTok == NUMBER
556 && eScanType == SbxINTEGER
557 && nVal >= 0 );
562 OUString SbiTokenizer::GetKeywordCase( const OUString& sKeyword )
564 for( auto& rTok : aTokTable_Basic )
566 if( sKeyword.equalsIgnoreAsciiCaseAscii(rTok.s) )
567 return OStringToOUString(rTok.s, RTL_TEXTENCODING_ASCII_US);
569 return OUString();
572 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */