1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <basiccharclass.hxx>
21 #include <scanner.hxx>
22 #include <sbintern.hxx>
23 #include <runtime.hxx>
25 #include <basic/sberrors.hxx>
26 #include <i18nlangtag/lang.h>
27 #include <svl/zforlist.hxx>
28 #include <rtl/character.hxx>
// Constructs a scanner over the source text rBuf, which is kept in aBuf.
// p is a StarBASIC pointer (NOTE(review): presumably stored in pBasic, which
// GenError() uses - the initializer is not visible in this excerpt).
// Visible initial state: eScanType is SbxVARIANT and
// bPrevLineExtentsComment is false.
30 SbiScanner::SbiScanner( const OUString
& rBuf
, StarBASIC
* p
) : aBuf( rBuf
)
35 eScanType
= SbxVARIANT
;
52 bPrevLineExtentsComment
= false;
// NOTE(review): the body is not visible in this excerpt. GenError() consults
// nColLock and nSavedCol1 when it picks the error column, so this function
// presumably sets them - confirm against the full source.
57 void SbiScanner::LockColumn()
// NOTE(review): the body is not visible in this excerpt. Presumably the
// counterpart that releases the column lock read by GenError() via nColLock -
// confirm against the full source.
63 void SbiScanner::UnlockColumn()
// Reports the compile error `code` through pBasic->CError(), passing the
// current aError text, nLine and a column range.
// - The start column is nSavedCol1 while nColLock is set, otherwise nCol1.
// - bAbort becomes (and stays) true when CError() yields false or when the
//   code is ERRCODE_BASIC_NO_MEMORY or ERRCODE_BASIC_PROG_TOO_LARGE.
// NOTE(review): the bBlockCompilerError check below presumably suppresses
// reporting altogether; its body is not visible in this excerpt.
69 void SbiScanner::GenError( ErrCode code
)
71 if( GetSbData()->bBlockCompilerError
)
79 // report only one error per statement
83 // in case of EXPECTED or UNEXPECTED it always refers
84 // to the last token, so take the Col1 over
85 sal_Int32 nc
= nColLock
? nSavedCol1
: nCol1
;
87 ERRCODE_BASIC_EXPECTED
,
88 ERRCODE_BASIC_UNEXPECTED
,
89 ERRCODE_BASIC_SYMBOL_EXPECTED
,
90 ERRCODE_BASIC_LABEL_EXPECTED
) )
// keep the reported range well-formed: the end column is never left of the start
93 if( nc
> nCol2
) nCol2
= nc
;
95 bRes
= pBasic
->CError( code
, aError
, nLine
, nc
, nCol2
);
// once set, bAbort is sticky; fatal codes force it regardless of bRes
97 bAbort
= bAbort
|| !bRes
|| ( code
== ERRCODE_BASIC_NO_MEMORY
|| code
== ERRCODE_BASIC_PROG_TOO_LARGE
);
103 // used by SbiTokenizer::MayBeLabel() to detect a label
// Tests whether the character at the current column nCol of aLine is ':'.
// The bounds check comes first, so nothing is read at the end of the line.
// NOTE(review): what is done on a match (and the returned values) is not
// visible in this excerpt.
104 bool SbiScanner::DoesColonFollow()
106 if(nCol
< aLine
.getLength() && aLine
[nCol
] == ':')
115 // test for legal suffix
// File-local helper: takes one character and yields an SbxDataType.
// The callers in NextSym() compare the result against SbxVARIANT to decide
// whether the character was a type suffix.
// NOTE(review): the mapping itself is not visible in this excerpt.
116 static SbxDataType
GetSuffixType( sal_Unicode c
)
137 // NextSym(): reads the next symbol into the variables aSym, nVal and eScanType;
138 // the return value is false at EOF or on errors
// Scans a run of identifier characters in aLine starting at nCol. A character
// belongs to the run when BasicCharClass::isAlphaNumeric() accepts it (with
// the bCompatible flag) or when it is '_'. The run is then stored in aSym as
// aLine.copy(n, nCol - n).
// NOTE(review): the declaration of n and the loop body are not visible here;
// presumably n is the start column and the loop advances nCol - confirm.
141 void SbiScanner::scanAlphanumeric()
144 while(nCol
< aLine
.getLength() && (BasicCharClass::isAlphaNumeric(aLine
[nCol
], bCompatible
) || aLine
[nCol
] == '_'))
149 aSym
= aLine
.copy(n
, nCol
- n
);
// Look-ahead used for the two-word form "go to" (see the call site in
// NextSym()). Skips whitespace from position n; if at least two characters
// remain and they equal "to" (ASCII case-insensitive), nLineIdx is moved
// forward by n + 2 - nCol, i.e. to just behind the "to".
// NOTE(review): the initial value of n and the other updates made on a match
// are not visible in this excerpt.
152 void SbiScanner::scanGoto()
155 while(n
< aLine
.getLength() && BasicCharClass::isWhitespace(aLine
[n
]))
// need two characters for "to"
158 if(n
+ 1 < aLine
.getLength())
160 OUString aTemp
= aLine
.copy(n
, 2);
161 if(aTemp
.equalsIgnoreAsciiCase("to"))
164 nLineIdx
+= n
+ 2 - nCol
;
// Fetches the next source line from aBuf, starting at nBufPos, into aLine:
//  1. finds the end of the line (first '\r' or '\n', or end of buffer),
//  2. drops trailing whitespace,
//  3. copies the remaining range [nBufPos, nEnd) into aLine,
//  4. recognises a "\r\n" pair as one line ending,
//  5. resets nCol, nCol1 and nCol2 to 0.
// NOTE(review): the return statements and the updates of nBufPos / nEnd are
// not visible here; the first check presumably reports "no more input".
170 bool SbiScanner::readLine()
172 if(nBufPos
>= aBuf
.getLength())
175 sal_Int32 n
= nBufPos
;
176 sal_Int32 nLen
= aBuf
.getLength();
178 while(n
< nLen
&& aBuf
[n
] != '\r' && aBuf
[n
] != '\n')
181 // Trim trailing whitespace
183 while(nBufPos
< nEnd
&& BasicCharClass::isWhitespace(aBuf
[nEnd
- 1]))
186 aLine
= aBuf
.copy(nBufPos
, nEnd
- nBufPos
);
188 // Fast-forward past the line ending
189 if(n
+ 1 < nLen
&& aBuf
[n
] == '\r' && aBuf
[n
+ 1] == '\n')
198 nCol
= nCol1
= nCol2
= 0;
// Scans the next symbol of the current line. As far as this excerpt shows,
// the alternatives are tried in this order:
//   1. identifier (starts with a letter or '_'), incl. the "go to" special case
//   2. decimal number (digit, or '.' followed by a digit)
//   3. '&' prefixed hex/octal number
//   4. '"' delimited string or '[' ... ']' delimited symbol
//   5. '#' ... '#' date literal
//   6. invalid character (code >= 0x7F) -> ERRCODE_BASIC_SYNTAX
//   7. any other character, or a two-character operator (<>, <=, >=, :=)
// Results are left in aSym, nVal and eScanType; bHash, bSymbol, bNumber and
// bSpaces are reset on entry. Comments (', REM, compiler directives and
// continued comment lines) are handled after the symbol was read.
// NOTE(review): many lines (labels, returns, loop bodies) are missing from
// this excerpt, so the exact control flow must be confirmed in the full source.
204 bool SbiScanner::NextSym()
206 // memorize for the EOLN-case
207 sal_Int32 nOldLine
= nLine
;
208 sal_Int32 nOldCol1
= nCol1
;
209 sal_Int32 nOldCol2
= nCol2
;
210 sal_Unicode buf
[ BUF_SIZE
], *p
= buf
;
212 eScanType
= SbxVARIANT
;
214 bHash
= bSymbol
= bNumber
= bSpaces
= false;
215 bool bCompilerDirective
= false;
224 nOldCol1
= nOldCol2
= 0;
// remembered so that the end of the function can detect "nothing consumed"
227 const sal_Int32 nLineIdxScanStart
= nLineIdx
;
229 if(nCol
< aLine
.getLength() && BasicCharClass::isWhitespace(aLine
[nCol
]))
232 while(nCol
< aLine
.getLength() && BasicCharClass::isWhitespace(aLine
[nCol
]))
242 if(nCol
>= aLine
.getLength())
245 if( bPrevLineExtentsComment
)
246 goto PrevLineCommentLbl
;
248 if(nCol
< aLine
.getLength() && aLine
[nCol
] == '#')
250 sal_Int32 nLineTempIdx
= nLineIdx
;
254 } while (nLineTempIdx
< aLine
.getLength() && !BasicCharClass::isWhitespace(aLine
[nLineTempIdx
]) && aLine
[nLineTempIdx
] != '#');
255 // leave it if it is a date literal - it will be handled later
256 if (nLineTempIdx
>= aLine
.getLength() || aLine
[nLineTempIdx
] != '#')
260 //ignore compiler directives (# is first non-space character)
262 bCompilerDirective
= true;
268 // copy character if symbol
269 if(nCol
< aLine
.getLength() && (BasicCharClass::isAlpha(aLine
[nCol
], bCompatible
) || aLine
[nCol
] == '_'))
271 // if there's nothing behind '_' , it's the end of a line!
272 if(nCol
+ 1 == aLine
.getLength() && aLine
[nCol
] == '_')
274 // Note that nCol is not incremented here...
283 // Special handling for "go to"
284 if(nCol
< aLine
.getLength() && bCompatible
&& aSym
.equalsIgnoreAsciiCase("go"))
287 // replace closing '_' by space when end of line is following
288 // (wrong line continuation otherwise)
289 if (nCol
== aLine
.getLength() && aLine
[nCol
- 1] == '_')
291 // We are going to modify a potentially shared string, so force
292 // a copy, so that aSym is not modified by the following operation
293 OUString
aSymCopy( aSym
.getStr(), aSym
.getLength() );
296 // HACK: modifying a potentially shared string here!
297 const_cast<sal_Unicode
*>(aLine
.getStr())[nLineIdx
- 1] = ' ';
301 // don't test the exclamation mark
302 // if there's a symbol behind it
303 else if((nCol
>= aLine
.getLength() || aLine
[nCol
] != '!') ||
304 (nCol
+ 1 >= aLine
.getLength() || !BasicCharClass::isAlpha(aLine
[nCol
+ 1], bCompatible
)))
306 if(nCol
< aLine
.getLength())
308 SbxDataType
t(GetSuffixType(aLine
[nCol
]));
309 if( t
!= SbxVARIANT
)
319 // read in and convert if number
320 else if((nCol
< aLine
.getLength() && rtl::isAsciiDigit(aLine
[nCol
])) ||
321 (nCol
+ 1 < aLine
.getLength() && aLine
[nCol
] == '.' && rtl::isAsciiDigit(aLine
[nCol
+ 1])))
325 eScanType
= SbxDOUBLE
;
326 bool bScanError
= false;
327 bool bBufOverflow
= false;
328 // All this because of 'D' or 'd' floating point type, sigh...
// NOTE(review): strchr() takes an int and compares it after conversion to
// char, so a sal_Unicode above 0xFF may be narrowed and match one of these
// ASCII characters by accident - worth confirming / guarding.
329 while(!bScanError
&& nCol
< aLine
.getLength() && strchr("0123456789.DEde", aLine
[nCol
]))
331 // from 4.1.1996: buffer full? -> go on scanning empty
332 if( (p
-buf
) == (BUF_SIZE
-1) )
339 // point or exponent?
340 if(aLine
[nCol
] == '.')
347 else if(strchr("DdEe", aLine
[nCol
]))
354 if (nCol
+ 1 < aLine
.getLength() && (aLine
[nCol
+1] == '+' || aLine
[nCol
+1] == '-'))
358 if( (p
-buf
) == (BUF_SIZE
-1) )
375 aSym
= p
; bNumber
= true;
377 // For bad characters, scan and parse errors generate only one error.
378 ErrCode nError
= ERRCODE_NONE
;
383 aError
= OUString( aLine
[nCol
]);
384 nError
= ERRCODE_BASIC_BAD_CHAR_IN_NUMBER
;
// convert the collected characters in buf; pParseEnd reports how far the
// conversion got, so a shortfall means trailing characters were not numeric
387 rtl_math_ConversionStatus eStatus
= rtl_math_ConversionStatus_Ok
;
388 const sal_Unicode
* pParseEnd
= buf
;
389 nVal
= rtl_math_uStringToDouble( buf
, buf
+(p
-buf
), '.', ',', &eStatus
, &pParseEnd
);
390 if (pParseEnd
!= buf
+(p
-buf
))
392 // e.g. "12e" or "12e+", or with bScanError "12d"+"E".
393 sal_Int32 nChars
= buf
+(p
-buf
) - pParseEnd
;
396 // For bScanError, nLineIdx and nCol were already decremented, just
397 // add that character to the parse end.
400 // Copy error position from original string, not the buffer
401 // replacement where "12dE" => "12EE".
402 aError
= aLine
.copy( nCol
, nChars
);
403 nError
= ERRCODE_BASIC_BAD_CHAR_IN_NUMBER
;
405 else if (eStatus
!= rtl_math_ConversionStatus_Ok
)
407 // Keep the scan error and character at position, if any.
409 nError
= ERRCODE_BASIC_MATH_OVERFLOW
;
// values inside SbxMININT..SbxMAXINT are typed as SbxINTEGER
417 if( nVal
>= SbxMININT
&& nVal
<= SbxMAXINT
)
418 eScanType
= SbxINTEGER
;
419 else if( nVal
>= SbxMINLNG
&& nVal
<= SbxMAXLNG
)
424 GenError( ERRCODE_BASIC_MATH_OVERFLOW
);
427 if( nCol
< aLine
.getLength() )
429 SbxDataType
t(GetSuffixType(aLine
[nCol
]));
430 if( t
!= SbxVARIANT
)
439 // Hex/octal number? Read in and convert:
440 else if(aLine
.getLength() - nCol
> 1 && aLine
[nCol
] == '&')
443 sal_Unicode base
= 16;
444 sal_Unicode xch
= aLine
[nCol
];
446 switch( rtl::toAsciiUpperCase( xch
) )
454 // treated as an operator
455 --nLineIdx
; --nCol
; nCol1
= nCol
-1;
460 // Hex literals are signed Integers ( as defined by basic
461 // e.g. -2,147,483,648 through 2,147,483,647 (signed)
463 bool bOverflow
= false;
464 while(nCol
< aLine
.getLength() && BasicCharClass::isAlphaNumeric(aLine
[nCol
], false))
466 sal_Unicode ch
= rtl::toAsciiUpperCase(aLine
[nCol
]);
468 if( ((base
== 16 ) && rtl::isAsciiHexDigit( ch
) ) ||
469 ((base
== 8) && rtl::isAsciiOctalDigit( ch
)))
473 lu
= ( lu
* base
) + i
;
474 if( lu
> SAL_MAX_UINT32
)
481 aError
= OUString(ch
);
482 GenError( ERRCODE_BASIC_BAD_CHAR_IN_NUMBER
);
485 if(nCol
< aLine
.getLength() && aLine
[nCol
] == '&')
490 // tdf#62326 - If the value of the hex string lies within the range of 0x8000 (SbxMAXINT + 1)
491 // and 0xFFFF (SbxMAXUINT) inclusive, cast the value to 16 bit in order to get
492 // signed integers, e.g., SbxMININT through SbxMAXINT
493 sal_Int32 ls
= (lu
> SbxMAXINT
&& lu
<= SbxMAXUINT
) ? static_cast<sal_Int16
>(lu
) : static_cast<sal_Int32
>(lu
);
494 nVal
= static_cast<double>(ls
);
495 eScanType
= ( ls
>= SbxMININT
&& ls
<= SbxMAXINT
) ? SbxINTEGER
: SbxLONG
;
497 GenError( ERRCODE_BASIC_MATH_OVERFLOW
);
501 else if (nLineIdx
< aLine
.getLength() && (aLine
[nLineIdx
] == '"' || aLine
[nLineIdx
] == '['))
503 sal_Unicode cSep
= aLine
[nLineIdx
];
509 sal_Int32 n
= nCol
+ 1;
510 while (nLineIdx
< aLine
.getLength())
517 while (nLineIdx
< aLine
.getLength() && (aLine
[nLineIdx
] != cSep
));
518 if (nLineIdx
< aLine
.getLength() && aLine
[nLineIdx
] == cSep
)
521 if (nLineIdx
>= aLine
.getLength() || aLine
[nLineIdx
] != cSep
|| cSep
== ']')
523 // If VBA Interop then doesn't eat the [] chars
524 if ( cSep
== ']' && bVBASupportOn
)
525 aSym
= aLine
.copy( n
- 1, nCol
- n
+ 1);
527 aSym
= aLine
.copy( n
, nCol
- n
- 1 );
528 // get out duplicate string delimiters
529 OUStringBuffer
aSymBuf(aSym
.getLength());
530 for ( sal_Int32 i
= 0, len
= aSym
.getLength(); i
< len
; ++i
)
532 aSymBuf
.append( aSym
[i
] );
533 if ( aSym
[i
] == cSep
&& ( i
+1 < len
) && aSym
[i
+1] == cSep
)
536 aSym
= aSymBuf
.makeStringAndClear();
538 eScanType
= SbxSTRING
;
544 aError
= OUString(cSep
);
545 GenError( ERRCODE_BASIC_EXPECTED
);
551 else if (nLineIdx
< aLine
.getLength() && aLine
[nLineIdx
] == '#')
553 sal_Int32 n
= nCol
+ 1;
559 while (nLineIdx
< aLine
.getLength() && (aLine
[nLineIdx
] != '#'));
560 if (nLineIdx
< aLine
.getLength() && aLine
[nLineIdx
] == '#')
563 aSym
= aLine
.copy( n
, nCol
- n
- 1 );
565 // parse date literal
566 std::shared_ptr
<SvNumberFormatter
> pFormatter
;
567 if (GetSbData()->pInst
)
569 pFormatter
= GetSbData()->pInst
->GetNumberFormatter();
574 pFormatter
= SbiInstance::PrepareNumberFormatter( nDummy
, nDummy
, nDummy
);
// date literals are always parsed with the en-US standard format
576 sal_uInt32 nIndex
= pFormatter
->GetStandardIndex( LANGUAGE_ENGLISH_US
);
577 bool bSuccess
= pFormatter
->IsNumberFormat(aSym
, nIndex
, nVal
);
580 SvNumFormatType nType_
= pFormatter
->GetType(nIndex
);
581 if( !(nType_
& SvNumFormatType::DATE
) )
586 GenError( ERRCODE_BASIC_CONVERSION
);
589 eScanType
= SbxDOUBLE
;
593 aError
= OUString('#');
594 GenError( ERRCODE_BASIC_EXPECTED
);
597 // invalid characters:
598 else if (nLineIdx
< aLine
.getLength() && aLine
[nLineIdx
] >= 0x7F)
600 GenError( ERRCODE_BASIC_SYNTAX
); nLineIdx
++; nCol
++;
606 auto nChar
= nLineIdx
< aLine
.getLength() ? aLine
[nLineIdx
] : 0;
608 if (nLineIdx
< aLine
.getLength())
612 case '<': if( aLine
[nLineIdx
] == '>' || aLine
[nLineIdx
] == '=' ) n
= 2; break;
613 case '>': if( aLine
[nLineIdx
] == '=' ) n
= 2; break;
614 case ':': if( aLine
[nLineIdx
] == '=' ) n
= 2; break;
// std::min keeps the copy inside aLine when fewer than n characters remain
617 aSym
= aLine
.copy(nCol
, std::min(n
, aLine
.getLength() - nCol
));
618 nLineIdx
+= n
-1; nCol
= nCol
+ n
;
625 if( bPrevLineExtentsComment
|| (eScanType
!= SbxSTRING
&&
626 ( bCompilerDirective
||
627 aSym
.startsWith("'") ||
628 aSym
.equalsIgnoreAsciiCase( "REM" ) ) ) )
630 bPrevLineExtentsComment
= false;
// NOTE(review): nLineIdx + nLen equals aLine.getLength(), so the two reads
// below address the last and the second-to-last character of the line. For a
// one-character line "_" the second index would be -1 - confirm this cannot
// be reached, or guard with a length check.
632 sal_Int32 nLen
= aLine
.getLength() - nLineIdx
;
633 if( bCompatible
&& aLine
[nLineIdx
+ nLen
- 1] == '_' && aLine
[nLineIdx
+ nLen
- 2] == ' ' )
634 bPrevLineExtentsComment
= true;
635 nCol2
= nCol2
+ nLen
;
// nothing was consumed since the scan started
639 if (nLineIdx
== nLineIdxScanStart
)
641 GenError( ERRCODE_BASIC_SYMBOL_EXPECTED
);
649 if( nCol
&& aLine
[--nLineIdx
] == '_' )
652 bool bRes
= NextSym();
653 if( aSym
.startsWith(".") )
657 // ^^^ <- spaces is legal in MSO VBA
674 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */