1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
26 #include "refdata.hxx"
28 #include <formula/token.hxx>
29 #include <formula/grammar.hxx>
30 #include <rtl/ustrbuf.hxx>
31 #include <com/sun/star/sheet/ExternalLinkInfo.hpp>
32 #include <com/sun/star/i18n/ParseResult.hpp>
36 #include <unordered_set>
38 #include <com/sun/star/uno/Sequence.hxx>
39 #include <o3tl/typed_flags_set.hxx>
41 #include <formula/FormulaCompiler.hxx>
45 // constants and data types also for external modules (ScInterpreter et al)
// MAXSTRLEN is the upper bound for the text of a single symbol.
// ScCompiler::cSymbol (the "current Symbol + 0" buffer) is declared with
// MAXSTRLEN+1 elements, i.e. one extra element for the trailing 0.
47 #define MAXSTRLEN 1024 /* maximum length of input string of one symbol */
49 // flag values of CharTable
// Scoped enumeration with sal_uInt32 as underlying type. Every enumerator
// visible here is a distinct single bit, so values can be combined as a mask.
// NOTE(review): the enumerators for bit 0x00000001 and for bits
// 0x00000040..0x00000200, as well as ScCharFlags::NONE (which is used by
// ScCompiler::GetCharTableFlags), are not visible in this excerpt, and neither
// is the closing brace of the enumeration.
50 enum class ScCharFlags
: sal_uInt32
{
// Char* group, bits 0x02..0x20.
54 CharBool
= 0x00000002,
55 CharWord
= 0x00000004,
56 CharValue
= 0x00000008,
57 CharString
= 0x00000010,
58 CharDontCare
= 0x00000020,
// Value* group, bits 0x0400..0x2000.
63 ValueSep
= 0x00000400,
64 ValueExp
= 0x00000800,
65 ValueSign
= 0x00001000,
66 ValueValue
= 0x00002000,
// Separators.
67 StringSep
= 0x00004000,
68 NameSep
= 0x00008000, // there can be only one! '\''
// Identifiers and references.
69 CharIdent
= 0x00010000, // identifier (built-in function) or reference start
70 Ident
= 0x00020000, // identifier or reference continuation
// ODF specific markers.
71 OdfLBracket
= 0x00040000, // ODF '[' reference bracket
72 OdfRBracket
= 0x00080000, // ODF ']' reference bracket
73 OdfLabelOp
= 0x00100000, // ODF '!!' automatic intersection of labels
74 OdfNameMarker
= 0x00200000, // ODF '$$' marker that starts a defined (range) name
// Defined names and error constants. CharErrConst is the highest bit used.
75 CharName
= 0x00400000, // start character of a defined name
76 Name
= 0x00800000, // continuation character of a defined name
77 CharErrConst
= 0x01000000, // start character of an error constant ('#')
// Explicit specialization of typed_flags for ScCharFlags, derived from
// is_typed_flags with the mask 0x01ffffff. That mask spans bits 0..24, which
// includes every ScCharFlags enumerator up to CharErrConst (0x01000000).
// NOTE(review): presumably this is what makes the bitwise operators of
// <o3tl/typed_flags_set.hxx> available for ScCharFlags - confirm there. The
// enclosing namespace of this specialization is not visible in this excerpt.
80 template<> struct typed_flags
<ScCharFlags
> : is_typed_flags
<ScCharFlags
, 0x01ffffff> {};
// Character that separates the quoted document part from the sheet part in a
// 'Doc'#Tab construct; ScCompiler::GetDocTabPos() reports the position of the
// unquoted hash mark in such a string.
83 #define SC_COMPILER_FILE_TAB_SEP '#' // 'Doc'#Tab
// Forward declarations: both types are only used through pointers or
// references in the declarations of this header.
// ScInterpreterContext appears as "const ScInterpreterContext*".
89 struct ScInterpreterContext
;
// Referred to as sc::CompileFormulaContext by the ScCompiler constructors.
// NOTE(review): the surrounding "namespace sc" lines are not visible in this
// excerpt - confirm against the full file.
94 class CompileFormulaContext
;
98 // constants and data types internal to compiler
// ScRawToken: raw token record filled through the Set*() member functions and
// converted into a formula::FormulaToken by CreateToken().
// NOTE(review): braces, access specifiers and the union/struct scaffolding
// around the data members are not visible in this excerpt; the member list
// below is therefore incomplete and must be read together with the full file.
100 struct ScRawToken final
102 friend class ScCompiler
;
103 // Friends that use a temporary ScRawToken on the stack (and therefore need
104 // the private dtor) and know what they're doing...
105 friend class ScTokenArray
;
// Discriminator: tells which of the alternative payload members is in use.
107 formula::StackVar eType
; // type of data; this determines how the unions are used
117 formula::ParamClass eInForceArray
;
// NOTE(review): aRef is listed twice because the two declarations live in
// different enclosing scopes whose lines are missing here - TODO confirm.
119 ScComplexRefData aRef
;
122 ScComplexRefData aRef
;
133 ScTableRefToken::Item eItem
;
// Raw rtl_uString pointer; SetString() takes a matching pDataIgnoreCase
// argument.
137 rtl_uString
* mpDataIgnoreCase
;
// Jump table with FORMULA_MAXJUMPCOUNT + 1 entries.
141 short nJump
[ FORMULA_MAXJUMPCOUNT
+ 1 ]; // If/Choose token
143 OUString maExternalName
; // depending on the opcode, this is either the external, or the external name, or the external table name
145 // coverity[uninit_member] - members deliberately not initialized
// Empty destructor body; see the friend declarations above for who may use
// it directly.
148 ~ScRawToken() {} //! only delete via Delete()
// --- ScRawToken member functions ---
// Trivial inline accessors: GetType() returns eType, GetOpCode() returns eOp,
// NewOpCode() overwrites eOp and touches nothing else.
// NOTE(review): the declaration of eOp is not visible in this excerpt.
150 formula::StackVar
GetType() const { return eType
; }
151 OpCode
GetOpCode() const { return eOp
; }
152 void NewOpCode( OpCode e
) { eOp
= e
; }
154 // Use these methods only on tokens that are not part of a token array,
155 // since the reference count is cleared!
156 void SetOpCode( OpCode eCode
);
// Takes two raw rtl_uString pointers; the second one corresponds to the
// mpDataIgnoreCase member.
157 void SetString( rtl_uString
* pData
, rtl_uString
* pDataIgnoreCase
);
158 void SetSingleReference( const ScSingleRefData
& rRef
);
159 void SetDoubleReference( const ScComplexRefData
& rRef
);
160 void SetDouble( double fVal
);
161 void SetErrorConstant( FormulaError nErr
);
163 // These methods are ok to use, reference count not cleared.
164 void SetName(sal_Int16 nSheet
, sal_uInt16 nIndex
);
// External reference setters: all of them identify the external document by
// nFileId; single and double references additionally carry a table name.
165 void SetExternalSingleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScSingleRefData
& rRef
);
166 void SetExternalDoubleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScComplexRefData
& rRef
);
167 void SetExternalName( sal_uInt16 nFileId
, const OUString
& rName
);
168 void SetExternal(const OUString
& rStr
);
// NOTE(review): the closing delimiter of the following doc comment is on a
// line that is missing from this excerpt.
170 /** If the token is a non-external reference, determine if the reference is
171 valid. If the token is an external reference, return true. Else return
172 false. Used only in ScCompiler::NextNewToken() to preserve non-existing
173 sheet names in otherwise valid references.
175 bool IsValidReference(const ScDocument
& rDoc
) const;
// Returns a raw formula::FormulaToken pointer.
// NOTE(review): ownership of the returned token is not stated here - confirm
// with the implementation and the callers.
177 formula::FormulaToken
* CreateToken(ScSheetLimits
& rLimits
) const; // create typified token
// ScCompiler: final class publicly derived from formula::FormulaCompiler and
// exported with SC_DLLPUBLIC.
// NOTE(review): the braces and the public/private/protected specifiers of
// this class are not visible in this excerpt, so the access level of the
// members that follow cannot be read off from here.
180 class SC_DLLPUBLIC ScCompiler final
: public formula::FormulaCompiler
// How unknown symbols are reported. The active value is kept in
// meExtendedErrorDetection and is changed via SetExtendedErrorDetection().
184 enum ExtendedErrorDetection
186 EXTENDED_ERROR_DETECTION_NONE
= 0, // no error on unknown symbols, default (interpreter handles it)
187 EXTENDED_ERROR_DETECTION_NAME_BREAK
, // name error on unknown symbols and break, pCode incomplete
188 EXTENDED_ERROR_DETECTION_NAME_NO_BREAK
// name error on unknown symbols, don't break, continue
// Convention: abstract base describing one address convention. Apart from the
// constructor and the virtual destructor, every member function visible here
// is pure virtual (= 0) and const, so each concrete convention has to
// implement all of them.
191 struct SAL_DLLPRIVATE Convention
// The address convention this object stands for; const, so it cannot change
// after construction.
193 const formula::FormulaGrammar::AddressConvention meConv
;
195 Convention( formula::FormulaGrammar::AddressConvention eConvP
);
196 virtual ~Convention();
// Reference-to-text conversion. rRef is always passed as ScComplexRefData;
// bSingleRef tells whether it is to be treated as a single reference.
// NOTE(review): presumably the text is appended to rBuffer - TODO confirm
// with the implementations.
198 virtual void makeRefStr(
199 ScSheetLimits
& rLimits
,
200 OUStringBuffer
& rBuffer
,
201 formula::FormulaGrammar::Grammar eGram
,
202 const ScAddress
& rPos
,
203 const OUString
& rErrRef
, const std::vector
<OUString
>& rTabNames
,
204 const ScComplexRefData
& rRef
, bool bSingleRef
, bool bFromRangeName
) const = 0;
// NOTE(review): further parameters of parseAnyToken may sit on a line that is
// missing from this excerpt (between rFormula and pCharClass).
206 virtual css::i18n::ParseResult
207 parseAnyToken( const OUString
& rFormula
,
209 const CharClass
* pCharClass
,
210 bool bGroupSeparator
) const = 0;
213 * Parse the symbol string and pick up the file name and the external
216 * @return true on successful parse, or false otherwise.
// rFile and rName are non-const references, i.e. output parameters.
// pExternalLinks is a pointer to a sequence of ExternalLinkInfo.
218 virtual bool parseExternalName( const OUString
& rSymbol
, OUString
& rFile
, OUString
& rName
,
219 const ScDocument
& rDoc
,
220 const css::uno::Sequence
< css::sheet::ExternalLinkInfo
>* pExternalLinks
) const = 0;
222 virtual OUString
makeExternalNameStr( sal_uInt16 nFileId
, const OUString
& rFile
,
223 const OUString
& rName
) const = 0;
// Overload for an external single reference (ScSingleRefData).
225 virtual void makeExternalRefStr(
226 ScSheetLimits
& rLimits
,
227 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
, sal_uInt16 nFileId
, const OUString
& rFileName
,
228 const OUString
& rTabName
, const ScSingleRefData
& rRef
) const = 0;
// Overload for an external double reference (ScComplexRefData); compared to
// the single reference overload it additionally receives rTabNames.
230 virtual void makeExternalRefStr(
231 ScSheetLimits
& rLimits
,
232 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
,
233 sal_uInt16 nFileId
, const OUString
& rFileName
, const std::vector
<OUString
>& rTabNames
,
234 const OUString
& rTabName
, const ScComplexRefData
& rRef
) const = 0;
// NOTE(review): the enumerators of SpecialSymbolType are on lines missing
// from this excerpt; only fragments of their doc comments remain below.
236 enum SpecialSymbolType
239 * Character between sheet name and address. In OOO A1 this is
240 * '.', while XL A1 and XL R1C1 this is '!'.
245 * In OOO A1, a sheet name may be prefixed with '$' to indicate an
246 * absolute sheet position.
250 virtual sal_Unicode
getSpecialSymbol( SpecialSymbolType eSymType
) const = 0;
// Per-character flag lookup. ScCompiler::GetCharTableFlags() only forwards
// characters below 128 to this function.
252 virtual ScCharFlags
getCharTableFlags( sal_Unicode c
, sal_Unicode cLast
) const = 0;
// Array of ScCharFlags owned through std::unique_ptr.
// NOTE(review): array length and the indexing scheme are not visible here.
255 std::unique_ptr
<ScCharFlags
[]> mpCharTable
;
257 friend struct Convention
;
// --- ScCompiler static data ---
// NOTE(review): which of the following statics maMutex guards is not visible
// in this header - see the implementation.
261 static osl::Mutex maMutex
;
// The two CharClass pointers are handed out by GetCharClassEnglish() and
// GetCharClassLocalized().
262 static const CharClass
*pCharClassEnglish
; // character classification for en_US locale
263 static const CharClass
*pCharClassLocalized
; // character classification for UI locale
// One Convention pointer per address convention; the array has
// formula::FormulaGrammar::CONV_LAST elements.
264 static const Convention
*pConventions
[ formula::FormulaGrammar::CONV_LAST
];
// AddInMap: record of three C strings describing one Add-In function name in
// its different spellings.
// NOTE(review): further members and the declarator that follows the struct
// body are on lines missing from this excerpt.
266 static const struct AddInMap
269 const char* pEnglish
;
270 const char* pOriginal
; // programmatical name
271 const char* pUpper
; // upper case programmatical name
273 static size_t GetAddInMapCount();
// --- ScCompiler per-instance state ---
// Raw, non-smart pointers; mpFormatter can be replaced through
// SetNumberFormatter(), mpInterpreterContext matches the pContext constructor
// argument type.
278 SvNumberFormatter
* mpFormatter
;
279 const ScInterpreterContext
* mpInterpreterContext
;
281 SCTAB mnCurrentSheetTab
; // indicates current sheet number parsed so far
282 sal_Int32 mnCurrentSheetEndPos
; // position after current sheet name if parsed
284 // For CONV_XL_OOX, may be set via API by MOOXML filter.
// Assigned by SetExternalLinks().
285 css::uno::Sequence
<css::sheet::ExternalLinkInfo
> maExternalLinks
;
// Symbol buffer: MAXSTRLEN characters plus one element for the trailing 0.
287 sal_Unicode cSymbol
[MAXSTRLEN
+1]; // current Symbol + 0
288 OUString aFormula
; // formula source code
289 sal_Int32 nSrcPos
; // tokenizer position (source code)
290 ScRawToken maRawToken
;
// NOTE(review): std::queue needs <queue> and std::set (used further down)
// needs <set>; neither include is visible in this excerpt - check the full
// include list.
292 std::queue
<OpCode
> maPendingOpCodes
; // additional opcodes generated from a single symbol
294 const CharClass
* pCharClass
; // which character classification is used for parseAnyToken and upper/lower
295 bool mbCharClassesDiffer
; // whether pCharClass and current system locale's CharClass differ
296 sal_uInt16 mnPredetectedReference
; // reference when reading ODF, 0 (none), 1 (single) or 2 (double)
297 sal_Int32 mnRangeOpPosInSymbol
; // if and where a range operator is in symbol
// Currently active reference convention; GetCharTableFlags() dereferences it.
298 const Convention
*pConv
;
299 ExtendedErrorDetection meExtendedErrorDetection
;
300 bool mbCloseBrackets
; // whether to close open brackets automatically, default TRUE
301 bool mbRewind
; // whether symbol is to be rewound to some step during lexical analysis
302 bool mbRefConventionChartOOXML
; // whether to use special ooxml chart syntax in case of OOXML reference convention,
303 // when parsing a formula string. [0]!GlobalNamedRange, LocalSheet!LocalNamedRange
304 std::vector
<sal_uInt16
> maExternalFiles
;
306 std::vector
<OUString
> maTabNames
; /// sheet names mangled for the current grammar for output
// Const member function that nevertheless returns a non-const reference.
// NOTE(review): presumably maTabNames is mutable or filled via a cast -
// confirm in the implementation.
307 std::vector
<OUString
> &GetSetupTabNames() const; /// get or setup tab names for the current grammar
// Constructor of TableRefEntry (the struct header and its member declarations
// are on lines missing from this excerpt): stores p in mxToken and starts
// mnLevel at 0.
313 TableRefEntry( formula::FormulaToken
* p
) : mxToken(p
), mnLevel(0) {}
315 std::vector
<TableRefEntry
> maTableRefs
; /// "stack" of currently active ocTableRef tokens
317 // Optimizing implicit intersection is done only at the end of code generation, because the usage context may
318 // be important. Store candidate parameters and the operation they are the argument for.
319 struct PendingImplicitIntersectionOptimization
// The constructor records three things: the slot p itself, the token that
// slot points to at construction time (*p), and the operation token o.
// p is dereferenced unconditionally, so it must not be null.
321 PendingImplicitIntersectionOptimization(formula::FormulaToken
** p
, formula::FormulaToken
* o
)
322 : parameterLocation( p
), parameter( *p
), operation( o
) {}
// Raw pointer to the slot; the two members after it are FormulaTokenRef
// handles rather than raw pointers.
323 formula::FormulaToken
** parameterLocation
;
324 formula::FormulaTokenRef parameter
;
325 formula::FormulaTokenRef operation
;
327 std::vector
< PendingImplicitIntersectionOptimization
> mPendingImplicitIntersectionOptimizations
;
// Queried by HasUnhandledPossibleImplicitIntersections().
328 std::unordered_set
<formula::FormulaTokenRef
> mUnhandledPossibleImplicitIntersections
;
// Exposed read-only by UnhandledPossibleImplicitIntersectionsOpCodes().
330 std::set
<OpCode
> mUnhandledPossibleImplicitIntersectionsOpCodes
;
// --- ScCompiler tokenizer and symbol parsing helpers ---
333 bool NextNewToken(bool bInArray
);
// rUpper is an output parameter, rOrg the unmodified input.
334 bool ToUpperAsciiOrI18nIsAscii( OUString
& rUpper
, const OUString
& rOrg
) const;
336 virtual void SetError(FormulaError nError
) override
;
// Whitespace: a default constructed object has nCount 0 and cChar 0x20;
// reset(c) sets the count back to 0 and switches the character to c.
// NOTE(review): the declarations of nCount and cChar are on lines missing
// from this excerpt.
338 struct Whitespace final
343 Whitespace() : nCount(0), cChar(0x20) {}
344 void reset( sal_Unicode c
) { nCount
= 0; cChar
= c
; }
// Static helper working on a caller supplied vector and Whitespace object;
// n defaults to 1.
347 static void addWhitespace( std::vector
<ScCompiler::Whitespace
> & rvSpaces
,
348 ScCompiler::Whitespace
& rSpace
, sal_Unicode c
, sal_Int32 n
= 1 );
// Returns the Whitespace records by value.
350 std::vector
<Whitespace
> NextSymbol(bool bInArray
);
// Parse*() family: each function takes the symbol text and returns bool.
// NOTE(review): presumably true means the symbol was recognized as the
// respective kind - confirm with the implementation.
352 bool ParseValue( const OUString
& );
353 bool ParseOpCode( const OUString
&, bool bInArray
);
354 bool ParseOpCode2( std::u16string_view
);
// The reference parsers accept an optional pErrRef that defaults to nullptr.
356 bool ParseReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
357 bool ParseSingleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
358 bool ParseDoubleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
359 bool ParsePredetectedReference( const OUString
& rSymbol
);
// Here pErrRef has no default value.
360 bool ParsePredetectedErrRefReference( const OUString
& rName
, const OUString
* pErrRef
);
361 bool ParseMacro( const OUString
& );
// onlyCheck defaults to false.
362 bool ParseNamedRange( const OUString
&, bool onlyCheck
= false );
// rbInvalidExternalNameRange is an output flag.
363 bool ParseExternalNamedRange( const OUString
& rSymbol
, bool& rbInvalidExternalNameRange
);
364 bool ParseDBRange( const OUString
& );
365 bool ParseColRowName( const OUString
& );
366 bool ParseBoolean( const OUString
& );
367 void AutoCorrectParsedSymbol();
// Lookup by upper case name; rSheet is a non-const reference and the result
// is a pointer to const ScRangeData.
368 const ScRangeData
* GetRangeData( SCTAB
& rSheet
, const OUString
& rUpperName
) const;
370 void AdjustSheetLocalNameRelReferences( SCTAB nDelta
);
371 void SetRelNameReference();
373 /** Obtain range data for ocName token, global or sheet local.
375 Prerequisite: rToken is a FormulaIndexToken so IsGlobal() and
376 GetIndex() can be called on it. We don't check with RTTI.
// NOTE(review): the text above speaks of rToken while the parameter of the
// declaration below is named pToken although it is a reference.
378 ScRangeData
* GetRangeData( const formula::FormulaToken
& pToken
) const;
380 bool HasPossibleNamedRangeConflict(SCTAB nTab
) const;
// --- ScCompiler construction and general interface ---
// Static accessors returning pointers to const CharClass.
383 static const CharClass
* GetCharClassLocalized();
384 static const CharClass
* GetCharClassEnglish();
// Four constructors: the compile context or the document is the first
// argument, and the variants taking rArr additionally receive a ScTokenArray.
// In all of them bComputeII and bMatrixFlag default to false and pContext
// defaults to nullptr.
387 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
,
388 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
390 /** If eGrammar == GRAM_UNSPECIFIED then the grammar of rDocument is used,
392 ScCompiler( ScDocument
& rDocument
, const ScAddress
&,
393 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
394 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
396 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
, ScTokenArray
& rArr
,
397 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
399 /** If eGrammar == GRAM_UNSPECIFIED then the grammar of rDocument is used,
401 ScCompiler( ScDocument
& rDocument
, const ScAddress
&, ScTokenArray
& rArr
,
402 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
403 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
405 virtual ~ScCompiler() override
;
408 static void DeInit(); /// all
410 // for ScAddress::Format()
// aTabName is passed by non-const reference, so it may be modified in place;
// eConv defaults to CONV_OOO.
411 static void CheckTabQuotes( OUString
& aTabName
,
412 const formula::FormulaGrammar::AddressConvention eConv
= formula::FormulaGrammar::CONV_OOO
);
414 /** Analyzes a string for a 'Doc'#Tab construct, or 'Do''c'#Tab etc...
416 @returns the position of the unquoted # hash mark in 'Doc'#Tab, or
418 static sal_Int32
GetDocTabPos( const OUString
& rString
);
420 // Check if it is a valid english function name
421 static bool IsEnglishSymbol( const OUString
& rName
);
423 bool ParseErrorConstant( const OUString
& );
424 bool ParseTableRefItem( const OUString
& );
425 bool ParseTableRefColumn( const OUString
& );
427 /** Calls GetToken() if PeekNextNoSpaces() is of given OpCode. */
428 bool GetTokenIfOpCode( OpCode eOp
);
431 * When auto correction is set, the jump command reorder must be enabled.
433 void SetAutoCorrection( bool bVal
);
// Inline setters that only store the flag in the corresponding member.
434 void SetCloseBrackets( bool bVal
) { mbCloseBrackets
= bVal
; }
435 void SetRefConventionChartOOXML( bool bVal
) { mbRefConventionChartOOXML
= bVal
; }
// The convention can be selected either by object pointer or by enum value.
436 void SetRefConvention( const Convention
*pConvP
);
437 void SetRefConvention( const formula::FormulaGrammar::AddressConvention eConv
);
439 static const Convention
* GetRefConvention( formula::FormulaGrammar::AddressConvention eConv
);
441 /** Overwrite FormulaCompiler::GetOpCodeMap() forwarding to
// The inline body simply returns GetFinalOpCodeMap(nLanguage).
444 OpCodeMapPtr
GetOpCodeMap( const sal_Int32 nLanguage
) const { return GetFinalOpCodeMap(nLanguage
); }
446 /// Set symbol map if not empty.
447 void SetFormulaLanguage( const OpCodeMapPtr
& xMap
);
449 void SetGrammar( const formula::FormulaGrammar::Grammar eGrammar
);
451 void SetNumberFormatter( SvNumberFormatter
* pFormatter
);
454 /** Set grammar and reference convention from within SetFormulaLanguage()
458 The new grammar to be set and the associated reference convention.
461 The previous grammar that was active before SetFormulaLanguage().
463 void SetGrammarAndRefConvention(
464 const formula::FormulaGrammar::Grammar eNewGrammar
,
465 const formula::FormulaGrammar::Grammar eOldGrammar
);
468 /// Set external link info for ScAddress::CONV_XL_OOX.
// Inline: copies rLinks into maExternalLinks (the braces of the body are on
// lines missing from this excerpt).
469 void SetExternalLinks(
470 const css::uno::Sequence
<
471 css::sheet::ExternalLinkInfo
>& rLinks
)
473 maExternalLinks
= rLinks
;
// Both strings are passed by non-const reference, i.e. they are outputs.
476 void CreateStringFromXMLTokenArray( OUString
& rFormula
, OUString
& rFormulaNmsp
);
478 void SetExtendedErrorDetection( ExtendedErrorDetection eVal
) { meExtendedErrorDetection
= eVal
; }
// NOTE(review): bCorrected, aCorrectedFormula and aPos (see GetPos() below)
// are not declared in this excerpt; presumably they come from the
// FormulaCompiler base class or from a missing line - TODO confirm.
480 bool IsCorrected() const { return bCorrected
; }
481 const OUString
& GetCorrectedFormula() const { return aCorrectedFormula
; }
484 * Tokenize formula expression string into an array of tokens.
486 * @param rFormula formula expression to tokenize.
488 * @return heap allocated token array object. The caller <i>must</i>
489 * manage the life cycle of this object.
// The result type is std::unique_ptr<ScTokenArray>, so the caller receives
// ownership through the smart pointer.
491 std::unique_ptr
<ScTokenArray
> CompileString( const OUString
& rFormula
);
492 std::unique_ptr
<ScTokenArray
> CompileString( const OUString
& rFormula
, const OUString
& rFormulaNmsp
);
493 const ScAddress
& GetPos() const { return aPos
; }
496 static void MoveRelWrap( const ScTokenArray
& rArr
, const ScDocument
& rDoc
, const ScAddress
& rPos
,
497 SCCOL nMaxCol
, SCROW nMaxRow
);
// NOTE(review): the comment below still mentions SC_COMPILER_C_... bits,
// whereas the nFlags parameter is of type ScCharFlags.
499 /** If the character is allowed as tested by nFlags (SC_COMPILER_C_...
500 bits) for all known address conventions. If more than one bit is given
501 in nFlags, all bits must match. */
502 static bool IsCharFlagAllConventions(
503 OUString
const & rStr
, sal_Int32 nPos
, ScCharFlags nFlags
);
505 /** TODO : Move this to somewhere appropriate. */
// rAdr is a non-const reference, i.e. an output parameter.
506 static bool DoubleRefToPosSingleRefScalarCase(const ScRange
& rRange
, ScAddress
& rAdr
,
507 const ScAddress
& rFormulaPos
);
// True when mUnhandledPossibleImplicitIntersections holds at least one token.
509 bool HasUnhandledPossibleImplicitIntersections() const { return !mUnhandledPossibleImplicitIntersections
.empty(); }
// Returns a const reference to the member set.
// NOTE(review): unlike the accessor above, this one is not declared const.
511 const std::set
<OpCode
>& UnhandledPossibleImplicitIntersectionsOpCodes() { return mUnhandledPossibleImplicitIntersectionsOpCodes
; }
// --- Overrides of virtual functions (all marked override) ---
// Add-In related hooks.
516 virtual OUString
FindAddInFunction( const OUString
& rUpperName
, bool bLocalFirst
) const override
;
517 virtual void fillFromAddInCollectionUpperName( const NonConstOpCodeMapPtr
& xMap
) const override
;
518 virtual void fillFromAddInCollectionEnglishName( const NonConstOpCodeMapPtr
& xMap
) const override
;
519 virtual void fillFromAddInMap( const NonConstOpCodeMapPtr
& xMap
, formula::FormulaGrammar::Grammar _eGrammar
) const override
;
// _rVec is a non-const reference to the vector of FormulaOpCodeMapEntry.
520 virtual void fillAddInToken(::std::vector
< css::sheet::FormulaOpCodeMapEntry
>& _rVec
,bool _bIsEnglish
) const override
;
// Handle*() hooks, each returning bool.
522 virtual bool HandleExternalReference(const formula::FormulaToken
& _aToken
) override
;
523 virtual bool HandleRange() override
;
524 virtual bool HandleColRowName() override
;
525 virtual bool HandleDbData() override
;
526 virtual bool HandleTableRef() override
;
528 virtual formula::FormulaTokenRef
ExtendRangeReference( formula::FormulaToken
& rTok1
, formula::FormulaToken
& rTok2
) override
;
// CreateStringFrom*() hooks: all take the OUStringBuffer to work on and the
// token to convert, and all are const.
529 virtual void CreateStringFromExternal( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
530 virtual void CreateStringFromSingleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
531 virtual void CreateStringFromDoubleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
532 virtual void CreateStringFromMatrix( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
533 virtual void CreateStringFromIndex( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
534 virtual void LocalizeString( OUString
& rName
) const override
; // modify rName - input: exact name
536 virtual formula::ParamClass
GetForceArrayParameter( const formula::FormulaToken
* pToken
, sal_uInt16 nParam
) const override
;
538 /// Access the CharTable flags
// Characters below 128 are looked up through the active convention (pConv);
// every other character yields ScCharFlags::NONE without consulting it.
539 ScCharFlags
GetCharTableFlags( sal_Unicode c
, sal_Unicode cLast
)
540 { return c
< 128 ? pConv
->getCharTableFlags(c
, cLast
) : ScCharFlags::NONE
; }
// --- Implicit intersection (II) handling ---
// HandleIIOpCode is the virtual override returning void;
// HandleIIOpCodeInternal takes the same arguments and returns bool.
542 virtual void HandleIIOpCode(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
) override
;
543 bool HandleIIOpCodeInternal(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
);
544 bool SkipImplicitIntersectionOptimization(const formula::FormulaToken
* token
) const;
545 virtual void PostProcessCode() override
;
546 virtual void AnnotateOperands() override
;
547 static bool ParameterMayBeImplicitIntersection(const formula::FormulaToken
* token
, int parameter
);
// Takes the address of the token pointer.
// NOTE(review): presumably so that the token can be exchanged in place -
// TODO confirm.
548 void ReplaceDoubleRefII(formula::FormulaToken
** ppDoubleRefTok
);
// rBaseRange is read-only, rSumRange is passed by non-const reference.
549 bool AdjustSumRangeShape(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
);
550 void CorrectSumRange(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
, formula::FormulaToken
** ppSumRangeToken
);
551 void AnnotateTrimOnDoubleRefs();
554 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */