1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
26 #include "refdata.hxx"
28 #include <formula/token.hxx>
29 #include <formula/grammar.hxx>
30 #include <rtl/ustrbuf.hxx>
31 #include <com/sun/star/sheet/ExternalLinkInfo.hpp>
32 #include <com/sun/star/i18n/ParseResult.hpp>
36 #include <unordered_set>
38 #include <com/sun/star/uno/Sequence.hxx>
39 #include <o3tl/typed_flags_set.hxx>
41 #include <formula/FormulaCompiler.hxx>
45 // constants and data types also for external modules (ScInterpreter et al)
// Upper bound for the length of one symbol; ScCompiler sizes its cSymbol
// buffer as MAXSTRLEN+1 elements.
47 #define MAXSTRLEN 1024 /* maximum length of input string of one symbol */
49 // flag values of CharTable
// Bit flags of the character table ("CharTable"), stored in a sal_uInt32.
// Every enumerator visible here is one distinct bit.
// NOTE(review): bit 0x00000001, bits 0x00000040..0x00000200, any zero-valued
// enumerator and the closing brace are not visible in this copy of the file;
// do not assume the list below is exhaustive.
50 enum class ScCharFlags
: sal_uInt32
{
54 CharBool
= 0x00000002,
55 CharWord
= 0x00000004,
56 CharValue
= 0x00000008,
57 CharString
= 0x00000010,
58 CharDontCare
= 0x00000020,
63 ValueSep
= 0x00000400,
64 ValueExp
= 0x00000800,
65 ValueSign
= 0x00001000,
66 ValueValue
= 0x00002000,
67 StringSep
= 0x00004000,
68 NameSep
= 0x00008000, // there can be only one! '\''
69 CharIdent
= 0x00010000, // identifier (built-in function) or reference start
70 Ident
= 0x00020000, // identifier or reference continuation
71 OdfLBracket
= 0x00040000, // ODF '[' reference bracket
72 OdfRBracket
= 0x00080000, // ODF ']' reference bracket
73 OdfLabelOp
= 0x00100000, // ODF '!!' automatic intersection of labels
74 OdfNameMarker
= 0x00200000, // ODF '$$' marker that starts a defined (range) name
75 CharName
= 0x00400000, // start character of a defined name
76 Name
= 0x00800000, // continuation character of a defined name
77 CharErrConst
= 0x01000000, // start character of an error constant ('#')
// Specialization that declares ScCharFlags a typed flag set with the valid
// bit mask 0x01ffffff, i.e. all bits up to and including CharErrConst.
// NOTE(review): typed_flags / is_typed_flags presumably come from
// <o3tl/typed_flags_set.hxx>; the enclosing namespace is not visible here.
80 template<> struct typed_flags
<ScCharFlags
> : is_typed_flags
<ScCharFlags
, 0x01ffffff> {};
// Character that separates a quoted document name from the sheet name.
83 #define SC_COMPILER_FILE_TAB_SEP '#' // 'Doc'#Tab
// Forward declarations: the visible part of this header only uses these
// types through pointers or references.
89 struct ScInterpreterContext
;
// NOTE(review): used below as sc::CompileFormulaContext, so this declaration
// presumably sits inside namespace sc (the namespace lines are not visible).
94 class CompileFormulaContext
;
98 // constants and data types internal to compiler
// Compiler-internal raw token; ScCompiler keeps one instance as maRawToken.
// eType tells which of the data members below is meaningful.
// NOTE(review): braces, access specifiers, the union/struct scaffolding and
// the eOp member read by GetOpCode() are not visible in this copy. The two
// aRef members presumably belong to different nested structs - confirm
// against the complete header.
100 struct ScRawToken final
102 friend class ScCompiler
;
103 // Friends that use a temporary ScRawToken on the stack (and therefore need
104 // the private dtor) and know what they're doing...
105 friend class ScTokenArray
;
107 formula::StackVar eType
; // type of data; this determines how the unions are used
117 formula::ParamClass eInForceArray
;
119 ScComplexRefData aRef
;
122 ScComplexRefData aRef
;
133 ScTableRefToken::Item eItem
;
137 rtl_uString
* mpDataIgnoreCase
;
// One slot more than FORMULA_MAXJUMPCOUNT.
141 short nJump
[ FORMULA_MAXJUMPCOUNT
+ 1 ]; // If/Choose token
143 OUString maExternalName
; // depending on the opcode, this is either the external, or the external name, or the external table name
145 // coverity[uninit_member] - members deliberately not initialized
// Empty destructor body; see the note on Delete() at the end of the line.
148 ~ScRawToken() {} //! only delete via Delete()
// Returns eType unchanged.
150 formula::StackVar
GetType() const { return eType
; }
// Returns eOp unchanged.
151 OpCode
GetOpCode() const { return eOp
; }
// Overwrites eOp with e; no other member is touched here.
152 void NewOpCode( OpCode e
) { eOp
= e
; }
154 // Use these methods only on tokens that are not part of a token array,
155 // since the reference count is cleared!
// Out-of-line setters, one per payload kind (opcode, string, single
// reference, double reference, double value, error constant).
156 void SetOpCode( OpCode eCode
);
// Takes the string data twice: as is (pData) and in a form for
// case-insensitive use (pDataIgnoreCase).
157 void SetString( rtl_uString
* pData
, rtl_uString
* pDataIgnoreCase
);
158 void SetSingleReference( const ScSingleRefData
& rRef
);
159 void SetDoubleReference( const ScComplexRefData
& rRef
);
160 void SetDouble( double fVal
);
161 void SetErrorConstant( FormulaError nErr
);
163 // These methods are ok to use, reference count not cleared.
// Out-of-line setters for names and external (other-file) payloads; the
// external variants identify the file by nFileId.
164 void SetName(sal_Int16 nSheet
, sal_uInt16 nIndex
);
165 void SetExternalSingleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScSingleRefData
& rRef
);
166 void SetExternalDoubleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScComplexRefData
& rRef
);
167 void SetExternalName( sal_uInt16 nFileId
, const OUString
& rName
);
168 void SetExternal(const OUString
& rStr
);
/** If the token is a non-external reference, determine if the reference is
    valid. If the token is an external reference, return true. Else return
    false. Used only in ScCompiler::NextNewToken() to preserve non-existing
    sheet names in otherwise valid references.
 */
// Const query taking the document by const reference; the implementation is
// not part of this header.
175 bool IsValidReference(const ScDocument
& rDoc
) const;
// Returns a raw formula::FormulaToken pointer built from this raw token.
// NOTE(review): ownership of the returned pointer is not stated here -
// confirm at the call sites.
177 formula::FormulaToken
* CreateToken(ScSheetLimits
& rLimits
) const; // create typified token
// Sc-specific formula compiler: publicly derives from the generic
// formula::FormulaCompiler, is exported with SC_DLLPUBLIC and is final.
// NOTE(review): the opening brace and the access specifiers of the class
// body are not visible in this copy of the file.
180 class SC_DLLPUBLIC ScCompiler final
: public formula::FormulaCompiler
// Selects how unknown symbols are treated; set via
// SetExtendedErrorDetection() and stored in meExtendedErrorDetection.
184 enum ExtendedErrorDetection
186 EXTENDED_ERROR_DETECTION_NONE
= 0, // no error on unknown symbols, default (interpreter handles it)
187 EXTENDED_ERROR_DETECTION_NAME_BREAK
, // name error on unknown symbols and break, pCode incomplete
188 EXTENDED_ERROR_DETECTION_NAME_NO_BREAK
// name error on unknown symbols, don't break, continue
// Abstract interface for one address convention: everything but the
// constructor and destructor below is pure virtual. ScCompiler keeps a
// static table of instances (pConventions) and a current one (pConv).
191 struct SAL_DLLPRIVATE Convention
// The address convention this instance implements; const after construction.
193 const formula::FormulaGrammar::AddressConvention meConv
;
195 Convention( formula::FormulaGrammar::AddressConvention eConvP
);
196 virtual ~Convention();
// Pure virtual. Presumably writes the textual form of rRef into rBuffer for
// the given grammar and position (inferred from the name and the
// OUStringBuffer out-parameter; no implementation is visible here).
// bSingleRef / bFromRangeName are plain flags whose exact effect has to be
// checked in the implementations.
198 virtual void makeRefStr(
199 ScSheetLimits
& rLimits
,
200 OUStringBuffer
& rBuffer
,
201 formula::FormulaGrammar::Grammar eGram
,
202 const ScAddress
& rPos
,
203 const OUString
& rErrRef
, const std::vector
<OUString
>& rTabNames
,
204 const ScComplexRefData
& rRef
, bool bSingleRef
, bool bFromRangeName
) const = 0;
// Pure virtual; returns a css::i18n::ParseResult for rFormula using the
// supplied character classification.
// NOTE(review): at least one parameter line (original line 208) is missing
// from this copy, so the parameter list shown here is incomplete.
206 virtual css::i18n::ParseResult
207 parseAnyToken( const OUString
& rFormula
,
209 const CharClass
* pCharClass
,
210 bool bGroupSeparator
) const = 0;
/**
 * Parse the symbol string and pick up the file name and the external
 * name (presumably; the original sentence is cut off at this point).
 *
 * @return true on successful parse, or false otherwise.
 */
// Pure virtual. rSymbol is the input; rFile and rName are non-const
// references and therefore the outputs. pExternalLinks is passed as a
// pointer - NOTE(review): whether nullptr is accepted is not visible here.
218 virtual bool parseExternalName( const OUString
& rSymbol
, OUString
& rFile
, OUString
& rName
,
219 const ScDocument
& rDoc
,
220 const css::uno::Sequence
< css::sheet::ExternalLinkInfo
>* pExternalLinks
) const = 0;
// Pure virtual; returns an OUString built from the file id, the file and
// the name (presumably the textual form of an external name - confirm in
// the implementations).
222 virtual OUString
makeExternalNameStr( sal_uInt16 nFileId
, const OUString
& rFile
,
223 const OUString
& rName
) const = 0;
// Pure virtual, single-reference overload (takes ScSingleRefData); output
// goes into rBuffer.
225 virtual void makeExternalRefStr(
226 ScSheetLimits
& rLimits
,
227 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
, sal_uInt16 nFileId
, const OUString
& rFileName
,
228 const OUString
& rTabName
, const ScSingleRefData
& rRef
) const = 0;
// Pure virtual, range overload (takes ScComplexRefData); in addition to the
// single-reference overload it receives the list of sheet names rTabNames.
230 virtual void makeExternalRefStr(
231 ScSheetLimits
& rLimits
,
232 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
,
233 sal_uInt16 nFileId
, const OUString
& rFileName
, const std::vector
<OUString
>& rTabNames
,
234 const OUString
& rTabName
, const ScComplexRefData
& rRef
) const = 0;
// Selector passed to getSpecialSymbol().
// NOTE(review): the enumerators themselves are not visible in this copy;
// only their descriptions survive.
236 enum SpecialSymbolType
/**
 * Character between sheet name and address. In OOO A1 this is
 * '.', while XL A1 and XL R1C1 this is '!'.
 */
/**
 * In OOO A1, a sheet name may be prefixed with '$' to indicate an
 * absolute sheet position.
 */
// Pure virtual: the character this convention uses for the requested
// special symbol type.
250 virtual sal_Unicode
getSpecialSymbol( SpecialSymbolType eSymType
) const = 0;
// Pure virtual: flags for character c, with the preceding character cLast
// as context. ScCompiler::GetCharTableFlags() only calls this for c < 128.
252 virtual ScCharFlags
getCharTableFlags( sal_Unicode c
, sal_Unicode cLast
) const = 0;
// Owning array of ScCharFlags.
// NOTE(review): size and indexing scheme are not visible in this header.
255 std::unique_ptr
<ScCharFlags
[]> mpCharTable
;
// Convention gets friend access to ScCompiler.
257 friend struct Convention
;
// Class-wide (static) character classifications.
261 static const CharClass
*pCharClassEnglish
; // character classification for en_US locale
262 static const CharClass
*pCharClassLocalized
; // character classification for UI locale
// Static table of Convention pointers with CONV_LAST entries.
263 static const Convention
*pConventions
[ formula::FormulaGrammar::CONV_LAST
];
// Static, const mapping record with three C-string names per entry.
// NOTE(review): braces, any further members and the declarator that follows
// the struct are not visible in this copy.
265 static const struct AddInMap
268 const char* pEnglish
;
269 const char* pOriginal
; // programmatical name
270 const char* pUpper
; // upper case programmatical name
// Presumably the number of AddInMap entries (inferred from the name).
272 static size_t GetAddInMapCount();
// Raw pointers to the number formatter and the interpreter context.
// NOTE(review): ownership is not expressed by these types; presumably both
// are non-owning - confirm. mpFormatter can be changed through
// SetNumberFormatter().
277 SvNumberFormatter
* mpFormatter
;
278 const ScInterpreterContext
* mpInterpreterContext
;
280 SCTAB mnCurrentSheetTab
; // indicates current sheet number parsed so far
281 sal_Int32 mnCurrentSheetEndPos
; // position after current sheet name if parsed
283 // For CONV_XL_OOX, may be set via API by MOOXML filter.
// Assigned by SetExternalLinks().
284 css::uno::Sequence
<css::sheet::ExternalLinkInfo
> maExternalLinks
;
// Tokenizer state: current symbol buffer, source text and position, the raw
// token being filled, plus parsing options.
286 sal_Unicode cSymbol
[MAXSTRLEN
+1]; // current Symbol + 0
287 OUString aFormula
; // formula source code
288 sal_Int32 nSrcPos
; // tokenizer position (source code)
289 ScRawToken maRawToken
;
// NOTE(review): std::queue is used here but <queue> is not among the
// includes visible at the top of this copy; it may come in indirectly.
291 std::queue
<OpCode
> maPendingOpCodes
; // additional opcodes generated from a single symbol
293 const CharClass
* pCharClass
; // which character classification is used for parseAnyToken and upper/lower
294 bool mbCharClassesDiffer
; // whether pCharClass and current system locale's CharClass differ
295 sal_uInt16 mnPredetectedReference
; // reference when reading ODF, 0 (none), 1 (single) or 2 (double)
296 sal_Int32 mnRangeOpPosInSymbol
; // if and where a range operator is in symbol
// Currently selected convention; see SetRefConvention().
297 const Convention
*pConv
;
// Written by SetExtendedErrorDetection().
298 ExtendedErrorDetection meExtendedErrorDetection
;
299 bool mbCloseBrackets
; // whether to close open brackets automatically, default TRUE
300 bool mbRewind
; // whether symbol is to be rewound to some step during lexical analysis
301 bool mbRefConventionChartOOXML
; // whether to use special ooxml chart syntax in case of OOXML reference convention,
302 // when parsing a formula string. [0]!GlobalNamedRange, LocalSheet!LocalNamedRange
// List of sal_uInt16 values; presumably external file ids (inferred from the
// member name and the nFileId parameters used elsewhere in this header).
303 std::vector
<sal_uInt16
> maExternalFiles
;
305 std::vector
<OUString
> maTabNames
; /// sheet names mangled for the current grammar for output
// Const member that nevertheless returns a non-const reference.
// NOTE(review): presumably fills maTabNames lazily; how constness is
// bypassed is not visible here.
306 std::vector
<OUString
> &GetSetupTabNames() const; /// get or setup tab names for the current grammar
// Constructor: stores p in mxToken and starts mnLevel at 0.
// NOTE(review): the struct header and the mxToken / mnLevel member
// declarations are not visible in this copy.
312 TableRefEntry( formula::FormulaToken
* p
) : mxToken(p
), mnLevel(0) {}
314 std::vector
<TableRefEntry
> maTableRefs
; /// "stack" of currently active ocTableRef tokens
316 // Optimizing implicit intersection is done only at the end of code generation, because the usage context may
317 // be important. Store candidate parameters and the operation they are the argument for.
// One deferred candidate: remembers where the parameter token pointer lives
// (parameterLocation), the token found there at construction time
// (parameter = *p) and the operation token (operation = o).
// The constructor dereferences p, so p must not be null.
318 struct PendingImplicitIntersectionOptimization
320 PendingImplicitIntersectionOptimization(formula::FormulaToken
** p
, formula::FormulaToken
* o
)
321 : parameterLocation( p
), parameter( *p
), operation( o
) {}
322 formula::FormulaToken
** parameterLocation
;
// Both tokens are held as FormulaTokenRef, not as raw pointers.
323 formula::FormulaTokenRef parameter
;
324 formula::FormulaTokenRef operation
;
// Candidates collected for later processing.
326 std::vector
< PendingImplicitIntersectionOptimization
> mPendingImplicitIntersectionOptimizations
;
// Set of tokens; HasUnhandledPossibleImplicitIntersections() reports whether
// it is non-empty.
327 std::unordered_set
<formula::FormulaTokenRef
> mUnhandledPossibleImplicitIntersections
;
// Ordered set of opcodes, exposed read-only through
// UnhandledPossibleImplicitIntersectionsOpCodes().
// NOTE(review): <set> is not among the includes visible in this copy.
329 std::set
<OpCode
> mUnhandledPossibleImplicitIntersectionsOpCodes
;
// Out-of-line tokenizer step; bInArray is a plain flag (presumably "currently
// inside an inline array" - confirm in the implementation).
332 bool NextNewToken(bool bInArray
);
// rUpper is the output, rOrg the input.
// NOTE(review): the meaning of the bool result is only suggested by the
// name; check the implementation.
333 bool ToUpperAsciiOrI18nIsAscii( OUString
& rUpper
, const OUString
& rOrg
) const;
// Overrides the base class hook.
335 virtual void SetError(FormulaError nError
) override
;
// A whitespace record: a character (cChar) and a count (nCount).
// NOTE(review): braces and the two member declarations are not visible in
// this copy.
337 struct Whitespace final
// Default: count 0, character 0x20 (space).
342 Whitespace() : nCount(0), cChar(0x20) {}
// Resets the count to 0 and switches to character c.
343 void reset( sal_Unicode c
) { nCount
= 0; cChar
= c
; }
// Static helper working on a vector of records plus a current record;
// n defaults to 1.
346 static void addWhitespace( std::vector
<ScCompiler::Whitespace
> & rvSpaces
,
347 ScCompiler::Whitespace
& rSpace
, sal_Unicode c
, sal_Int32 n
= 1 );
// Returns a vector of Whitespace records by value.
349 std::vector
<Whitespace
> NextSymbol(bool bInArray
);
// Family of symbol recognisers taking the symbol text by const reference.
// NOTE(review): presumably each returns true when the symbol was recognised
// as that kind; the implementations are not part of this header.
351 bool ParseValue( const OUString
& );
352 bool ParseOpCode( const OUString
&, bool bInArray
);
353 bool ParseOpCode2( const OUString
& );
// pErrRef is optional and defaults to nullptr in the three reference parsers.
355 bool ParseReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
356 bool ParseSingleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
357 bool ParseDoubleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
358 bool ParsePredetectedReference( const OUString
& rSymbol
);
// No default for pErrRef here, unlike the parsers above.
359 bool ParsePredetectedErrRefReference( const OUString
& rName
, const OUString
* pErrRef
);
360 bool ParseMacro( const OUString
& );
// onlyCheck defaults to false.
361 bool ParseNamedRange( const OUString
&, bool onlyCheck
= false );
// rbInvalidExternalNameRange is an additional bool output.
362 bool ParseExternalNamedRange( const OUString
& rSymbol
, bool& rbInvalidExternalNameRange
);
363 bool ParseDBRange( const OUString
& );
364 bool ParseColRowName( const OUString
& );
365 bool ParseBoolean( const OUString
& );
366 void AutoCorrectParsedSymbol();
// Lookup by (upper-case) name. rSheet is a non-const reference, so the
// callee may modify it; a pointer is returned, which presumably can be
// null when nothing is found - confirm in the implementation.
367 const ScRangeData
* GetRangeData( SCTAB
& rSheet
, const OUString
& rUpperName
) const;
369 void AdjustSheetLocalNameRelReferences( SCTAB nDelta
);
370 void SetRelNameReference();
/** Obtain range data for ocName token, global or sheet local.

    Prerequisite: pToken is a FormulaIndexToken so IsGlobal() and
    GetIndex() can be called on it. We don't check with RTTI.
 */
// Token-based overload; despite the p prefix the parameter is a const
// reference, not a pointer. Returns a non-const ScRangeData pointer.
377 ScRangeData
* GetRangeData( const formula::FormulaToken
& pToken
) const;
// Const query for sheet nTab.
379 bool HasPossibleNamedRangeConflict(SCTAB nTab
) const;
// Static accessors; presumably return the static pCharClassEnglish /
// pCharClassLocalized pointers, possibly creating them on first use -
// confirm in the implementation.
381 static const CharClass
* GetCharClassEnglish();
382 static const CharClass
* GetCharClassLocalized();
// Constructor from a compile context and a position, without a token array.
// Defaults: bComputeII = false, bMatrixFlag = false, pContext = nullptr.
385 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
,
386 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
/** If eGrammar == GRAM_UNSPECIFIED then the grammar of rDocument is used. */
// Constructor from a document and a position, without a token array.
// Defaults: eGrammar = GRAM_UNSPECIFIED, bComputeII = false,
// bMatrixFlag = false, pContext = nullptr.
390 ScCompiler( ScDocument
& rDocument
, const ScAddress
&,
391 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
392 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
// Constructor from a compile context, a position and an existing token
// array (rArr, non-const reference).
// Defaults: bComputeII = false, bMatrixFlag = false, pContext = nullptr.
394 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
, ScTokenArray
& rArr
,
395 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
/** If eGrammar == GRAM_UNSPECIFIED then the grammar of rDocument is used. */
// Constructor from a document, a position and an existing token array
// (rArr, non-const reference).
// Defaults: eGrammar = GRAM_UNSPECIFIED, bComputeII = false,
// bMatrixFlag = false, pContext = nullptr.
399 ScCompiler( ScDocument
& rDocument
, const ScAddress
&, ScTokenArray
& rArr
,
400 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
401 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
// Destructor overriding the virtual base destructor; defined out of line.
403 virtual ~ScCompiler() override
;
// Static teardown.
// NOTE(review): the original trailing comment is cut off after "all";
// presumably this releases the class-wide static data - confirm in the
// implementation.
406 static void DeInit(); /// all
408 // for ScAddress::Format()
// Works in place on aTabName (non-const reference); eConv defaults to
// CONV_OOO. NOTE(review): presumably adds quotes where the convention
// requires them - confirm in the implementation.
409 static void CheckTabQuotes( OUString
& aTabName
,
410 const formula::FormulaGrammar::AddressConvention eConv
= formula::FormulaGrammar::CONV_OOO
);
/** Analyzes a string for a 'Doc'#Tab construct, or 'Do''c'#Tab etc...

    @returns the position of the unquoted # hash mark in 'Doc'#Tab, or
             a not-found value (the original text is cut off here;
             presumably -1 - TODO confirm).
 */
// Static; returns a position within rString as sal_Int32.
416 static sal_Int32
GetDocTabPos( const OUString
& rString
);
// Works in place on rStr (non-const reference).
// NOTE(review): presumably returns whether quoting was applied - confirm.
418 static bool EnQuote( OUString
& rStr
);
420 // Check if it is a valid english function name
421 static bool IsEnglishSymbol( const OUString
& rName
);
// Further symbol recognisers taking the symbol text by const reference
// (error constants, table reference items and columns).
423 bool ParseErrorConstant( const OUString
& );
424 bool ParseTableRefItem( const OUString
& );
425 bool ParseTableRefColumn( const OUString
& );
427 /** Calls GetToken() if PeekNextNoSpaces() is of given OpCode. */
// NOTE(review): presumably returns whether GetToken() was called - confirm.
428 bool GetTokenIfOpCode( OpCode eOp
);
/** When auto correction is set, the jump command reorder must be enabled. */
// Out-of-line setter for auto correction.
433 void SetAutoCorrection( bool bVal
);
// Inline setter: stores bVal in mbCloseBrackets.
434 void SetCloseBrackets( bool bVal
) { mbCloseBrackets
= bVal
; }
// Inline setter: stores bVal in mbRefConventionChartOOXML.
435 void SetRefConventionChartOOXML( bool bVal
) { mbRefConventionChartOOXML
= bVal
; }
// Two overloads: by Convention pointer or by AddressConvention value.
436 void SetRefConvention( const Convention
*pConvP
);
437 void SetRefConvention( const formula::FormulaGrammar::AddressConvention eConv
);
// Static lookup of the Convention object for eConv (see pConventions).
439 static const Convention
* GetRefConvention( formula::FormulaGrammar::AddressConvention eConv
);
441 /// Set symbol map if not empty.
442 void SetFormulaLanguage( const OpCodeMapPtr
& xMap
);
// Out-of-line setter for the grammar.
444 void SetGrammar( const formula::FormulaGrammar::Grammar eGrammar
);
// Takes a raw SvNumberFormatter pointer (cf. the mpFormatter member).
446 void SetNumberFormatter( SvNumberFormatter
* pFormatter
);
/** Set grammar and reference convention from within SetFormulaLanguage()
    (the rest of the original sentence is cut off here).

    @param eNewGrammar
        The new grammar to be set and the associated reference convention.

    @param eOldGrammar
        The previous grammar that was active before SetFormulaLanguage().
 */
// Takes the grammar to switch to and the grammar that was active before.
458 void SetGrammarAndRefConvention(
459 const formula::FormulaGrammar::Grammar eNewGrammar
,
460 const formula::FormulaGrammar::Grammar eOldGrammar
);
463 /// Set external link info for ScAddress::CONV_XL_OOX.
// Inline setter: copies rLinks into maExternalLinks.
// NOTE(review): the braces of the inline body are missing in this copy.
464 void SetExternalLinks(
465 const css::uno::Sequence
<
466 css::sheet::ExternalLinkInfo
>& rLinks
)
468 maExternalLinks
= rLinks
;
// Both parameters are non-const references, i.e. outputs.
471 void CreateStringFromXMLTokenArray( OUString
& rFormula
, OUString
& rFormulaNmsp
);
// Inline setter: stores eVal in meExtendedErrorDetection.
473 void SetExtendedErrorDetection( ExtendedErrorDetection eVal
) { meExtendedErrorDetection
= eVal
; }
// Inline getters returning bCorrected and aCorrectedFormula.
// NOTE(review): neither member is declared in the visible part of this
// class; presumably they are inherited from formula::FormulaCompiler.
475 bool IsCorrected() const { return bCorrected
; }
476 const OUString
& GetCorrectedFormula() const { return aCorrectedFormula
; }
/**
 * Tokenize formula expression string into an array of tokens.
 *
 * @param rFormula formula expression to tokenize.
 *
 * @return heap allocated token array object. The caller <i>must</i>
 * manage the life cycle of this object.
 */
// Returns the token array as std::unique_ptr, so ownership passes to the
// caller.
486 std::unique_ptr
<ScTokenArray
> CompileString( const OUString
& rFormula
);
// Overload with an additional formula namespace string.
487 std::unique_ptr
<ScTokenArray
> CompileString( const OUString
& rFormula
, const OUString
& rFormulaNmsp
);
// Inline getter returning aPos by const reference.
// NOTE(review): aPos is not declared in the visible part of this class.
488 const ScAddress
& GetPos() const { return aPos
; }
// Static helper operating on a token array relative to rPos, bounded by
// nMaxCol / nMaxRow.
// NOTE(review): rArr is a const reference; whether the tokens it refers to
// are nevertheless modified cannot be told from this declaration.
491 static void MoveRelWrap( const ScTokenArray
& rArr
, const ScDocument
& rDoc
, const ScAddress
& rPos
,
492 SCCOL nMaxCol
, SCROW nMaxRow
);
494 /** If the character is allowed as tested by nFlags (ScCharFlags
495 bits) for all known address conventions. If more than one bit is given
496 in nFlags, all bits must match. */
// The character tested is the one at index nPos of rStr.
497 static bool IsCharFlagAllConventions(
498 OUString
const & rStr
, sal_Int32 nPos
, ScCharFlags nFlags
);
500 /** TODO : Move this to somewhere appropriate. */
// Static helper: rRange and rFormulaPos are inputs, rAdr is a non-const
// reference and therefore the output.
// NOTE(review): presumably returns whether rAdr was set - confirm.
501 static bool DoubleRefToPosSingleRefScalarCase(const ScRange
& rRange
, ScAddress
& rAdr
,
502 const ScAddress
& rFormulaPos
);
// True when mUnhandledPossibleImplicitIntersections is not empty.
504 bool HasUnhandledPossibleImplicitIntersections() const { return !mUnhandledPossibleImplicitIntersections
.empty(); }
// Read-only access to the opcode set.
// NOTE(review): unlike the accessor above this one is not const-qualified
// although it only returns a const reference; consider adding const.
506 const std::set
<OpCode
>& UnhandledPossibleImplicitIntersectionsOpCodes() { return mUnhandledPossibleImplicitIntersectionsOpCodes
; }
// Add-in related overrides of formula::FormulaCompiler hooks; all const.
511 virtual OUString
FindAddInFunction( const OUString
& rUpperName
, bool bLocalFirst
) const override
;
512 virtual void fillFromAddInCollectionUpperName( const NonConstOpCodeMapPtr
& xMap
) const override
;
513 virtual void fillFromAddInCollectionEnglishName( const NonConstOpCodeMapPtr
& xMap
) const override
;
514 virtual void fillFromAddInMap( const NonConstOpCodeMapPtr
& xMap
, formula::FormulaGrammar::Grammar _eGrammar
) const override
;
// _rVec is a non-const reference and therefore the output.
515 virtual void fillAddInToken(::std::vector
< css::sheet::FormulaOpCodeMapEntry
>& _rVec
,bool _bIsEnglish
) const override
;
// Overrides of the base class Handle* hooks, one per token category.
// NOTE(review): the meaning of the bool results is defined by
// formula::FormulaCompiler and is not visible here.
517 virtual bool HandleExternalReference(const formula::FormulaToken
& _aToken
) override
;
518 virtual bool HandleRange() override
;
519 virtual bool HandleColRowName() override
;
520 virtual bool HandleDbData() override
;
521 virtual bool HandleTableRef() override
;
// Override; returns a FormulaTokenRef built from two tokens.
523 virtual formula::FormulaTokenRef
ExtendRangeReference( formula::FormulaToken
& rTok1
, formula::FormulaToken
& rTok2
) override
;
// CreateStringFrom* overrides: each takes an output buffer and the token to
// render, one override per token kind; all const.
524 virtual void CreateStringFromExternal( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
525 virtual void CreateStringFromSingleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
526 virtual void CreateStringFromDoubleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
527 virtual void CreateStringFromMatrix( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
528 virtual void CreateStringFromIndex( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
// Modifies rName in place.
529 virtual void LocalizeString( OUString
& rName
) const override
; // modify rName - input: exact name
// Override returning a formula::ParamClass for parameter nParam of pToken.
531 virtual formula::ParamClass
GetForceArrayParameter( const formula::FormulaToken
* pToken
, sal_uInt16 nParam
) const override
;
533 /// Access the CharTable flags
// For c < 128 the current convention (pConv) is asked; for any other
// character ScCharFlags::NONE is returned without consulting the convention.
// pConv is dereferenced unconditionally, so it must be set.
534 ScCharFlags
GetCharTableFlags( sal_Unicode c
, sal_Unicode cLast
)
535 { return c
< 128 ? pConv
->getCharTableFlags(c
, cLast
) : ScCharFlags::NONE
; }
// Implicit-intersection ("II") handling and post-processing.
// HandleIIOpCode is the base class hook; HandleIIOpCodeInternal has the
// same parameters but reports a bool.
537 virtual void HandleIIOpCode(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
) override
;
538 bool HandleIIOpCodeInternal(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
);
539 bool SkipImplicitIntersectionOptimization(const formula::FormulaToken
* token
) const;
// Further base class hooks overridden here.
540 virtual void PostProcessCode() override
;
541 virtual void AnnotateOperands() override
;
// Static predicate on a token and a parameter index.
542 static bool ParameterMayBeImplicitIntersection(const formula::FormulaToken
* token
, int parameter
);
// Takes the location of the token pointer, so the pointed-to slot can be
// replaced (inferred from the name and the pointer-to-pointer parameter).
543 void ReplaceDoubleRefII(formula::FormulaToken
** ppDoubleRefTok
);
// rBaseRange is input only; rSumRange is a non-const reference and may be
// adjusted. NOTE(review): presumably the bool tells whether it was changed.
544 bool AdjustSumRangeShape(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
);
545 void CorrectSumRange(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
, formula::FormulaToken
** ppSumRangeToken
);
546 void AnnotateTrimOnDoubleRefs();
549 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */