1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
26 #include "refdata.hxx"
28 #include <formula/token.hxx>
29 #include <formula/grammar.hxx>
30 #include <rtl/ustrbuf.hxx>
31 #include <com/sun/star/sheet/ExternalLinkInfo.hpp>
32 #include <com/sun/star/i18n/ParseResult.hpp>
36 #include <unordered_set>
38 #include <com/sun/star/uno/Sequence.hxx>
39 #include <o3tl/typed_flags_set.hxx>
41 #include <formula/FormulaCompiler.hxx>
45 // constants and data types also for external modules (ScInterpreter et al)
47 #define MAXSTRLEN 1024 /* maximum length of input string of one symbol */
49 // flag values of CharTable
// Character classification bits used by the formula tokenizer's CharTable.
// The underlying type is sal_uInt32 and every enumerator listed below is a
// distinct single bit, so one value can carry several classifications.
50 enum class ScCharFlags
: sal_uInt32
{
54 CharBool
= 0x00000002,
55 CharWord
= 0x00000004,
56 CharValue
= 0x00000008,
57 CharString
= 0x00000010,
58 CharDontCare
= 0x00000020,
// NOTE(review): no enumerator for bit 0x00000001 or for bits
// 0x00000040..0x00000200 is visible in this excerpt (nor is NONE, which
// ScCompiler::GetCharTableFlags() returns) -- confirm against the full file.
63 ValueSep
= 0x00000400,
64 ValueExp
= 0x00000800,
65 ValueSign
= 0x00001000,
66 ValueValue
= 0x00002000,
67 StringSep
= 0x00004000,
68 NameSep
= 0x00008000, // there can be only one! '\''
69 CharIdent
= 0x00010000, // identifier (built-in function) or reference start
70 Ident
= 0x00020000, // identifier or reference continuation
71 OdfLBracket
= 0x00040000, // ODF '[' reference bracket
72 OdfRBracket
= 0x00080000, // ODF ']' reference bracket
73 OdfLabelOp
= 0x00100000, // ODF '!!' automatic intersection of labels
74 OdfNameMarker
= 0x00200000, // ODF '$$' marker that starts a defined (range) name
75 CharName
= 0x00400000, // start character of a defined name
76 Name
= 0x00800000, // continuation character of a defined name
77 CharErrConst
= 0x01000000, // start character of an error constant ('#')
// Specializes typed_flags for ScCharFlags, deriving from is_typed_flags with
// the mask 0x01ffffff. That mask has bits 0 through 24 set, i.e. it reaches
// up to and including CharErrConst (0x01000000), the highest flag listed in
// the enum.
// NOTE(review): presumably this is what enables the bitwise operators from
// o3tl/typed_flags_set.hxx for ScCharFlags -- confirm.
80 template<> struct typed_flags
<ScCharFlags
> : is_typed_flags
<ScCharFlags
, 0x01ffffff> {};
83 #define SC_COMPILER_FILE_TAB_SEP '#' // 'Doc'#Tab
89 struct ScInterpreterContext
;
94 class CompileFormulaContext
;
98 // constants and data types internal to compiler
100 struct ScRawToken final
102 friend class ScCompiler
;
103 // Friends that use a temporary ScRawToken on the stack (and therefore need
104 // the private dtor) and know what they're doing...
105 friend class ScTokenArray
;
107 formula::StackVar eType
; // type of data; this determines how the unions are used
117 formula::ParamClass eInForceArray
;
119 ScComplexRefData aRef
;
122 ScComplexRefData aRef
;
133 ScTableRefToken::Item eItem
;
137 rtl_uString
* mpDataIgnoreCase
;
141 short nJump
[ FORMULA_MAXJUMPCOUNT
+ 1 ]; // If/Chose token
143 OUString maExternalName
; // depending on the opcode, this is either the external, or the external name, or the external table name
145 // coverity[uninit_member] - members deliberately not initialized
// User-provided destructor with an empty body. The comment on the friend
// declarations of this struct describes it as private, which is why those
// friends exist.
148 ~ScRawToken() {} //! only delete via Delete()
// Returns eType, the StackVar tag that (per its member comment) determines
// how the unions of this token are used.
150 formula::StackVar
GetType() const { return eType
; }
// Returns the opcode held in eOp. (The declaration of eOp is not among the
// lines visible in this excerpt.)
151 OpCode
GetOpCode() const { return eOp
; }
// Overwrites eOp with e. Only eOp is assigned; this inline body modifies no
// other member.
152 void NewOpCode( OpCode e
) { eOp
= e
; }
154 // Use these methods only on tokens that are not part of a token array,
155 // since the reference count is cleared!
156 void SetOpCode( OpCode eCode
);
157 void SetString( rtl_uString
* pData
, rtl_uString
* pDataIgnoreCase
);
158 void SetSingleReference( const ScSingleRefData
& rRef
);
159 void SetDoubleReference( const ScComplexRefData
& rRef
);
160 void SetDouble( double fVal
);
161 void SetErrorConstant( FormulaError nErr
);
163 // These methods are ok to use, reference count not cleared.
164 void SetName(sal_Int16 nSheet
, sal_uInt16 nIndex
);
165 void SetExternalSingleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScSingleRefData
& rRef
);
166 void SetExternalDoubleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScComplexRefData
& rRef
);
167 void SetExternalName( sal_uInt16 nFileId
, const OUString
& rName
);
168 void SetExternal(const OUString
& rStr
);
/** If the token is a non-external reference, determine if the reference is
    valid. If the token is an external reference, return true. Else return
    false. Used only in ScCompiler::NextNewToken() to preserve non-existing
    sheet names in otherwise valid references.
 */
175 bool IsValidReference(const ScDocument
& rDoc
) const;
177 formula::FormulaToken
* CreateToken(ScSheetLimits
& rLimits
) const; // create typified token
180 class SC_DLLPUBLIC ScCompiler final
: public formula::FormulaCompiler
// Policy for unknown symbols met while compiling. A value of this type is
// stored in meExtendedErrorDetection and set through
// SetExtendedErrorDetection().
184 enum ExtendedErrorDetection
186 EXTENDED_ERROR_DETECTION_NONE
= 0, // no error on unknown symbols, default (interpreter handles it)
187 EXTENDED_ERROR_DETECTION_NAME_BREAK
, // name error on unknown symbols and break, pCode incomplete
188 EXTENDED_ERROR_DETECTION_NAME_NO_BREAK
// name error on unknown symbols, don't break, continue
191 struct SAL_DLLPRIVATE Convention
193 const formula::FormulaGrammar::AddressConvention meConv
;
195 Convention( formula::FormulaGrammar::AddressConvention eConvP
);
196 virtual ~Convention();
198 virtual void makeRefStr(
199 ScSheetLimits
& rLimits
,
200 OUStringBuffer
& rBuffer
,
201 formula::FormulaGrammar::Grammar eGram
,
202 const ScAddress
& rPos
,
203 const OUString
& rErrRef
, const std::vector
<OUString
>& rTabNames
,
204 const ScComplexRefData
& rRef
, bool bSingleRef
, bool bFromRangeName
) const = 0;
206 virtual css::i18n::ParseResult
207 parseAnyToken( const OUString
& rFormula
,
209 const CharClass
* pCharClass
,
210 bool bGroupSeparator
) const = 0;
/**
 * Parse the symbol string and pick up the file name and the external
 * name (handed back through rFile and rName).
 *
 * @return true on successful parse, or false otherwise.
 */
218 virtual bool parseExternalName( const OUString
& rSymbol
, OUString
& rFile
, OUString
& rName
,
219 const ScDocument
& rDoc
,
220 const css::uno::Sequence
< css::sheet::ExternalLinkInfo
>* pExternalLinks
) const = 0;
222 virtual OUString
makeExternalNameStr( sal_uInt16 nFileId
, const OUString
& rFile
,
223 const OUString
& rName
) const = 0;
225 virtual void makeExternalRefStr(
226 ScSheetLimits
& rLimits
,
227 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
, sal_uInt16 nFileId
, const OUString
& rFileName
,
228 const OUString
& rTabName
, const ScSingleRefData
& rRef
) const = 0;
230 virtual void makeExternalRefStr(
231 ScSheetLimits
& rLimits
,
232 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
,
233 sal_uInt16 nFileId
, const OUString
& rFileName
, const std::vector
<OUString
>& rTabNames
,
234 const OUString
& rTabName
, const ScComplexRefData
& rRef
) const = 0;
236 enum SpecialSymbolType
/**
 * Character between sheet name and address. In OOO A1 this is
 * '.', while in XL A1 and XL R1C1 this is '!'.
 */
/**
 * In OOO A1, a sheet name may be prefixed with '$' to indicate an
 * absolute sheet position.
 */
250 virtual sal_Unicode
getSpecialSymbol( SpecialSymbolType eSymType
) const = 0;
252 virtual ScCharFlags
getCharTableFlags( sal_Unicode c
, sal_Unicode cLast
) const = 0;
255 std::unique_ptr
<ScCharFlags
[]> mpCharTable
;
257 friend struct Convention
;
261 static const CharClass
*pCharClassEnglish
; // character classification for en_US locale
262 static const CharClass
*pCharClassLocalized
; // character classification for UI locale
263 static const Convention
*pConventions
[ formula::FormulaGrammar::CONV_LAST
];
265 static const struct AddInMap
268 const char* pEnglish
;
269 const char* pOriginal
; // programmatical name
270 const char* pUpper
; // upper case programmatical name
272 static size_t GetAddInMapCount();
277 SvNumberFormatter
* mpFormatter
;
278 const ScInterpreterContext
* mpInterpreterContext
;
280 SCTAB mnCurrentSheetTab
; // indicates current sheet number parsed so far
281 sal_Int32 mnCurrentSheetEndPos
; // position after current sheet name if parsed
283 // For CONV_XL_OOX, may be set via API by MOOXML filter.
284 css::uno::Sequence
<css::sheet::ExternalLinkInfo
> maExternalLinks
;
286 sal_Unicode cSymbol
[MAXSTRLEN
+1]; // current Symbol + 0
287 OUString aFormula
; // formula source code
288 sal_Int32 nSrcPos
; // tokenizer position (source code)
289 ScRawToken maRawToken
;
291 std::queue
<OpCode
> maPendingOpCodes
; // additional opcodes generated from a single symbol
293 const CharClass
* pCharClass
; // which character classification is used for parseAnyToken and upper/lower
294 bool mbCharClassesDiffer
; // whether pCharClass and current system locale's CharClass differ
295 sal_uInt16 mnPredetectedReference
; // reference when reading ODF, 0 (none), 1 (single) or 2 (double)
296 sal_Int32 mnRangeOpPosInSymbol
; // if and where a range operator is in symbol
297 const Convention
*pConv
;
298 ExtendedErrorDetection meExtendedErrorDetection
;
299 bool mbCloseBrackets
; // whether to close open brackets automatically, default TRUE
300 bool mbRewind
; // whether symbol is to be rewound to some step during lexical analysis
301 bool mbRefConventionChartOOXML
; // whether to use special ooxml chart syntax in case of OOXML reference convention,
302 // when parsing a formula string. [0]!GlobalNamedRange, LocalSheet!LocalNamedRange
303 std::vector
<sal_uInt16
> maExternalFiles
;
305 std::vector
<OUString
> maTabNames
; /// sheet names mangled for the current grammar for output
306 std::vector
<OUString
> &GetSetupTabNames() const; /// get or setup tab names for the current grammar
// Initializes mxToken from p and starts mnLevel at 0.
// NOTE(review): this single-argument constructor is not marked explicit, so
// a formula::FormulaToken* converts implicitly -- confirm that is intended.
312 TableRefEntry( formula::FormulaToken
* p
) : mxToken(p
), mnLevel(0) {}
314 std::vector
<TableRefEntry
> maTableRefs
; /// "stack" of currently active ocTableRef tokens
316 // Optimizing implicit intersection is done only at the end of code generation, because the usage context may
317 // be important. Store candidate parameters and the operation they are the argument for.
// One deferred implicit-intersection candidate: a parameter token together
// with the operation it is an argument of.
318 struct PendingImplicitIntersectionOptimization
// p is the location of the parameter's token pointer, o the operation token.
// p is dereferenced right here (parameter( *p )), so it must not be null.
320 PendingImplicitIntersectionOptimization(formula::FormulaToken
** p
, formula::FormulaToken
* o
)
321 : parameterLocation( p
), parameter( *p
), operation( o
) {}
// Location of the parameter's token pointer, stored as passed in.
322 formula::FormulaToken
** parameterLocation
;
// The token *p referred to at construction time.
// NOTE(review): FormulaTokenRef is presumably a ref-counting holder -- confirm.
323 formula::FormulaTokenRef parameter
;
// The operation token the parameter is an argument of.
324 formula::FormulaTokenRef operation
;
326 std::vector
< PendingImplicitIntersectionOptimization
> mPendingImplicitIntersectionOptimizations
;
327 std::unordered_set
<formula::FormulaTokenRef
> mUnhandledPossibleImplicitIntersections
;
329 std::set
<OpCode
> mUnhandledPossibleImplicitIntersectionsOpCodes
;
332 bool NextNewToken(bool bInArray
);
333 bool ToUpperAsciiOrI18nIsAscii( OUString
& rUpper
, const OUString
& rOrg
) const;
335 virtual void SetError(FormulaError nError
) override
;
// A character plus a count, collected in vectors by addWhitespace() and
// returned by NextSymbol().
// NOTE(review): the member declarations (nCount, cChar) are not among the
// lines visible in this excerpt.
337 struct Whitespace final
// Default state: count 0 and character 0x20 (U+0020, space).
342 Whitespace() : nCount(0), cChar(0x20) {}
// Resets the count to 0 and switches the tracked character to c.
343 void reset( sal_Unicode c
) { nCount
= 0; cChar
= c
; }
346 static void addWhitespace( std::vector
<ScCompiler::Whitespace
> & rvSpaces
,
347 ScCompiler::Whitespace
& rSpace
, sal_Unicode c
, sal_Int32 n
= 1 );
349 std::vector
<Whitespace
> NextSymbol(bool bInArray
);
351 bool ParseValue( const OUString
& );
352 bool ParseOpCode( const OUString
&, bool bInArray
);
353 bool ParseOpCode2( std::u16string_view
);
355 bool ParseReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
356 bool ParseSingleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
357 bool ParseDoubleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
358 bool ParsePredetectedReference( const OUString
& rSymbol
);
359 bool ParsePredetectedErrRefReference( const OUString
& rName
, const OUString
* pErrRef
);
360 bool ParseMacro( const OUString
& );
361 bool ParseNamedRange( const OUString
&, bool onlyCheck
= false );
362 bool ParseExternalNamedRange( const OUString
& rSymbol
, bool& rbInvalidExternalNameRange
);
363 bool ParseDBRange( const OUString
& );
364 bool ParseColRowName( const OUString
& );
365 bool ParseBoolean( const OUString
& );
366 void AutoCorrectParsedSymbol();
367 const ScRangeData
* GetRangeData( SCTAB
& rSheet
, const OUString
& rUpperName
) const;
369 void AdjustSheetLocalNameRelReferences( SCTAB nDelta
);
370 void SetRelNameReference();
/** Obtain range data for ocName token, global or sheet local.

    Prerequisite: pToken is a FormulaIndexToken so IsGlobal() and
    GetIndex() can be called on it. We don't check with RTTI.
 */
377 ScRangeData
* GetRangeData( const formula::FormulaToken
& pToken
) const;
379 bool HasPossibleNamedRangeConflict(SCTAB nTab
) const;
382 static const CharClass
* GetCharClassLocalized();
383 static const CharClass
* GetCharClassEnglish();
386 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
,
387 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
/** If eGrammar == GRAM_UNSPECIFIED then the grammar of rDocument is used.
 */
391 ScCompiler( ScDocument
& rDocument
, const ScAddress
&,
392 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
393 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
395 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
, ScTokenArray
& rArr
,
396 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
/** If eGrammar == GRAM_UNSPECIFIED then the grammar of rDocument is used.
 */
400 ScCompiler( ScDocument
& rDocument
, const ScAddress
&, ScTokenArray
& rArr
,
401 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
402 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
404 virtual ~ScCompiler() override
;
407 static void DeInit(); /// all
409 // for ScAddress::Format()
410 static void CheckTabQuotes( OUString
& aTabName
,
411 const formula::FormulaGrammar::AddressConvention eConv
= formula::FormulaGrammar::CONV_OOO
);
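/** Analyzes a string for a 'Doc'#Tab construct, or 'Do''c'#Tab etc.

    @returns the position of the unquoted # hash mark in 'Doc'#Tab, or a
             not-found value when there is none (the original wording is
             cut off at this point; check the implementation for the
             exact value).
 */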
417 static sal_Int32
GetDocTabPos( const OUString
& rString
);
419 // Check if it is a valid english function name
420 static bool IsEnglishSymbol( const OUString
& rName
);
422 bool ParseErrorConstant( const OUString
& );
423 bool ParseTableRefItem( const OUString
& );
424 bool ParseTableRefColumn( const OUString
& );
426 /** Calls GetToken() if PeekNextNoSpaces() is of given OpCode. */
427 bool GetTokenIfOpCode( OpCode eOp
);
/**
 * When auto correction is set, the jump command reorder must be enabled.
 */
432 void SetAutoCorrection( bool bVal
);
// Stores bVal in mbCloseBrackets, the flag documented at its declaration as
// "whether to close open brackets automatically, default TRUE".
433 void SetCloseBrackets( bool bVal
) { mbCloseBrackets
= bVal
; }
// Stores bVal in mbRefConventionChartOOXML. Per the member's own comment the
// flag selects the special OOXML chart syntax ([0]!GlobalNamedRange,
// LocalSheet!LocalNamedRange) when parsing a formula string under the OOXML
// reference convention.
434 void SetRefConventionChartOOXML( bool bVal
) { mbRefConventionChartOOXML
= bVal
; }
435 void SetRefConvention( const Convention
*pConvP
);
436 void SetRefConvention( const formula::FormulaGrammar::AddressConvention eConv
);
438 static const Convention
* GetRefConvention( formula::FormulaGrammar::AddressConvention eConv
);
/** Overwrite FormulaCompiler::GetOpCodeMap(), forwarding to
    GetFinalOpCodeMap().
 */
// Returns whatever GetFinalOpCodeMap(nLanguage) yields, unchanged.
// NOTE(review): neither virtual nor override appears on this declaration, so
// it looks like it hides rather than overrides the base-class function --
// confirm against formula::FormulaCompiler.
443 OpCodeMapPtr
GetOpCodeMap( const sal_Int32 nLanguage
) const { return GetFinalOpCodeMap(nLanguage
); }
445 /// Set symbol map if not empty.
446 void SetFormulaLanguage( const OpCodeMapPtr
& xMap
);
448 void SetGrammar( const formula::FormulaGrammar::Grammar eGrammar
);
450 void SetNumberFormatter( SvNumberFormatter
* pFormatter
);
/** Set grammar and reference convention from within SetFormulaLanguage().

    @param eNewGrammar
        The new grammar to be set and the associated reference convention.

    @param eOldGrammar
        The previous grammar that was active before SetFormulaLanguage().
 */
462 void SetGrammarAndRefConvention(
463 const formula::FormulaGrammar::Grammar eNewGrammar
,
464 const formula::FormulaGrammar::Grammar eOldGrammar
);
467 /// Set external link info for ScAddress::CONV_XL_OOX.
// Copy-assigns rLinks to maExternalLinks. The member is documented at its
// declaration as being for CONV_XL_OOX and settable via API by the MOOXML
// filter.
468 void SetExternalLinks(
469 const css::uno::Sequence
<
470 css::sheet::ExternalLinkInfo
>& rLinks
)
472 maExternalLinks
= rLinks
;
475 void CreateStringFromXMLTokenArray( OUString
& rFormula
, OUString
& rFormulaNmsp
);
// Stores eVal in meExtendedErrorDetection; see the ExtendedErrorDetection
// enumerators for what each value means for unknown symbols.
477 void SetExtendedErrorDetection( ExtendedErrorDetection eVal
) { meExtendedErrorDetection
= eVal
; }
// Returns the current value of bCorrected.
// NOTE(review): bCorrected is not declared in the visible part of this
// class; presumably inherited from formula::FormulaCompiler -- confirm.
479 bool IsCorrected() const { return bCorrected
; }
// Returns a const reference to aCorrectedFormula (no copy is made here).
// NOTE(review): aCorrectedFormula is not declared in the visible part of
// this class; presumably inherited from formula::FormulaCompiler -- confirm.
480 const OUString
& GetCorrectedFormula() const { return aCorrectedFormula
; }
/**
 * Tokenize formula expression string into an array of tokens.
 *
 * @param rFormula formula expression to tokenize.
 *
 * @return heap allocated token array object. The caller <i>must</i>
 * manage the life cycle of this object; ownership is handed over
 * through the returned std::unique_ptr.
 */
490 std::unique_ptr
<ScTokenArray
> CompileString( const OUString
& rFormula
);
491 std::unique_ptr
<ScTokenArray
> CompileString( const OUString
& rFormula
, const OUString
& rFormulaNmsp
);
// Returns a const reference to aPos.
// NOTE(review): aPos is not declared in the visible part of this class;
// presumably it is the position handed to the constructors -- confirm.
492 const ScAddress
& GetPos() const { return aPos
; }
495 static void MoveRelWrap( const ScTokenArray
& rArr
, const ScDocument
& rDoc
, const ScAddress
& rPos
,
496 SCCOL nMaxCol
, SCROW nMaxRow
);
/** If the character is allowed as tested by nFlags (ScCharFlags bits)
    for all known address conventions. If more than one bit is given
    in nFlags, all bits must match. */
501 static bool IsCharFlagAllConventions(
502 OUString
const & rStr
, sal_Int32 nPos
, ScCharFlags nFlags
);
504 /** TODO : Move this to somewhere appropriate. */
505 static bool DoubleRefToPosSingleRefScalarCase(const ScRange
& rRange
, ScAddress
& rAdr
,
506 const ScAddress
& rFormulaPos
);
// True when mUnhandledPossibleImplicitIntersections holds at least one
// token, false when that set is empty.
508 bool HasUnhandledPossibleImplicitIntersections() const { return !mUnhandledPossibleImplicitIntersections
.empty(); }
// Returns a const reference to mUnhandledPossibleImplicitIntersectionsOpCodes.
// NOTE(review): the body only reads a member and hands out a const
// reference, yet the function is not const-qualified (unlike
// HasUnhandledPossibleImplicitIntersections()); adding const looks safe --
// confirm with callers.
510 const std::set
<OpCode
>& UnhandledPossibleImplicitIntersectionsOpCodes() { return mUnhandledPossibleImplicitIntersectionsOpCodes
; }
515 virtual OUString
FindAddInFunction( const OUString
& rUpperName
, bool bLocalFirst
) const override
;
516 virtual void fillFromAddInCollectionUpperName( const NonConstOpCodeMapPtr
& xMap
) const override
;
517 virtual void fillFromAddInCollectionEnglishName( const NonConstOpCodeMapPtr
& xMap
) const override
;
518 virtual void fillFromAddInMap( const NonConstOpCodeMapPtr
& xMap
, formula::FormulaGrammar::Grammar _eGrammar
) const override
;
519 virtual void fillAddInToken(::std::vector
< css::sheet::FormulaOpCodeMapEntry
>& _rVec
,bool _bIsEnglish
) const override
;
521 virtual bool HandleExternalReference(const formula::FormulaToken
& _aToken
) override
;
522 virtual bool HandleRange() override
;
523 virtual bool HandleColRowName() override
;
524 virtual bool HandleDbData() override
;
525 virtual bool HandleTableRef() override
;
527 virtual formula::FormulaTokenRef
ExtendRangeReference( formula::FormulaToken
& rTok1
, formula::FormulaToken
& rTok2
) override
;
528 virtual void CreateStringFromExternal( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
529 virtual void CreateStringFromSingleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
530 virtual void CreateStringFromDoubleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
531 virtual void CreateStringFromMatrix( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
532 virtual void CreateStringFromIndex( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
533 virtual void LocalizeString( OUString
& rName
) const override
; // modify rName - input: exact name
535 virtual formula::ParamClass
GetForceArrayParameter( const formula::FormulaToken
* pToken
, sal_uInt16 nParam
) const override
;
537 /// Access the CharTable flags
// For c below 128 the lookup is delegated to the active convention
// (pConv->getCharTableFlags(c, cLast)); for any other c the result is
// ScCharFlags::NONE and cLast is not looked at.
// pConv is dereferenced without a null check, so a convention must already
// be set when this is called.
// NOTE(review): the body does not modify the object and
// Convention::getCharTableFlags() is const, so this accessor could be
// const-qualified -- confirm.
538 ScCharFlags
GetCharTableFlags( sal_Unicode c
, sal_Unicode cLast
)
539 { return c
< 128 ? pConv
->getCharTableFlags(c
, cLast
) : ScCharFlags::NONE
; }
541 virtual void HandleIIOpCode(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
) override
;
542 bool HandleIIOpCodeInternal(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
);
543 bool SkipImplicitIntersectionOptimization(const formula::FormulaToken
* token
) const;
544 virtual void PostProcessCode() override
;
545 virtual void AnnotateOperands() override
;
546 static bool ParameterMayBeImplicitIntersection(const formula::FormulaToken
* token
, int parameter
);
547 void ReplaceDoubleRefII(formula::FormulaToken
** ppDoubleRefTok
);
548 bool AdjustSumRangeShape(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
);
549 void CorrectSumRange(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
, formula::FormulaToken
** ppSumRangeToken
);
550 void AnnotateTrimOnDoubleRefs();
553 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */