1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_SC_INC_COMPILER_HXX
21 #define INCLUDED_SC_INC_COMPILER_HXX
27 #include "refdata.hxx"
29 #include <formula/token.hxx>
30 #include <formula/grammar.hxx>
31 #include <rtl/ustrbuf.hxx>
32 #include <com/sun/star/sheet/ExternalLinkInfo.hpp>
33 #include <com/sun/star/i18n/ParseResult.hpp>
38 #include <com/sun/star/uno/Sequence.hxx>
39 #include <o3tl/typed_flags_set.hxx>
41 #include <formula/FormulaCompiler.hxx>
45 // constants and data types also for external modules (ScInterpreter et al)
47 #define MAXSTRLEN 1024 /* maximum length of input string of one symbol */
49 // flag values of CharTable
50 enum class ScCharFlags
: sal_uInt32
{
54 CharBool
= 0x00000002,
55 CharWord
= 0x00000004,
56 CharValue
= 0x00000008,
57 CharString
= 0x00000010,
58 CharDontCare
= 0x00000020,
63 ValueSep
= 0x00000400,
64 ValueExp
= 0x00000800,
65 ValueSign
= 0x00001000,
66 ValueValue
= 0x00002000,
67 StringSep
= 0x00004000,
68 NameSep
= 0x00008000, // there can be only one! '\''
69 CharIdent
= 0x00010000, // identifier (built-in function) or reference start
70 Ident
= 0x00020000, // identifier or reference continuation
71 OdfLBracket
= 0x00040000, // ODF '[' reference bracket
72 OdfRBracket
= 0x00080000, // ODF ']' reference bracket
73 OdfLabelOp
= 0x00100000, // ODF '!!' automatic intersection of labels
74 OdfNameMarker
= 0x00200000, // ODF '$$' marker that starts a defined (range) name
75 CharName
= 0x00400000, // start character of a defined name
76 Name
= 0x00800000, // continuation character of a defined name
77 CharErrConst
= 0x01000000, // start character of an error constant ('#')
// Registers ScCharFlags with the typed-flags helper (is_typed_flags, presumably
// declared in o3tl/typed_flags_set.hxx - confirm). 0x01ffffff is the mask of
// permitted bits; it covers every enumerator listed above, the highest being
// CharErrConst (0x01000000).
80 template<> struct typed_flags
<ScCharFlags
> : is_typed_flags
<ScCharFlags
, 0x01ffffff> {};
83 #define SC_COMPILER_FILE_TAB_SEP '#' // 'Doc'#Tab
89 struct ScInterpreterContext
;
94 class CompileFormulaContext
;
98 // constants and data types internal to compiler
100 struct ScRawToken final
102 friend class ScCompiler
;
103 // Friends that use a temporary ScRawToken on the stack (and therefore need
104 // the private dtor) and know what they're doing...
105 friend class ScTokenArray
;
107 formula::StackVar eType
; // type of data; this determines how the unions are used
113 formula::ParamClass eInForceArray
;
115 ScComplexRefData aRef
;
118 ScComplexRefData aRef
;
129 ScTableRefToken::Item eItem
;
133 rtl_uString
* mpDataIgnoreCase
;
137 short nJump
[ FORMULA_MAXJUMPCOUNT
+ 1 ]; // If/Choose token
139 OUString maExternalName
; // depending on the opcode, this is either the external, or the external name, or the external table name
141 // coverity[uninit_member] - members deliberately not initialized
144 ~ScRawToken() {} //! only delete via Delete()
146 formula::StackVar
GetType() const { return eType
; }
147 OpCode
GetOpCode() const { return eOp
; }
// Overwrites only the opcode (eOp); no other member of the token is touched.
148 void NewOpCode( OpCode e
) { eOp
= e
; }
150 // Use these methods only on tokens that are not part of a token array,
151 // since the reference count is cleared!
152 void SetOpCode( OpCode eCode
);
// Sets the token's string data from pData together with a second string,
// pDataIgnoreCase (presumably the case-insensitive form, cf. the member
// mpDataIgnoreCase - confirm against the definition).
153 void SetString( rtl_uString
* pData
, rtl_uString
* pDataIgnoreCase
);
154 void SetSingleReference( const ScSingleRefData
& rRef
);
155 void SetDoubleReference( const ScComplexRefData
& rRef
);
156 void SetDouble( double fVal
);
157 void SetErrorConstant( FormulaError nErr
);
159 // These methods are ok to use, reference count not cleared.
160 void SetName(sal_Int16 nSheet
, sal_uInt16 nIndex
);
161 void SetExternalSingleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScSingleRefData
& rRef
);
162 void SetExternalDoubleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScComplexRefData
& rRef
);
163 void SetExternalName( sal_uInt16 nFileId
, const OUString
& rName
);
164 void SetExternal(const OUString
& rStr
);
166 /** If the token is a non-external reference, determine if the reference is
167 valid. If the token is an external reference, return true. Else return
168 false. Used only in ScCompiler::NextNewToken() to preserve non-existing
169 sheet names in otherwise valid references.
171 bool IsValidReference(const ScDocument
& rDoc
) const;
173 formula::FormulaToken
* CreateToken(ScSheetLimits
& rLimits
) const; // create typified token
176 class SC_DLLPUBLIC ScCompiler
: public formula::FormulaCompiler
180 enum ExtendedErrorDetection
182 EXTENDED_ERROR_DETECTION_NONE
= 0, // no error on unknown symbols, default (interpreter handles it)
183 EXTENDED_ERROR_DETECTION_NAME_BREAK
, // name error on unknown symbols and break, pCode incomplete
184 EXTENDED_ERROR_DETECTION_NAME_NO_BREAK
// name error on unknown symbols, don't break, continue
187 struct SAL_DLLPRIVATE Convention
189 const formula::FormulaGrammar::AddressConvention meConv
;
191 Convention( formula::FormulaGrammar::AddressConvention eConvP
);
192 virtual ~Convention();
194 virtual void makeRefStr(
195 ScSheetLimits
& rLimits
,
196 OUStringBuffer
& rBuffer
,
197 formula::FormulaGrammar::Grammar eGram
,
198 const ScAddress
& rPos
,
199 const OUString
& rErrRef
, const std::vector
<OUString
>& rTabNames
,
200 const ScComplexRefData
& rRef
, bool bSingleRef
, bool bFromRangeName
) const = 0;
202 virtual css::i18n::ParseResult
203 parseAnyToken( const OUString
& rFormula
,
205 const CharClass
* pCharClass
,
206 bool bGroupSeparator
) const = 0;
209 * Parse the symbol string and pick up the file name and the external
212 * @return true on successful parse, or false otherwise.
214 virtual bool parseExternalName( const OUString
& rSymbol
, OUString
& rFile
, OUString
& rName
,
215 const ScDocument
& rDoc
,
216 const css::uno::Sequence
< css::sheet::ExternalLinkInfo
>* pExternalLinks
) const = 0;
218 virtual OUString
makeExternalNameStr( sal_uInt16 nFileId
, const OUString
& rFile
,
219 const OUString
& rName
) const = 0;
221 virtual void makeExternalRefStr(
222 ScSheetLimits
& rLimits
,
223 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
, sal_uInt16 nFileId
, const OUString
& rFileName
,
224 const OUString
& rTabName
, const ScSingleRefData
& rRef
) const = 0;
226 virtual void makeExternalRefStr(
227 ScSheetLimits
& rLimits
,
228 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
,
229 sal_uInt16 nFileId
, const OUString
& rFileName
, const std::vector
<OUString
>& rTabNames
,
230 const OUString
& rTabName
, const ScComplexRefData
& rRef
) const = 0;
232 enum SpecialSymbolType
235 * Character between sheet name and address. In OOO A1 this is
236 * '.', while XL A1 and XL R1C1 this is '!'.
241 * In OOO A1, a sheet name may be prefixed with '$' to indicate an
242 * absolute sheet position.
246 virtual sal_Unicode
getSpecialSymbol( SpecialSymbolType eSymType
) const = 0;
248 virtual ScCharFlags
getCharTableFlags( sal_Unicode c
, sal_Unicode cLast
) const = 0;
251 std::unique_ptr
<ScCharFlags
[]> mpCharTable
;
253 friend struct Convention
;
257 static const CharClass
*pCharClassEnglish
; // character classification for en_US locale
258 static const CharClass
*pCharClassLocalized
; // character classification for UI locale
259 static const Convention
*pConventions
[ formula::FormulaGrammar::CONV_LAST
];
261 static const struct AddInMap
264 const char* pEnglish
;
265 const char* pOriginal
; // programmatical name
266 const char* pUpper
; // upper case programmatical name
268 static size_t GetAddInMapCount();
273 SvNumberFormatter
* mpFormatter
;
274 const ScInterpreterContext
* mpInterpreterContext
;
276 SCTAB mnCurrentSheetTab
; // indicates current sheet number parsed so far
277 sal_Int32 mnCurrentSheetEndPos
; // position after current sheet name if parsed
279 // For CONV_XL_OOX, may be set via API by MOOXML filter.
280 css::uno::Sequence
<css::sheet::ExternalLinkInfo
> maExternalLinks
;
282 sal_Unicode cSymbol
[MAXSTRLEN
+1]; // current Symbol + 0
283 OUString aFormula
; // formula source code
284 sal_Int32 nSrcPos
; // tokenizer position (source code)
285 mutable ScRawToken maRawToken
;
287 std::queue
<OpCode
> maPendingOpCodes
; // additional opcodes generated from a single symbol
289 const CharClass
* pCharClass
; // which character classification is used for parseAnyToken and upper/lower
290 bool mbCharClassesDiffer
; // whether pCharClass and current system locale's CharClass differ
291 sal_uInt16 mnPredetectedReference
; // reference when reading ODF, 0 (none), 1 (single) or 2 (double)
292 sal_Int32 mnRangeOpPosInSymbol
; // if and where a range operator is in symbol
293 const Convention
*pConv
;
294 ExtendedErrorDetection meExtendedErrorDetection
;
295 bool mbCloseBrackets
; // whether to close open brackets automatically, default TRUE
296 bool mbRewind
; // whether symbol is to be rewound to some step during lexical analysis
297 std::vector
<sal_uInt16
> maExternalFiles
;
299 std::vector
<OUString
> maTabNames
; /// sheet names mangled for the current grammar for output
300 std::vector
<OUString
> &GetSetupTabNames() const; /// get or setup tab names for the current grammar
// Initializes mxToken from p and starts with mnLevel set to 0.
306 TableRefEntry( formula::FormulaToken
* p
) : mxToken(p
), mnLevel(0) {}
308 std::vector
<TableRefEntry
> maTableRefs
; /// "stack" of currently active ocTableRef tokens
310 // Optimizing implicit intersection is done only at the end of code generation, because the usage context may
311 // be important. Store candidate parameters and the operation they are the argument for.
312 struct PendingImplicitIntersectionOptimization
// Remembers where the candidate parameter token is stored (p), the token found
// there at construction time (*p), and the operation o it is an argument of.
// p is dereferenced right away, so it must not be null.
314 PendingImplicitIntersectionOptimization(formula::FormulaToken
** p
, formula::FormulaToken
* o
)
315 : parameterLocation( p
), parameter( *p
), operation( o
) {}
316 formula::FormulaToken
** parameterLocation
;
317 formula::FormulaTokenRef parameter
;
318 formula::FormulaTokenRef operation
;
320 std::vector
< PendingImplicitIntersectionOptimization
> mPendingImplicitIntersectionOptimizations
;
321 std::set
<formula::FormulaTokenRef
> mUnhandledPossibleImplicitIntersections
;
323 std::set
<OpCode
> mUnhandledPossibleImplicitIntersectionsOpCodes
;
326 bool NextNewToken(bool bInArray
);
327 bool ToUpperAsciiOrI18nIsAscii( OUString
& rUpper
, const OUString
& rOrg
) const;
329 virtual void SetError(FormulaError nError
) override
;
330 sal_Int32
NextSymbol(bool bInArray
);
331 bool IsValue( const OUString
& );
332 bool IsOpCode( const OUString
&, bool bInArray
);
333 bool IsOpCode2( const OUString
& );
335 bool IsReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
336 bool IsSingleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
337 bool IsDoubleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
338 bool IsPredetectedReference( const OUString
& rSymbol
);
339 bool IsPredetectedErrRefReference( const OUString
& rName
, const OUString
* pErrRef
);
340 bool IsMacro( const OUString
& );
341 bool IsNamedRange( const OUString
& );
342 bool IsExternalNamedRange( const OUString
& rSymbol
, bool& rbInvalidExternalNameRange
);
343 bool IsDBRange( const OUString
& );
344 bool IsColRowName( const OUString
& );
345 bool IsBoolean( const OUString
& );
346 void AutoCorrectParsedSymbol();
347 const ScRangeData
* GetRangeData( SCTAB
& rSheet
, const OUString
& rUpperName
) const;
349 void AdjustSheetLocalNameRelReferences( SCTAB nDelta
);
350 void SetRelNameReference();
352 /** Obtain range data for ocName token, global or sheet local.
354 Prerequisite: pToken is a FormulaIndexToken so IsGlobal() and
355 GetIndex() can be called on it. We don't check with RTTI.
357 ScRangeData
* GetRangeData( const formula::FormulaToken
& pToken
) const;
359 static const CharClass
* GetCharClassEnglish();
360 static const CharClass
* GetCharClassLocalized();
363 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
,
364 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
366 /** If eGrammar == GRAM_UNSPECIFIED then the grammar of rDocument is used,
368 ScCompiler( ScDocument
& rDocument
, const ScAddress
&,
369 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
370 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
372 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
, ScTokenArray
& rArr
,
373 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
375 /** If eGrammar == GRAM_UNSPECIFIED then the grammar of rDocument is used,
377 ScCompiler( ScDocument
& rDocument
, const ScAddress
&, ScTokenArray
& rArr
,
378 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
379 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
381 virtual ~ScCompiler() override
;
384 static void DeInit(); /// all
386 // for ScAddress::Format()
387 static void CheckTabQuotes( OUString
& aTabName
,
388 const formula::FormulaGrammar::AddressConvention eConv
= formula::FormulaGrammar::CONV_OOO
);
390 /** Analyzes a string for a 'Doc'#Tab construct, or 'Do''c'#Tab etc...
392 @returns the position of the unquoted # hash mark in 'Doc'#Tab, or
394 static sal_Int32
GetDocTabPos( const OUString
& rString
);
396 static bool EnQuote( OUString
& rStr
);
398 // Check if it is a valid english function name
399 static bool IsEnglishSymbol( const OUString
& rName
);
401 bool IsErrorConstant( const OUString
& ) const;
402 bool IsTableRefItem( const OUString
& ) const;
403 bool IsTableRefColumn( const OUString
& ) const;
405 /** Calls GetToken() if PeekNextNoSpaces() is of given OpCode. */
406 bool GetTokenIfOpCode( OpCode eOp
);
409 * When auto correction is set, the jump command reorder must be enabled.
411 void SetAutoCorrection( bool bVal
);
/// Sets mbCloseBrackets, i.e. whether open brackets are closed automatically.
412 void SetCloseBrackets( bool bVal
) { mbCloseBrackets
= bVal
; }
413 void SetRefConvention( const Convention
*pConvP
);
414 void SetRefConvention( const formula::FormulaGrammar::AddressConvention eConv
);
416 static const Convention
* GetRefConvention( formula::FormulaGrammar::AddressConvention eConv
);
418 /// Set symbol map if not empty.
419 void SetFormulaLanguage( const OpCodeMapPtr
& xMap
);
421 void SetGrammar( const formula::FormulaGrammar::Grammar eGrammar
);
423 void SetNumberFormatter( SvNumberFormatter
* pFormatter
);
426 /** Set grammar and reference convention from within SetFormulaLanguage()
430 The new grammar to be set and the associated reference convention.
433 The previous grammar that was active before SetFormulaLanguage().
435 void SetGrammarAndRefConvention(
436 const formula::FormulaGrammar::Grammar eNewGrammar
,
437 const formula::FormulaGrammar::Grammar eOldGrammar
);
440 /// Set external link info for ScAddress::CONV_XL_OOX.
441 void SetExternalLinks(
442 const css::uno::Sequence
<
443 css::sheet::ExternalLinkInfo
>& rLinks
)
445 maExternalLinks
= rLinks
;
448 void CreateStringFromXMLTokenArray( OUString
& rFormula
, OUString
& rFormulaNmsp
);
/// Selects how unknown symbols are treated (no error, name error and break,
/// or name error and continue); see ExtendedErrorDetection.
450 void SetExtendedErrorDetection( ExtendedErrorDetection eVal
) { meExtendedErrorDetection
= eVal
; }
452 bool IsCorrected() const { return bCorrected
; }
453 const OUString
& GetCorrectedFormula() const { return aCorrectedFormula
; }
456 * Tokenize formula expression string into an array of tokens.
458 * @param rFormula formula expression to tokenize.
460 * @return heap allocated token array object, owned by the returned
461 * std::unique_ptr.
463 std::unique_ptr
<ScTokenArray
> CompileString( const OUString
& rFormula
);
464 std::unique_ptr
<ScTokenArray
> CompileString( const OUString
& rFormula
, const OUString
& rFormulaNmsp
);
465 const ScAddress
& GetPos() const { return aPos
; }
468 static void MoveRelWrap( const ScTokenArray
& rArr
, const ScDocument
& rDoc
, const ScAddress
& rPos
,
469 SCCOL nMaxCol
, SCROW nMaxRow
);
471 /** Whether the character at nPos in rStr is allowed, as tested by nFlags
472 (ScCharFlags bits), for all known address conventions. If more than one
473 bit is given in nFlags, all bits must match. */
474 static bool IsCharFlagAllConventions(
475 OUString
const & rStr
, sal_Int32 nPos
, ScCharFlags nFlags
);
477 /** TODO : Move this to somewhere appropriate. */
478 static bool DoubleRefToPosSingleRefScalarCase(const ScRange
& rRange
, ScAddress
& rAdr
,
479 const ScAddress
& rFormulaPos
);
481 bool HasUnhandledPossibleImplicitIntersections() const { return !mUnhandledPossibleImplicitIntersections
.empty(); }
/// Read-only access to mUnhandledPossibleImplicitIntersectionsOpCodes. The
/// accessor does not modify the compiler, so it is const like
/// HasUnhandledPossibleImplicitIntersections().
483 const std::set
<OpCode
>& UnhandledPossibleImplicitIntersectionsOpCodes() const { return mUnhandledPossibleImplicitIntersectionsOpCodes
; }
488 virtual OUString
FindAddInFunction( const OUString
& rUpperName
, bool bLocalFirst
) const override
;
489 virtual void fillFromAddInCollectionUpperName( const NonConstOpCodeMapPtr
& xMap
) const override
;
490 virtual void fillFromAddInCollectionEnglishName( const NonConstOpCodeMapPtr
& xMap
) const override
;
491 virtual void fillFromAddInMap( const NonConstOpCodeMapPtr
& xMap
, formula::FormulaGrammar::Grammar _eGrammar
) const override
;
492 virtual void fillAddInToken(::std::vector
< css::sheet::FormulaOpCodeMapEntry
>& _rVec
,bool _bIsEnglish
) const override
;
494 virtual bool HandleExternalReference(const formula::FormulaToken
& _aToken
) override
;
495 virtual bool HandleRange() override
;
496 virtual bool HandleColRowName() override
;
497 virtual bool HandleDbData() override
;
498 virtual bool HandleTableRef() override
;
500 virtual formula::FormulaTokenRef
ExtendRangeReference( formula::FormulaToken
& rTok1
, formula::FormulaToken
& rTok2
) override
;
501 virtual void CreateStringFromExternal( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
502 virtual void CreateStringFromSingleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
503 virtual void CreateStringFromDoubleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
504 virtual void CreateStringFromMatrix( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
505 virtual void CreateStringFromIndex( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
506 virtual void LocalizeString( OUString
& rName
) const override
; // modify rName - input: exact name
508 virtual formula::ParamClass
GetForceArrayParameter( const formula::FormulaToken
* pToken
, sal_uInt16 nParam
) const override
;
510 /// Access the CharTable flags of the current reference convention (pConv).
/// Characters with a code of 128 or above are not looked up and yield
/// ScCharFlags::NONE. Const, because it only forwards to the const
/// Convention::getCharTableFlags().
511 ScCharFlags
GetCharTableFlags( sal_Unicode c
, sal_Unicode cLast
) const
512 { return c
< 128 ? pConv
->getCharTableFlags(c
, cLast
) : ScCharFlags::NONE
; }
514 virtual void HandleIIOpCode(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
) override
;
515 bool HandleIIOpCodeInternal(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
);
516 bool SkipImplicitIntersectionOptimization(const formula::FormulaToken
* token
) const;
517 virtual void PostProcessCode() override
;
518 virtual void AnnotateOperands() override
;
519 static bool ParameterMayBeImplicitIntersection(const formula::FormulaToken
* token
, int parameter
);
520 void ReplaceDoubleRefII(formula::FormulaToken
** ppDoubleRefTok
);
521 bool AdjustSumRangeShape(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
);
522 void CorrectSumRange(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
, formula::FormulaToken
** ppSumRangeToken
);
523 void AnnotateTrimOnDoubleRefs();
528 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */