1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_SC_INC_COMPILER_HXX
21 #define INCLUDED_SC_INC_COMPILER_HXX
27 #include "refdata.hxx"
29 #include <formula/token.hxx>
30 #include <formula/grammar.hxx>
31 #include <rtl/ustrbuf.hxx>
32 #include <com/sun/star/sheet/ExternalLinkInfo.hpp>
33 #include <com/sun/star/i18n/ParseResult.hpp>
38 #include <com/sun/star/uno/Sequence.hxx>
39 #include <o3tl/typed_flags_set.hxx>
41 #include <formula/FormulaCompiler.hxx>
// constants and data types also for external modules (ScInterpreter et al)

/// Maximum length of the input string of one symbol.
#define MAXSTRLEN    1024
47 // flag values of CharTable
48 enum class ScCharFlags
: sal_uInt32
{
52 CharBool
= 0x00000002,
53 CharWord
= 0x00000004,
54 CharValue
= 0x00000008,
55 CharString
= 0x00000010,
56 CharDontCare
= 0x00000020,
61 ValueSep
= 0x00000400,
62 ValueExp
= 0x00000800,
63 ValueSign
= 0x00001000,
64 ValueValue
= 0x00002000,
65 StringSep
= 0x00004000,
66 NameSep
= 0x00008000, // there can be only one! '\''
67 CharIdent
= 0x00010000, // identifier (built-in function) or reference start
68 Ident
= 0x00020000, // identifier or reference continuation
69 OdfLBracket
= 0x00040000, // ODF '[' reference bracket
70 OdfRBracket
= 0x00080000, // ODF ']' reference bracket
71 OdfLabelOp
= 0x00100000, // ODF '!!' automatic intersection of labels
72 OdfNameMarker
= 0x00200000, // ODF '$$' marker that starts a defined (range) name
73 CharName
= 0x00400000, // start character of a defined name
74 Name
= 0x00800000, // continuation character of a defined name
75 CharErrConst
= 0x01000000, // start character of an error constant ('#')
78 template<> struct typed_flags
<ScCharFlags
> : is_typed_flags
<ScCharFlags
, 0x01ffffff> {};
/// Separator between a quoted document name and a sheet name: 'Doc'#Tab
#define SC_COMPILER_FILE_TAB_SEP      '#'
// Forward declarations. The declarations visible in this header only use
// these types through pointers or references.
struct ScInterpreterContext;
class CompileFormulaContext;
96 // constants and data types internal to compiler
// Untyped token record; CreateToken() builds a typed formula::FormulaToken
// from it.
// NOTE(review): the listing this text came from has dropped lines inside the
// struct (the braces, the union/struct wrappers around the data members and
// some members such as eOp, which the accessors below read). Restore them from
// the upstream header before compiling.
98 struct ScRawToken final
100 friend class ScCompiler
;
101 // Friends that use a temporary ScRawToken on the stack (and therefore need
102 // the private dtor) and know what they're doing...
103 friend class ScTokenArray
;
105 formula::StackVar eType
; // type of data; this determines how the unions are used
111 formula::ParamClass eInForceArray
;
// NOTE(review): aRef is listed twice because the lines that nest the two
// declarations in different scopes are missing from this listing.
113 ScComplexRefData aRef
;
116 ScComplexRefData aRef
;
127 ScTableRefToken::Item
const eItem
;
131 rtl_uString
* mpDataIgnoreCase
;
133 ScMatrix
* const pMat
;
// Array of FORMULA_MAXJUMPCOUNT + 1 shorts.
135 short nJump
[ FORMULA_MAXJUMPCOUNT
+ 1 ]; // If/Chose token
137 OUString maExternalName
; // depending on the opcode, this is either the external, or the external name, or the external table name
139 // coverity[uninit_member] - members deliberately not initialized
// Empty destructor body; per the note on the line, deletion is meant to go
// through Delete() only.
142 ~ScRawToken() {} //! only delete via Delete()
144 formula::StackVar
GetType() const { return eType
; }
145 OpCode
GetOpCode() const { return eOp
; }
146 void NewOpCode( OpCode e
) { eOp
= e
; }
148 // Use these methods only on tokens that are not part of a token array,
149 // since the reference count is cleared!
150 void SetOpCode( OpCode eCode
);
151 void SetString( rtl_uString
* pData
, rtl_uString
* pDataIgoreCase
);
152 void SetSingleReference( const ScSingleRefData
& rRef
);
153 void SetDoubleReference( const ScComplexRefData
& rRef
);
154 void SetDouble( double fVal
);
155 void SetErrorConstant( FormulaError nErr
);
157 // These methods are ok to use, reference count not cleared.
158 void SetName(sal_Int16 nSheet
, sal_uInt16 nIndex
);
159 void SetExternalSingleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScSingleRefData
& rRef
);
160 void SetExternalDoubleRef( sal_uInt16 nFileId
, const OUString
& rTabName
, const ScComplexRefData
& rRef
);
161 void SetExternalName( sal_uInt16 nFileId
, const OUString
& rName
);
162 void SetExternal(const OUString
& rStr
);
164 /** If the token is a non-external reference, determine if the reference is
165 valid. If the token is an external reference, return true. Else return
166 false. Used only in ScCompiler::NextNewToken() to preserve non-existing
167 sheet names in otherwise valid references.
169 bool IsValidReference(const ScDocument
* pDoc
) const;
171 formula::FormulaToken
* CreateToken() const; // create typified token
// Compiler class declared by this header; publicly derives from
// formula::FormulaCompiler.
// NOTE(review): the class braces and access specifiers are missing from this
// listing; the member declarations that follow belong to this class or to
// types nested in it.
174 class SC_DLLPUBLIC ScCompiler
: public formula::FormulaCompiler
/// Selects how unknown symbols are reported (see the per-value notes).
enum ExtendedErrorDetection
{
    EXTENDED_ERROR_DETECTION_NONE = 0,      // no error on unknown symbols, default (interpreter handles it)
    EXTENDED_ERROR_DETECTION_NAME_BREAK,    // name error on unknown symbols and break, pCode incomplete
    EXTENDED_ERROR_DETECTION_NAME_NO_BREAK  // name error on unknown symbols, don't break, continue
};
187 const formula::FormulaGrammar::AddressConvention meConv
;
189 Convention( formula::FormulaGrammar::AddressConvention eConvP
);
190 virtual ~Convention();
192 virtual void makeRefStr(
193 const ScDocument
* pDoc
,
194 OUStringBuffer
& rBuffer
,
195 formula::FormulaGrammar::Grammar eGram
,
196 const ScAddress
& rPos
,
197 const OUString
& rErrRef
, const std::vector
<OUString
>& rTabNames
,
198 const ScComplexRefData
& rRef
, bool bSingleRef
, bool bFromRangeName
) const = 0;
200 virtual css::i18n::ParseResult
201 parseAnyToken( const OUString
& rFormula
,
203 const CharClass
* pCharClass
,
204 bool bGroupSeparator
) const = 0;
207 * Parse the symbol string and pick up the file name and the external
210 * @return true on successful parse, or false otherwise.
212 virtual bool parseExternalName( const OUString
& rSymbol
, OUString
& rFile
, OUString
& rName
,
213 const ScDocument
* pDoc
,
214 const css::uno::Sequence
< css::sheet::ExternalLinkInfo
>* pExternalLinks
) const = 0;
216 virtual OUString
makeExternalNameStr( sal_uInt16 nFileId
, const OUString
& rFile
,
217 const OUString
& rName
) const = 0;
219 virtual void makeExternalRefStr(
220 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
, sal_uInt16 nFileId
, const OUString
& rFileName
,
221 const OUString
& rTabName
, const ScSingleRefData
& rRef
) const = 0;
223 virtual void makeExternalRefStr(
224 const ScDocument
* pDoc
,
225 OUStringBuffer
& rBuffer
, const ScAddress
& rPos
,
226 sal_uInt16 nFileId
, const OUString
& rFileName
, const std::vector
<OUString
>& rTabNames
,
227 const OUString
& rTabName
, const ScComplexRefData
& rRef
) const = 0;
// Selector type passed to getSpecialSymbol().
// NOTE(review): the braces and enumerators of this enum are missing from this
// listing; only their descriptive comments survive below.
229 enum SpecialSymbolType
/**
 * Character between sheet name and address. In OOO A1 this is
 * '.', while in XL A1 and XL R1C1 this is '!'.
 */
/**
 * In OOO A1, a sheet name may be prefixed with '$' to indicate an
 * absolute sheet position.
 */
243 virtual sal_Unicode
getSpecialSymbol( SpecialSymbolType eSymType
) const = 0;
245 virtual ScCharFlags
getCharTableFlags( sal_Unicode c
, sal_Unicode cLast
) const = 0;
248 std::unique_ptr
<ScCharFlags
[]> mpCharTable
;
250 friend struct Convention
;
254 static const CharClass
*pCharClassEnglish
; // character classification for en_US locale
255 static const CharClass
*pCharClassLocalized
; // character classification for UI locale
256 static const Convention
*pConventions
[ formula::FormulaGrammar::CONV_LAST
];
// Record of C-string names for one add-in function.
// NOTE(review): the listing skips lines here (the braces, whatever stood
// between the struct header and pEnglish, and the declarator that should
// follow the closing brace); restore them from the upstream header.
258 static const struct AddInMap
261 const char* pEnglish
;
262 const char* pOriginal
; // programmatical name
263 const char* pUpper
; // upper case programmatical name
// Static accessor returning a size_t count; judging by its name it counts
// the add-in map entries.
265 static size_t GetAddInMapCount();
270 SvNumberFormatter
* mpFormatter
;
271 const ScInterpreterContext
* mpInterpreterContext
;
273 SCTAB mnCurrentSheetTab
; // indicates current sheet number parsed so far
274 sal_Int32 mnCurrentSheetEndPos
; // position after current sheet name if parsed
276 // For CONV_XL_OOX, may be set via API by MOOXML filter.
277 css::uno::Sequence
<css::sheet::ExternalLinkInfo
> maExternalLinks
;
279 sal_Unicode cSymbol
[MAXSTRLEN
+1]; // current Symbol + 0
280 OUString aFormula
; // formula source code
281 sal_Int32 nSrcPos
; // tokenizer position (source code)
282 mutable ScRawToken maRawToken
;
284 std::queue
<OpCode
> maPendingOpCodes
; // additional opcodes generated from a single symbol
286 const CharClass
* pCharClass
; // which character classification is used for parseAnyToken and upper/lower
287 bool mbCharClassesDiffer
; // whether pCharClass and current system locale's CharClass differ
288 sal_uInt16 mnPredetectedReference
; // reference when reading ODF, 0 (none), 1 (single) or 2 (double)
289 sal_Int32 mnRangeOpPosInSymbol
; // if and where a range operator is in symbol
290 const Convention
*pConv
;
291 ExtendedErrorDetection meExtendedErrorDetection
;
292 bool mbCloseBrackets
; // whether to close open brackets automatically, default TRUE
293 bool mbRewind
; // whether symbol is to be rewound to some step during lexical analysis
294 std::vector
<sal_uInt16
> maExternalFiles
;
296 std::vector
<OUString
> maTabNames
; /// sheet names mangled for the current grammar for output
297 std::vector
<OUString
> &GetSetupTabNames() const; /// get or setup tab names for the current grammar
// NOTE(review): the next two declarations belong to a TableRefEntry type
// whose header line, braces and mnLevel member are missing from this listing.
301 ScTokenRef
const mxToken
;
// Stores p in mxToken and starts mnLevel at 0.
303 TableRefEntry( formula::FormulaToken
* p
) : mxToken(p
), mnLevel(0) {}
305 std::vector
<TableRefEntry
> maTableRefs
; /// "stack" of currently active ocTableRef tokens
307 // Optimizing implicit intersection is done only at the end of code generation, because the usage context may
308 // be important. Store candidate parameters and the operation they are the argument for.
309 struct PendingImplicitIntersectionOptimization
311 PendingImplicitIntersectionOptimization(formula::FormulaToken
** p
, formula::FormulaToken
* o
)
312 : parameterLocation( p
), parameter( *p
), operation( o
) {}
313 formula::FormulaToken
** const parameterLocation
;
314 formula::FormulaTokenRef
const parameter
;
315 formula::FormulaTokenRef
const operation
;
317 std::vector
< PendingImplicitIntersectionOptimization
> mPendingImplicitIntersectionOptimizations
;
318 std::set
<formula::FormulaTokenRef
> mUnhandledPossibleImplicitIntersections
;
320 std::set
<OpCode
> mUnhandledPossibleImplicitIntersectionsOpCodes
;
323 bool NextNewToken(bool bInArray
);
324 bool ToUpperAsciiOrI18nIsAscii( OUString
& rUpper
, const OUString
& rOrg
) const;
326 virtual void SetError(FormulaError nError
) override
;
327 sal_Int32
NextSymbol(bool bInArray
);
328 bool IsValue( const OUString
& );
329 bool IsOpCode( const OUString
&, bool bInArray
);
330 bool IsOpCode2( const OUString
& );
332 bool IsReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
333 bool IsSingleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
334 bool IsDoubleReference( const OUString
& rSymbol
, const OUString
* pErrRef
= nullptr );
335 bool IsPredetectedReference( const OUString
& rSymbol
);
336 bool IsPredetectedErrRefReference( const OUString
& rName
, const OUString
* pErrRef
);
337 bool IsMacro( const OUString
& );
338 bool IsNamedRange( const OUString
& );
339 bool IsExternalNamedRange( const OUString
& rSymbol
, bool& rbInvalidExternalNameRange
);
340 bool IsDBRange( const OUString
& );
341 bool IsColRowName( const OUString
& );
342 bool IsBoolean( const OUString
& );
343 void AutoCorrectParsedSymbol();
345 void AdjustSheetLocalNameRelReferences( SCTAB nDelta
);
346 void SetRelNameReference();
348 /** Obtain range data for ocName token, global or sheet local.
350 Prerequisite: rToken is a FormulaIndexToken so IsGlobal() and
351 GetIndex() can be called on it. We don't check with RTTI.
353 ScRangeData
* GetRangeData( const formula::FormulaToken
& pToken
) const;
355 static const CharClass
* GetCharClassEnglish();
356 static const CharClass
* GetCharClassLocalized();
359 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
,
360 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
362 /** If eGrammar == GRAM_UNSPECIFIED then the grammar of pDocument is used,
363 if pDocument==nullptr then GRAM_DEFAULT.
365 ScCompiler( ScDocument
* pDocument
, const ScAddress
&,
366 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
367 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
369 ScCompiler( sc::CompileFormulaContext
& rCxt
, const ScAddress
& rPos
, ScTokenArray
& rArr
,
370 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
372 /** If eGrammar == GRAM_UNSPECIFIED then the grammar of pDocument is used,
373 if pDocument==nullptr then GRAM_DEFAULT.
375 ScCompiler( ScDocument
* pDocument
, const ScAddress
&, ScTokenArray
& rArr
,
376 formula::FormulaGrammar::Grammar eGrammar
= formula::FormulaGrammar::GRAM_UNSPECIFIED
,
377 bool bComputeII
= false, bool bMatrixFlag
= false, const ScInterpreterContext
* pContext
= nullptr );
379 virtual ~ScCompiler() override
;
382 static void DeInit(); /// all
384 // for ScAddress::Format()
385 static void CheckTabQuotes( OUString
& aTabName
,
386 const formula::FormulaGrammar::AddressConvention eConv
= formula::FormulaGrammar::CONV_OOO
);
388 /** Analyzes a string for a 'Doc'#Tab construct, or 'Do''c'#Tab etc...
390 @returns the position of the unquoted # hash mark in 'Doc'#Tab, or
392 static sal_Int32
GetDocTabPos( const OUString
& rString
);
394 static bool EnQuote( OUString
& rStr
);
395 sal_Unicode
GetNativeAddressSymbol( Convention::SpecialSymbolType eType
) const;
397 // Check if it is a valid english function name
398 bool IsEnglishSymbol( const OUString
& rName
);
400 bool IsErrorConstant( const OUString
& ) const;
401 bool IsTableRefItem( const OUString
& ) const;
402 bool IsTableRefColumn( const OUString
& ) const;
404 /** Calls GetToken() if PeekNextNoSpaces() is of given OpCode. */
405 bool GetTokenIfOpCode( OpCode eOp
);
408 * When auto correction is set, the jump command reorder must be enabled.
410 void SetAutoCorrection( bool bVal
);
411 void SetCloseBrackets( bool bVal
) { mbCloseBrackets
= bVal
; }
412 void SetRefConvention( const Convention
*pConvP
);
413 void SetRefConvention( const formula::FormulaGrammar::AddressConvention eConv
);
415 static const Convention
* GetRefConvention( formula::FormulaGrammar::AddressConvention eConv
);
417 /// Set symbol map if not empty.
418 void SetFormulaLanguage( const OpCodeMapPtr
& xMap
);
420 void SetGrammar( const formula::FormulaGrammar::Grammar eGrammar
);
422 void SetNumberFormatter( SvNumberFormatter
* pFormatter
);
425 /** Set grammar and reference convention from within SetFormulaLanguage()
429 The new grammar to be set and the associated reference convention.
432 The previous grammar that was active before SetFormulaLanguage().
434 void SetGrammarAndRefConvention(
435 const formula::FormulaGrammar::Grammar eNewGrammar
,
436 const formula::FormulaGrammar::Grammar eOldGrammar
);
439 /// Set external link info for ScAddress::CONV_XL_OOX.
440 void SetExternalLinks(
441 const css::uno::Sequence
<
442 css::sheet::ExternalLinkInfo
>& rLinks
)
444 maExternalLinks
= rLinks
;
447 void CreateStringFromXMLTokenArray( OUString
& rFormula
, OUString
& rFormulaNmsp
);
449 void SetExtendedErrorDetection( ExtendedErrorDetection eVal
) { meExtendedErrorDetection
= eVal
; }
451 bool IsCorrected() const { return bCorrected
; }
452 const OUString
& GetCorrectedFormula() const { return aCorrectedFormula
; }
455 * Tokenize formula expression string into an array of tokens.
457 * @param rFormula formula expression to tokenize.
459 * @return heap allocated token array object. The caller <i>must</i>
460 * manage the life cycle of this object.
462 std::unique_ptr
<ScTokenArray
> CompileString( const OUString
& rFormula
);
463 std::unique_ptr
<ScTokenArray
> CompileString( const OUString
& rFormula
, const OUString
& rFormulaNmsp
);
464 const ScAddress
& GetPos() const { return aPos
; }
467 static void MoveRelWrap( const ScTokenArray
& rArr
, const ScDocument
* pDoc
, const ScAddress
& rPos
,
468 SCCOL nMaxCol
, SCROW nMaxRow
);
470 /** If the character is allowed as tested by nFlags (SC_COMPILER_C_...
471 bits) for all known address conventions. If more than one bit is given
472 in nFlags, all bits must match. */
473 static bool IsCharFlagAllConventions(
474 OUString
const & rStr
, sal_Int32 nPos
, ScCharFlags nFlags
);
476 /** TODO : Move this to somewhere appropriate. */
477 static bool DoubleRefToPosSingleRefScalarCase(const ScRange
& rRange
, ScAddress
& rAdr
,
478 const ScAddress
& rFormulaPos
);
480 bool HasUnhandledPossibleImplicitIntersections() const { return !mUnhandledPossibleImplicitIntersections
.empty(); }
482 const std::set
<OpCode
>& UnhandledPossibleImplicitIntersectionsOpCodes() { return mUnhandledPossibleImplicitIntersectionsOpCodes
; }
487 virtual OUString
FindAddInFunction( const OUString
& rUpperName
, bool bLocalFirst
) const override
;
488 virtual void fillFromAddInCollectionUpperName( const NonConstOpCodeMapPtr
& xMap
) const override
;
489 virtual void fillFromAddInCollectionEnglishName( const NonConstOpCodeMapPtr
& xMap
) const override
;
490 virtual void fillFromAddInMap( const NonConstOpCodeMapPtr
& xMap
, formula::FormulaGrammar::Grammar _eGrammar
) const override
;
491 virtual void fillAddInToken(::std::vector
< css::sheet::FormulaOpCodeMapEntry
>& _rVec
,bool _bIsEnglish
) const override
;
493 virtual bool HandleExternalReference(const formula::FormulaToken
& _aToken
) override
;
494 virtual bool HandleRange() override
;
495 virtual bool HandleColRowName() override
;
496 virtual bool HandleDbData() override
;
497 virtual bool HandleTableRef() override
;
499 virtual formula::FormulaTokenRef
ExtendRangeReference( formula::FormulaToken
& rTok1
, formula::FormulaToken
& rTok2
) override
;
500 virtual void CreateStringFromExternal( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
501 virtual void CreateStringFromSingleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
502 virtual void CreateStringFromDoubleRef( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
503 virtual void CreateStringFromMatrix( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
504 virtual void CreateStringFromIndex( OUStringBuffer
& rBuffer
, const formula::FormulaToken
* pToken
) const override
;
505 virtual void LocalizeString( OUString
& rName
) const override
; // modify rName - input: exact name
507 virtual formula::ParamClass
GetForceArrayParameter( const formula::FormulaToken
* pToken
, sal_uInt16 nParam
) const override
;
509 /// Access the CharTable flags
510 ScCharFlags
GetCharTableFlags( sal_Unicode c
, sal_Unicode cLast
)
511 { return c
< 128 ? pConv
->getCharTableFlags(c
, cLast
) : ScCharFlags::NONE
; }
513 virtual void HandleIIOpCode(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
) override
;
514 bool HandleIIOpCodeInternal(formula::FormulaToken
* token
, formula::FormulaToken
*** pppToken
, sal_uInt8 nNumParams
);
515 bool SkipImplicitIntersectionOptimization(const formula::FormulaToken
* token
) const;
516 virtual void PostProcessCode() override
;
517 static bool ParameterMayBeImplicitIntersection(const formula::FormulaToken
* token
, int parameter
);
518 void ReplaceDoubleRefII(formula::FormulaToken
** ppDoubleRefTok
);
519 bool AdjustSumRangeShape(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
);
520 void CorrectSumRange(const ScComplexRefData
& rBaseRange
, ScComplexRefData
& rSumRange
, formula::FormulaToken
** ppSumRangeToken
);
525 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */