Bump version to 6.4-15
[LibreOffice.git] / include / formula / FormulaCompiler.hxx
blob185dd7a04ada023c9ecd1339d855909c0b19b87d
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_FORMULA_FORMULACOMPILER_HXX
21 #define INCLUDED_FORMULA_FORMULACOMPILER_HXX
23 #include <memory>
24 #include <unordered_map>
25 #include <vector>
27 #include <com/sun/star/uno/Sequence.hxx>
28 #include <formula/formuladllapi.h>
29 #include <formula/grammar.hxx>
30 #include <formula/opcode.hxx>
31 #include <formula/tokenarray.hxx>
32 #include <formula/types.hxx>
33 #include <formula/paramclass.hxx>
34 #include <rtl/ustrbuf.hxx>
35 #include <rtl/ustring.hxx>
36 #include <sal/types.h>
37 #include <tools/debug.hxx>
// Compile-time limits of the formula compiler; the trailing comment on each line says what it bounds.
39 #define FORMULA_MAXJUMPCOUNT 32 /* maximum number of jumps (ocChoose) */
40 #define FORMULA_MAXTOKENS 8192 /* maximum number of tokens in formula */
41 #define FORMULA_MAXPARAMS 255 /* maximum number of parameters per function (byte) */
42 #define FORMULA_MAXPARAMSII 8 /* maximum number of parameters for functions that have implicit intersection ranges */
// Forward declarations of the two UNO sheet structs. In this header they only appear as
// css::uno::Sequence element types in member function signatures.
45 namespace com { namespace sun { namespace star {
46 namespace sheet {
47 struct FormulaOpCodeMapEntry;
48 struct FormulaToken;
50 }}}
// CharClass is only used through a pointer parameter here (OpCodeMap::putOpCode).
52 class CharClass;
// Opaque declarations of the scoped enums with their fixed underlying types; they are used
// below as parameter, return and member types.
53 enum class FormulaError : sal_uInt16;
54 enum class SvNumFormatType : sal_Int16;
56 namespace formula
// Node type behind FormulaCompiler::pStack.
// NOTE(review): the field descriptions below are inferred from the names and from
// PushTokenArray() / PopTokenArray(); confirm them against the implementation file.
59 struct FormulaArrayStack
// Link to another node of the same type (presumably the previously pushed entry).
61 FormulaArrayStack* pNext;
// Token array held by this entry.
62 FormulaTokenArray* pArr;
// Presumably the saved position within pArr -- TODO confirm.
63 sal_uInt16 nIndex;
// Presumably the compiler's mpLastToken at the time of the push -- TODO confirm.
64 FormulaTokenRef mpLastToken;
// Presumably mirrors the bool argument of PushTokenArray() -- TODO confirm its meaning.
65 bool bTemp;
// Lookup table types used by FormulaCompiler::OpCodeMap.
// Symbol string -> OpCode.
68 typedef std::unordered_map< OUString, OpCode > OpCodeHashMap;
// String -> string; OpCodeMap keeps one for filter string -> AddIn string and one for the reverse direction.
69 typedef std::unordered_map< OUString, OUString > ExternalHashMap;
// Formula compiler class. It declares virtual hooks in its protected section, hence the
// virtual destructor. Copying is disabled: copy construction and copy assignment are deleted.
71 class FORMULA_DLLPUBLIC FormulaCompiler
73 private:
74 FormulaCompiler(const FormulaCompiler&) = delete;
75 FormulaCompiler& operator=(const FormulaCompiler&) = delete;
76 public:
// NOTE(review): bComputeII / bMatrixFlag presumably seed mbComputeII / mbMatrixFlag
// (see SetComputeIIFlag() / SetMatrixFlag()); confirm in the implementation file.
77 FormulaCompiler(bool bComputeII = false, bool bMatrixFlag = false);
// Same flags, plus the token array to work on (presumably stored in pArr -- TODO confirm).
78 FormulaCompiler(FormulaTokenArray& _rArr, bool bComputeII = false, bool bMatrixFlag = false);
79 virtual ~FormulaCompiler();
81 /** Mappings from strings to OpCodes and vice versa. */
82 class FORMULA_DLLPUBLIC OpCodeMap final
84 OpCodeHashMap maHashMap; /// Hash map of symbols, OUString -> OpCode
85 std::unique_ptr<OUString[]> mpTable; /// Array of symbols, OpCode -> OUString, offset==OpCode
86 ExternalHashMap maExternalHashMap; /// Hash map of ocExternal, Filter String -> AddIn String
87 ExternalHashMap maReverseExternalHashMap; /// Hash map of ocExternal, AddIn String -> Filter String
88 FormulaGrammar::Grammar meGrammar; /// Grammar, language and reference convention
89 sal_uInt16 mnSymbols; /// Count of OpCode symbols
90 bool mbCore : 1; /// If mapping was setup by core, not filters
91 bool mbEnglish : 1; /// If English symbols and external names
// Not copyable; use copyFrom() to take over the mappings of another map.
93 OpCodeMap( const OpCodeMap& ) = delete;
94 OpCodeMap& operator=( const OpCodeMap& ) = delete;
96 public:
// Constructs a map for nSymbols OpCodes: nSymbols is passed to the hash map constructor and
// is the length of the allocated symbol table. mbEnglish is derived from eGrammar through
// FormulaGrammar::isEnglish(). The two external maps are default-constructed.
98 OpCodeMap(sal_uInt16 nSymbols, bool bCore, FormulaGrammar::Grammar eGrammar ) :
99 maHashMap(nSymbols),
100 mpTable( new OUString[ nSymbols ]),
101 meGrammar( eGrammar),
102 mnSymbols( nSymbols),
103 mbCore( bCore),
104 mbEnglish ( FormulaGrammar::isEnglish(eGrammar) )
108 /** Copy mappings from r into this map, effectively replacing this map.
110 Override known legacy bad function names with
111 correct ones if the conditions can be derived from the
112 current maps.
114 void copyFrom( const OpCodeMap& r );
116 /// Get the symbol String -> OpCode hash map for finds.
117 const OpCodeHashMap& getHashMap() const { return maHashMap; }
119 /// Get the symbol String -> AddIn String hash map for finds.
120 const ExternalHashMap& getExternalHashMap() const { return maExternalHashMap; }
122 /// Get the AddIn String -> symbol String hash map for finds.
123 const ExternalHashMap& getReverseExternalHashMap() const { return maReverseExternalHashMap; }
125 /// Get the symbol string matching an OpCode.
// An eOp that is not below mnSymbols is reported through DBG_ASSERT and then answered with a
// reference to a function-local static empty string instead of indexing past the table.
126 const OUString& getSymbol( const OpCode eOp ) const
128 DBG_ASSERT( sal_uInt16(eOp) < mnSymbols, "OpCodeMap::getSymbol: OpCode out of range");
129 if (sal_uInt16(eOp) < mnSymbols)
130 return mpTable[ eOp ];
131 static OUString s_sEmpty;
132 return s_sEmpty;
135 /// Get the first character of the symbol string matching an OpCode.
// NOTE(review): indexes [0] unconditionally, also when getSymbol() yields an empty string
// (unset table entry or out-of-range eOp) -- confirm that OUString tolerates index 0 there.
// NOTE(review): the ';' after the function body is redundant.
136 sal_Unicode getSymbolChar( const OpCode eOp ) const { return getSymbol(eOp)[0]; };
138 /// Get the grammar.
139 FormulaGrammar::Grammar getGrammar() const { return meGrammar; }
141 /// Get the symbol count.
142 sal_uInt16 getSymbolCount() const { return mnSymbols; }
144 /** Are these English symbols, as opposed to native language (which may
145 be English as well)? */
146 bool isEnglish() const { return mbEnglish; }
148 /// Is it an ODF 1.1 compatibility mapping?
149 bool isPODF() const { return FormulaGrammar::isPODF( meGrammar); }
151 /* TODO: add isAPI() once a FormulaLanguage was added. */
153 /// Is it an ODFF / ODF 1.2 mapping?
154 bool isODFF() const { return FormulaGrammar::isODFF( meGrammar); }
156 /// Is it an OOXML mapping?
157 bool isOOXML() const { return FormulaGrammar::isOOXML( meGrammar); }
159 /// Does it have external symbol/name mappings?
160 bool hasExternals() const { return !maExternalHashMap.empty(); }
162 /// Put entry of symbol String and OpCode pair.
163 void putOpCode( const OUString & rStr, const OpCode eOp, const CharClass* pCharClass );
165 /// Put entry of symbol String and AddIn international String pair.
166 void putExternal( const OUString & rSymbol, const OUString & rAddIn );
168 /** Put entry of symbol String and AddIn international String pair,
169 failing silently if rAddIn name already exists. */
170 void putExternalSoftly( const OUString & rSymbol, const OUString & rAddIn );
172 /// Core implementation of XFormulaOpCodeMapper::getMappings()
173 css::uno::Sequence< css::sheet::FormulaToken >
174 createSequenceOfFormulaTokens(const FormulaCompiler& _rCompiler,
175 const css::uno::Sequence< OUString >& rNames ) const;
177 /// Core implementation of XFormulaOpCodeMapper::getAvailableMappings()
178 css::uno::Sequence< css::sheet::FormulaOpCodeMapEntry >
179 createSequenceOfAvailableMappings( const FormulaCompiler& _rCompiler,const sal_Int32 nGroup ) const;
181 /** The value used in createSequenceOfFormulaTokens() and thus in
182 XFormulaOpCodeMapper::getMappings() for an unknown symbol. */
183 static sal_Int32 getOpCodeUnknown() { return -1; }
185 private:
187 /** Conditionally put a mapping in copyFrom() context.
189 Does NOT check eOp range!
191 void putCopyOpCode( const OUString& rSymbol, OpCode eOp );
194 public:
// Shared handles to an OpCodeMap: read-only for consumers, writable for the code that fills a map.
195 typedef std::shared_ptr< const OpCodeMap > OpCodeMapPtr;
196 typedef std::shared_ptr< OpCodeMap > NonConstOpCodeMapPtr;
198 /** Get OpCodeMap for formula language.
199 @param nLanguage
200 One of css::sheet::FormulaLanguage constants.
201 @return Map for nLanguage. If nLanguage is unknown, a NULL map is returned.
203 OpCodeMapPtr GetOpCodeMap( const sal_Int32 nLanguage ) const;
205 /** Create an internal symbol map from API mapping.
206 @param bEnglish
207 Use English number parser / formatter instead of native.
209 static OpCodeMapPtr CreateOpCodeMap(
210 const css::uno::Sequence< const css::sheet::FormulaOpCodeMapEntry > & rMapping,
211 bool bEnglish );
213 /** Get current OpCodeMap in effect. */
214 const OpCodeMapPtr& GetCurrentOpCodeMap() const { return mxSymbols; }
216 /** Get OpCode for English symbol.
217 Used in XFunctionAccess to create token array.
218 @param rName
219 Symbol to lookup. MUST be upper case.
221 OpCode GetEnglishOpCode( const OUString& rName ) const;
// NOTE(review): judging by the signatures, these convert between an error constant's name and
// its FormulaError value (name -> value, and value appended to rBuffer). Which symbol map is
// consulted is not visible in this header -- confirm in the implementation file.
223 FormulaError GetErrorConstant( const OUString& rName ) const;
224 void AppendErrorConstant( OUStringBuffer& rBuffer, FormulaError nError ) const;
// Presumably set mbJumpCommandReorder / mbStopOnError (documented at those members) -- TODO confirm.
226 void EnableJumpCommandReorder( bool bEnable );
227 void EnableStopOnError( bool bEnable );
// Static OpCode classification helpers; the criteria live in the implementation file.
229 static bool IsOpCodeVolatile( OpCode eOp );
230 static bool IsOpCodeJumpCommand( OpCode eOp );
// Takes rStr by non-const reference, so it may modify it in place. NOTE(review): presumably
// strips surrounding quotes and returns whether it did -- confirm.
232 static bool DeQuote( OUString& rStr );
// NOTE(review): presumably look up eOp in the native symbol map (see mxSymbolsNative) -- confirm.
235 static const OUString& GetNativeSymbol( OpCode eOp );
236 static sal_Unicode GetNativeSymbolChar( OpCode eOp );
237 static bool IsMatrixFunction(OpCode _eOpCode); // if a function _always_ returns a Matrix
// Returns nNumFmt, which per its member comment is set during CompileTokenArray().
239 SvNumFormatType GetNumFormatType() const { return nNumFmt; }
// NOTE(review): the meaning of the bool result is not visible in this header -- confirm.
240 bool CompileTokenArray();
// Create the formula string from the token array, into either an OUString or an OUStringBuffer.
242 void CreateStringFromTokenArray( OUString& rFormula );
243 void CreateStringFromTokenArray( OUStringBuffer& rBuffer );
// Single-token variants. NOTE(review): the returned token pointer is presumably the next token
// to continue with -- confirm; bAllowArrAdvance defaults to false.
244 const FormulaToken* CreateStringFromToken( OUString& rFormula, const FormulaToken* pToken );
245 const FormulaToken* CreateStringFromToken( OUStringBuffer& rBuffer, const FormulaToken* pToken,
246 bool bAllowArrAdvance = false );
// Append the textual form of a value to rBuffer. AppendString() is static; the other two are
// const member functions.
248 void AppendBoolean( OUStringBuffer& rBuffer, bool bVal ) const;
249 void AppendDouble( OUStringBuffer& rBuffer, double fVal ) const;
250 static void AppendString( OUStringBuffer& rBuffer, const OUString & rStr );
252 /** Get the formula::FormulaGrammar::Grammar currently in effect,
253 including its address reference convention. */
254 FormulaGrammar::Grammar GetGrammar() const { return meGrammar; }
256 /** Whether current symbol set and grammar need transformation of Table
257 structured references to A1 style references when writing / exporting
258 (creating strings).
260 bool NeedsTableRefTransformation() const;
262 /** If a parameter nParam (0-based) is to be forced to array for OpCode
263 eOp, i.e. classified as ParamClass::ForceArray or
264 ParamClass::ReferenceOrForceArray type. */
// NOTE(review): the comment above speaks of "OpCode eOp", but the function takes a token
// (pToken); presumably the OpCode is the token's -- confirm.
265 virtual formula::ParamClass GetForceArrayParameter( const FormulaToken* pToken, sal_uInt16 nParam ) const;
// Static maintenance of the native symbols and separators.
267 static void UpdateSeparatorsNative( const OUString& rSep, const OUString& rArrayColSep, const OUString& rArrayRowSep );
268 static void ResetNativeSymbols();
269 static void SetNativeSymbols( const OpCodeMapPtr& xMap );
271 /** Sets the implicit intersection compute flag */
272 void SetComputeIIFlag(bool bSet) { mbComputeII = bSet; }
274 /** Sets the matrix flag for the formula*/
275 void SetMatrixFlag(bool bSet) { mbMatrixFlag = bSet; }
277 /** Separators mapped when loading opcodes from the resource, values other
278 than RESOURCE_BASE may override the resource strings. Used by OpCodeList
279 implementation via loadSymbols().
281 enum class SeparatorType
// Per the description above, the one value that does not override the resource strings.
283 RESOURCE_BASE,
// Default eSepType of loadSymbols(). NOTE(review): presumably selects ';'-based separators -- confirm.
284 SEMICOLON_BASE
287 protected:
// Virtual AddIn-related hooks, overridable by derived compilers.
288 virtual OUString FindAddInFunction( const OUString& rUpperName, bool bLocalFirst ) const;
289 virtual void fillFromAddInCollectionUpperName( const NonConstOpCodeMapPtr& xMap ) const;
290 virtual void fillFromAddInMap( const NonConstOpCodeMapPtr& xMap, FormulaGrammar::Grammar _eGrammar ) const;
291 virtual void fillFromAddInCollectionEnglishName( const NonConstOpCodeMapPtr& xMap ) const;
292 virtual void fillAddInToken(::std::vector< css::sheet::FormulaOpCodeMapEntry >& _rVec, bool _bIsEnglish) const;
// Virtual hooks for errors and for specific token kinds.
// NOTE(review): the meaning of the bool results of the Handle...() hooks is not visible here -- confirm.
294 virtual void SetError(FormulaError nError);
295 virtual FormulaTokenRef ExtendRangeReference( FormulaToken & rTok1, FormulaToken & rTok2 );
296 virtual bool HandleExternalReference(const FormulaToken& _aToken);
297 virtual bool HandleRange();
298 virtual bool HandleColRowName();
299 virtual bool HandleDbData();
300 virtual bool HandleTableRef();
// Virtual hooks that append the string form of one token kind to rBuffer.
302 virtual void CreateStringFromExternal( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
303 virtual void CreateStringFromSingleRef( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
304 virtual void CreateStringFromDoubleRef( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
305 virtual void CreateStringFromMatrix( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
306 virtual void CreateStringFromIndex( OUStringBuffer& rBuffer, const FormulaToken* pToken ) const;
307 virtual void LocalizeString( OUString& rName ) const; // modify rName - input: exact name
// Non-virtual compilation helpers.
// NOTE(review): Factor() and the ...Line() methods up to Expression() look like one method per
// operator precedence level -- confirm the call order in the implementation file.
309 bool GetToken();
310 OpCode NextToken();
311 void PutCode( FormulaTokenRef& );
312 void Factor();
313 void RangeLine();
314 void UnionLine();
315 void IntersectionLine();
316 void UnaryLine();
317 void PostOpLine();
318 void PowLine();
319 void MulDivLine();
320 void AddSubLine();
321 void ConcatLine();
322 void CompareLine();
323 OpCode Expression();
// Counterparts operating on pStack (a FormulaArrayStack pointer) -- presumably; TODO confirm.
324 void PopTokenArray();
325 void PushTokenArray( FormulaTokenArray*, bool );
327 bool MergeRangeReference( FormulaToken * * const pCode1, FormulaToken * const * const pCode2 );
329 // Hook for opcodes that may have implicit intersection ranges as parameters; it returns nothing.
330 // Called for (most) opcodes to possibly handle implicit intersection for the parameters.
// The default implementation is empty.
331 virtual void HandleIIOpCode(FormulaToken* /*token*/,
332 FormulaToken*** /*pppToken*/, sal_uInt8 /*nNumParams*/) {}
334 // Called from CompileTokenArray() after RPN code generation is done.
// The default implementation is empty.
335 virtual void PostProcessCode() {}
// Protected state of the compiler.
337 OUString aCorrectedFormula; // autocorrected Formula
338 OUString aCorrectedSymbol; // autocorrected Symbol
340 OpCodeMapPtr mxSymbols; // which symbols are used
342 FormulaTokenRef mpToken; // current token
343 FormulaTokenRef pCurrentFactorToken; // current factor token (of Factor() method)
344 sal_uInt16 nCurrentFactorParam; // current factor token's parameter, 1-based
// Raw pointer to a token array. NOTE(review): presumably the array currently being compiled,
// not owned by the compiler -- confirm.
345 FormulaTokenArray* pArr;
// NOTE(review): presumably iterates over the array pArr points to -- confirm.
346 FormulaTokenArrayPlainIterator maArrIterator;
347 FormulaTokenRef mpLastToken; // last token
// NOTE(review): presumably the write position in the generated code, used by PutCode() -- confirm.
349 FormulaToken** pCode;
// NOTE(review): presumably the head of the entries managed by PushTokenArray() / PopTokenArray() -- confirm.
350 FormulaArrayStack* pStack;
352 OpCode eLastOp;
353 short nRecursion; // GetToken() recursions
354 SvNumFormatType nNumFmt; // set during CompileTokenArray()
355 sal_uInt16 pc; // program counter
357 FormulaGrammar::Grammar meGrammar; // The grammar used, language plus convention.
359 bool bAutoCorrect; // whether to apply AutoCorrection
360 bool bCorrected; // AutoCorrection was applied
361 bool glSubTotal; // if code contains one or more subtotal functions
362 bool needsRPNTokenCheck; // whether to make FormulaTokenArray check all tokens at the end
364 bool mbJumpCommandReorder; /// Whether or not to reorder RPN for jump commands.
365 bool mbStopOnError; /// Whether to stop compilation on first encountered error.
367 bool mbComputeII; // whether to attempt computing implicit intersection ranges while building the RPN array.
368 bool mbMatrixFlag; // whether the formula is a matrix formula (needed for II computation)
370 private:
// One initialiser per symbol set. They are const member functions.
// NOTE(review): presumably each fills the matching mutable mxSymbols... member, which would be
// why those members are declared mutable -- confirm.
371 void InitSymbolsNative() const; /// only SymbolsNative, on first document creation
372 void InitSymbolsEnglish() const; /// only SymbolsEnglish, maybe later
373 void InitSymbolsPODF() const; /// only SymbolsPODF, on demand
374 void InitSymbolsAPI() const; /// only SymbolsAPI, on demand
375 void InitSymbolsODFF() const; /// only SymbolsODFF, on demand
376 void InitSymbolsEnglishXL() const; /// only SymbolsEnglishXL, on demand
377 void InitSymbolsOOXML() const; /// only SymbolsOOXML, on demand
// Takes the symbol table as (const char*, int) pairs plus the grammar and the map handle rxMap
// (a non-const reference). eSepType defaults to SeparatorType::SEMICOLON_BASE.
379 void loadSymbols(const std::pair<const char*, int>* pSymbols, FormulaGrammar::Grammar eGrammar, NonConstOpCodeMapPtr& rxMap,
380 SeparatorType eSepType = SeparatorType::SEMICOLON_BASE) const;
382 /** Check pCurrentFactorToken for nParam's (0-based) ForceArray types and
383 set ForceArray at rCurr if so. Set nParam+1 as 1-based
384 nCurrentFactorParam for subsequent ForceArrayOperator() calls.
386 void CheckSetForceArrayParameter( FormulaTokenRef const & rCurr, sal_uInt8 nParam );
// Called by CurrentFactor's assignment operator before it replaces pCurrentFactorToken.
// NOTE(review): what it does to rCurr is not visible in this header -- confirm.
388 void ForceArrayOperator( FormulaTokenRef const & rCurr );
// Saves and restores the compiler's current factor state for the lifetime of this object:
// the constructor records pComp's pCurrentFactorToken and nCurrentFactorParam, and the
// destructor writes both back. Not copyable.
390 class CurrentFactor
// Values of the compiler's pCurrentFactorToken / nCurrentFactorParam at construction time.
392 FormulaTokenRef pPrevFac;
393 sal_uInt16 nPrevParam;
// The compiler whose state is saved; dereferenced without a null check.
394 FormulaCompiler* pCompiler;
395 CurrentFactor( const CurrentFactor& ) = delete;
396 CurrentFactor& operator=( const CurrentFactor& ) = delete;
397 public:
398 explicit CurrentFactor( FormulaCompiler* pComp )
399 : pPrevFac( pComp->pCurrentFactorToken )
400 , nPrevParam( pComp->nCurrentFactorParam )
401 , pCompiler( pComp )
// Restores the state recorded by the constructor.
403 ~CurrentFactor()
405 pCompiler->pCurrentFactorToken = pPrevFac;
406 pCompiler->nCurrentFactorParam = nPrevParam;
408 // yes, this operator= may modify the RValue
// Calls ForceArrayOperator( r ) first (while the previous factor token is still current),
// then makes r the compiler's current factor token and resets the parameter counter to 0.
409 void operator=( FormulaTokenRef const & r )
411 pCompiler->ForceArrayOperator( r );
412 pCompiler->pCurrentFactorToken = r;
413 pCompiler->nCurrentFactorParam = 0;
// Raw pointer overload: wraps p in a FormulaTokenRef and delegates to the overload above.
415 void operator=( FormulaToken* p )
417 FormulaTokenRef xTemp( p );
418 *this = xTemp;
// The accessors below expose the compiler's live pCurrentFactorToken, not the saved pPrevFac.
420 operator FormulaTokenRef&()
421 { return pCompiler->pCurrentFactorToken; }
422 FormulaToken* operator->()
423 { return pCompiler->pCurrentFactorToken.operator->(); }
424 operator FormulaToken*()
425 { return operator->(); }
// One symbol map handle per symbol set. They are mutable, so const member functions can assign them.
// NOTE(review): presumably filled by the const InitSymbols...() functions -- confirm.
429 mutable NonConstOpCodeMapPtr mxSymbolsODFF; // ODFF symbols
430 mutable NonConstOpCodeMapPtr mxSymbolsPODF; // ODF 1.1 symbols
431 mutable NonConstOpCodeMapPtr mxSymbolsAPI; // XFunctionAccess API symbols
432 mutable NonConstOpCodeMapPtr mxSymbolsNative; // native symbols
433 mutable NonConstOpCodeMapPtr mxSymbolsEnglish; // English symbols
434 mutable NonConstOpCodeMapPtr mxSymbolsEnglishXL; // English Excel symbols (for VBA formula parsing)
435 mutable NonConstOpCodeMapPtr mxSymbolsOOXML; // Excel OOXML symbols
// Static token array shared by all instances. NOTE(review): presumably what pArr refers to
// when a compiler is constructed without a token array -- confirm.
437 static FormulaTokenArray smDummyTokenArray;
440 } // formula
443 #endif // INCLUDED_FORMULA_FORMULACOMPILER_HXX
446 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */