1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_FORMULA_FORMULACOMPILER_HXX
21 #define INCLUDED_FORMULA_FORMULACOMPILER_HXX
24 #include <unordered_map>
27 #include <com/sun/star/uno/Sequence.hxx>
28 #include <formula/formuladllapi.h>
29 #include <formula/grammar.hxx>
30 #include <formula/opcode.hxx>
31 #include <formula/tokenarray.hxx>
32 #include <formula/types.hxx>
33 #include <formula/paramclass.hxx>
34 #include <rtl/ustrbuf.hxx>
35 #include <rtl/ustring.hxx>
36 #include <sal/types.h>
37 #include <tools/debug.hxx>
39 #define FORMULA_MAXJUMPCOUNT 32 /* maximum number of jumps (ocChoose) */
40 #define FORMULA_MAXTOKENS 8192 /* maximum number of tokens in formula */
41 #define FORMULA_MAXPARAMS 255 /* maximum number of parameters per function (byte) */
42 #define FORMULA_MAXPARAMSII 8 /* maximum number of parameters for functions that have implicit intersection ranges */
45 namespace com
{ namespace sun
{ namespace star
{
47 struct FormulaOpCodeMapEntry
;
53 enum class FormulaError
: sal_uInt16
;
54 enum class SvNumFormatType
: sal_Int16
;
59 struct FormulaArrayStack
61 FormulaArrayStack
* pNext
;
62 FormulaTokenArray
* pArr
;
64 FormulaTokenRef mpLastToken
;
68 typedef std::unordered_map
< OUString
, OpCode
> OpCodeHashMap
;
69 typedef std::unordered_map
< OUString
, OUString
> ExternalHashMap
;
71 class FORMULA_DLLPUBLIC FormulaCompiler
74 FormulaCompiler(const FormulaCompiler
&) = delete;
75 FormulaCompiler
& operator=(const FormulaCompiler
&) = delete;
77 FormulaCompiler(bool bComputeII
= false, bool bMatrixFlag
= false);
78 FormulaCompiler(FormulaTokenArray
& _rArr
, bool bComputeII
= false, bool bMatrixFlag
= false);
79 virtual ~FormulaCompiler();
81 /** Mappings from strings to OpCodes and vice versa. */
82 class FORMULA_DLLPUBLIC OpCodeMap final
84 OpCodeHashMap maHashMap
; /// Hash map of symbols, OUString -> OpCode
85 std::unique_ptr
<OUString
[]> mpTable
; /// Array of symbols, OpCode -> OUString, offset==OpCode
86 ExternalHashMap maExternalHashMap
; /// Hash map of ocExternal, Filter String -> AddIn String
87 ExternalHashMap maReverseExternalHashMap
; /// Hash map of ocExternal, AddIn String -> Filter String
88 FormulaGrammar::Grammar meGrammar
; /// Grammar, language and reference convention
89 sal_uInt16 mnSymbols
; /// Count of OpCode symbols
90 bool mbCore
: 1; /// If mapping was setup by core, not filters
91 bool mbEnglish
: 1; /// If English symbols and external names
93 OpCodeMap( const OpCodeMap
& ) = delete;
94 OpCodeMap
& operator=( const OpCodeMap
& ) = delete;
98 OpCodeMap(sal_uInt16 nSymbols
, bool bCore
, FormulaGrammar::Grammar eGrammar
) :
100 mpTable( new OUString
[ nSymbols
]),
101 meGrammar( eGrammar
),
102 mnSymbols( nSymbols
),
104 mbEnglish ( FormulaGrammar::isEnglish(eGrammar
) )
108 /** Copy mappings from r into this map, effectively replacing this map.
110 Override known legacy bad function names with
111 correct ones if the conditions can be derived from the
114 void copyFrom( const OpCodeMap
& r
);
116 /// Get the symbol String -> OpCode hash map for finds.
117 const OpCodeHashMap
& getHashMap() const { return maHashMap
; }
119 /// Get the symbol String -> AddIn String hash map for finds.
120 const ExternalHashMap
& getExternalHashMap() const { return maExternalHashMap
; }
122 /// Get the AddIn String -> symbol String hash map for finds.
123 const ExternalHashMap
& getReverseExternalHashMap() const { return maReverseExternalHashMap
; }
125 /// Get the symbol string matching an OpCode.
126 const OUString
& getSymbol( const OpCode eOp
) const
128 DBG_ASSERT( sal_uInt16(eOp
) < mnSymbols
, "OpCodeMap::getSymbol: OpCode out of range");
129 if (sal_uInt16(eOp
) < mnSymbols
)
130 return mpTable
[ eOp
];
131 static OUString s_sEmpty
;
135 /// Get the first character of the symbol string matching an OpCode.
136 sal_Unicode
getSymbolChar( const OpCode eOp
) const { return getSymbol(eOp
)[0]; };
139 FormulaGrammar::Grammar
getGrammar() const { return meGrammar
; }
141 /// Get the symbol count.
142 sal_uInt16
getSymbolCount() const { return mnSymbols
; }
144 /** Are these English symbols, as opposed to native language (which may
145 be English as well)? */
146 bool isEnglish() const { return mbEnglish
; }
148 /// Is it an ODF 1.1 compatibility mapping?
149 bool isPODF() const { return FormulaGrammar::isPODF( meGrammar
); }
151 /* TODO: add isAPI() once a FormulaLanguage was added. */
153 /// Is it an ODFF / ODF 1.2 mapping?
154 bool isODFF() const { return FormulaGrammar::isODFF( meGrammar
); }
156 /// Is it an OOXML mapping?
157 bool isOOXML() const { return FormulaGrammar::isOOXML( meGrammar
); }
159 /// Does it have external symbol/name mappings?
160 bool hasExternals() const { return !maExternalHashMap
.empty(); }
162 /// Put entry of symbol String and OpCode pair.
163 void putOpCode( const OUString
& rStr
, const OpCode eOp
, const CharClass
* pCharClass
);
165 /// Put entry of symbol String and AddIn international String pair.
166 void putExternal( const OUString
& rSymbol
, const OUString
& rAddIn
);
168 /** Put entry of symbol String and AddIn international String pair,
169 failing silently if rAddIn name already exists. */
170 void putExternalSoftly( const OUString
& rSymbol
, const OUString
& rAddIn
);
172 /// Core implementation of XFormulaOpCodeMapper::getMappings()
173 css::uno::Sequence
< css::sheet::FormulaToken
>
174 createSequenceOfFormulaTokens(const FormulaCompiler
& _rCompiler
,
175 const css::uno::Sequence
< OUString
>& rNames
) const;
177 /// Core implementation of XFormulaOpCodeMapper::getAvailableMappings()
178 css::uno::Sequence
< css::sheet::FormulaOpCodeMapEntry
>
179 createSequenceOfAvailableMappings( const FormulaCompiler
& _rCompiler
,const sal_Int32 nGroup
) const;
181 /** The value used in createSequenceOfAvailableMappings() and thus in
182 XFormulaOpCodeMapper::getMappings() for an unknown symbol. */
183 static sal_Int32
getOpCodeUnknown() { return -1; }
187 /** Conditionally put a mapping in copyFrom() context.
189 Does NOT check eOp range!
191 void putCopyOpCode( const OUString
& rSymbol
, OpCode eOp
);
195 typedef std::shared_ptr
< const OpCodeMap
> OpCodeMapPtr
;
196 typedef std::shared_ptr
< OpCodeMap
> NonConstOpCodeMapPtr
;
198 /** Get OpCodeMap for formula language.
200 One of css::sheet::FormulaLanguage constants.
201 @return Map for nLanguage. If nLanguage is unknown, a NULL map is returned.
203 OpCodeMapPtr
GetOpCodeMap( const sal_Int32 nLanguage
) const;
205 /** Create an internal symbol map from API mapping.
207 Use English number parser / formatter instead of native.
209 static OpCodeMapPtr
CreateOpCodeMap(
210 const css::uno::Sequence
< const css::sheet::FormulaOpCodeMapEntry
> & rMapping
,
213 /** Get current OpCodeMap in effect. */
214 const OpCodeMapPtr
& GetCurrentOpCodeMap() const { return mxSymbols
; }
216 /** Get OpCode for English symbol.
217 Used in XFunctionAccess to create token array.
219 Symbol to lookup. MUST be upper case.
221 OpCode
GetEnglishOpCode( const OUString
& rName
) const;
223 FormulaError
GetErrorConstant( const OUString
& rName
) const;
224 void AppendErrorConstant( OUStringBuffer
& rBuffer
, FormulaError nError
) const;
226 void EnableJumpCommandReorder( bool bEnable
);
227 void EnableStopOnError( bool bEnable
);
229 static bool IsOpCodeVolatile( OpCode eOp
);
230 static bool IsOpCodeJumpCommand( OpCode eOp
);
232 static bool DeQuote( OUString
& rStr
);
235 static const OUString
& GetNativeSymbol( OpCode eOp
);
236 static sal_Unicode
GetNativeSymbolChar( OpCode eOp
);
237 static bool IsMatrixFunction(OpCode _eOpCode
); // if a function _always_ returns a Matrix
239 SvNumFormatType
GetNumFormatType() const { return nNumFmt
; }
240 bool CompileTokenArray();
242 void CreateStringFromTokenArray( OUString
& rFormula
);
243 void CreateStringFromTokenArray( OUStringBuffer
& rBuffer
);
244 const FormulaToken
* CreateStringFromToken( OUString
& rFormula
, const FormulaToken
* pToken
);
245 const FormulaToken
* CreateStringFromToken( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
,
246 bool bAllowArrAdvance
= false );
248 void AppendBoolean( OUStringBuffer
& rBuffer
, bool bVal
) const;
249 void AppendDouble( OUStringBuffer
& rBuffer
, double fVal
) const;
250 static void AppendString( OUStringBuffer
& rBuffer
, const OUString
& rStr
);
252 /** Set symbol map corresponding to one of predefined formula::FormulaGrammar::Grammar,
253 including an address reference convention. */
254 FormulaGrammar::Grammar
GetGrammar() const { return meGrammar
; }
256 /** Whether current symbol set and grammar need transformation of Table
257 structured references to A1 style references when writing / exporting
260 bool NeedsTableRefTransformation() const;
262 /** If a parameter nParam (0-based) is to be forced to array for OpCode
263 eOp, i.e. classified as ParamClass::ForceArray or
264 ParamClass::ReferenceOrForceArray type. */
265 virtual formula::ParamClass
GetForceArrayParameter( const FormulaToken
* pToken
, sal_uInt16 nParam
) const;
267 static void UpdateSeparatorsNative( const OUString
& rSep
, const OUString
& rArrayColSep
, const OUString
& rArrayRowSep
);
268 static void ResetNativeSymbols();
269 static void SetNativeSymbols( const OpCodeMapPtr
& xMap
);
271 /** Sets the implicit intersection compute flag */
272 void SetComputeIIFlag(bool bSet
) { mbComputeII
= bSet
; }
274 /** Sets the matrix flag for the formula*/
275 void SetMatrixFlag(bool bSet
) { mbMatrixFlag
= bSet
; }
277 /** Separators mapped when loading opcodes from the resource, values other
278 than RESOURCE_BASE may override the resource strings. Used by OpCodeList
279 implementation via loadSymbols().
281 enum class SeparatorType
288 virtual OUString
FindAddInFunction( const OUString
& rUpperName
, bool bLocalFirst
) const;
289 virtual void fillFromAddInCollectionUpperName( const NonConstOpCodeMapPtr
& xMap
) const;
290 virtual void fillFromAddInMap( const NonConstOpCodeMapPtr
& xMap
, FormulaGrammar::Grammar _eGrammar
) const;
291 virtual void fillFromAddInCollectionEnglishName( const NonConstOpCodeMapPtr
& xMap
) const;
292 virtual void fillAddInToken(::std::vector
< css::sheet::FormulaOpCodeMapEntry
>& _rVec
, bool _bIsEnglish
) const;
294 virtual void SetError(FormulaError nError
);
295 virtual FormulaTokenRef
ExtendRangeReference( FormulaToken
& rTok1
, FormulaToken
& rTok2
);
296 virtual bool HandleExternalReference(const FormulaToken
& _aToken
);
297 virtual bool HandleRange();
298 virtual bool HandleColRowName();
299 virtual bool HandleDbData();
300 virtual bool HandleTableRef();
302 virtual void CreateStringFromExternal( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
) const;
303 virtual void CreateStringFromSingleRef( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
) const;
304 virtual void CreateStringFromDoubleRef( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
) const;
305 virtual void CreateStringFromMatrix( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
) const;
306 virtual void CreateStringFromIndex( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
) const;
307 virtual void LocalizeString( OUString
& rName
) const; // modify rName - input: exact name
311 void PutCode( FormulaTokenRef
& );
315 void IntersectionLine();
324 void PopTokenArray();
325 void PushTokenArray( FormulaTokenArray
*, bool );
327 bool MergeRangeReference( FormulaToken
* * const pCode1
, FormulaToken
* const * const pCode2
);
329 // Returns whether the opcode has implicit intersection ranges as parameters.
330 // Called for (most) opcodes to possibly handle implicit intersection for the parameters.
331 virtual void HandleIIOpCode(FormulaToken
* /*token*/,
332 FormulaToken
*** /*pppToken*/, sal_uInt8
/*nNumParams*/) {}
334 // Called from CompileTokenArray() after RPN code generation is done.
335 virtual void PostProcessCode() {}
337 OUString aCorrectedFormula
; // autocorrected Formula
338 OUString aCorrectedSymbol
; // autocorrected Symbol
340 OpCodeMapPtr mxSymbols
; // which symbols are used
342 FormulaTokenRef mpToken
; // current token
343 FormulaTokenRef pCurrentFactorToken
; // current factor token (of Factor() method)
344 sal_uInt16 nCurrentFactorParam
; // current factor token's parameter, 1-based
345 FormulaTokenArray
* pArr
;
346 FormulaTokenArrayPlainIterator maArrIterator
;
347 FormulaTokenRef mpLastToken
; // last token
349 FormulaToken
** pCode
;
350 FormulaArrayStack
* pStack
;
353 short nRecursion
; // GetToken() recursions
354 SvNumFormatType nNumFmt
; // set during CompileTokenArray()
355 sal_uInt16 pc
; // program counter
357 FormulaGrammar::Grammar meGrammar
; // The grammar used, language plus convention.
359 bool bAutoCorrect
; // whether to apply AutoCorrection
360 bool bCorrected
; // AutoCorrection was applied
361 bool glSubTotal
; // if code contains one or more subtotal functions
362 bool needsRPNTokenCheck
; // whether to make FormulaTokenArray check all tokens at the end
364 bool mbJumpCommandReorder
; /// Whether or not to reorder RPN for jump commands.
365 bool mbStopOnError
; /// Whether to stop compilation on first encountered error.
367 bool mbComputeII
; // whether to attempt computing implicit intersection ranges while building the RPN array.
368 bool mbMatrixFlag
; // whether the formula is a matrix formula (needed for II computation)
371 void InitSymbolsNative() const; /// only SymbolsNative, on first document creation
372 void InitSymbolsEnglish() const; /// only SymbolsEnglish, maybe later
373 void InitSymbolsPODF() const; /// only SymbolsPODF, on demand
374 void InitSymbolsAPI() const; /// only SymbolsAPI, on demand
375 void InitSymbolsODFF() const; /// only SymbolsODFF, on demand
376 void InitSymbolsEnglishXL() const; /// only SymbolsEnglishXL, on demand
377 void InitSymbolsOOXML() const; /// only SymbolsOOXML, on demand
379 void loadSymbols(const std::pair
<const char*, int>* pSymbols
, FormulaGrammar::Grammar eGrammar
, NonConstOpCodeMapPtr
& rxMap
,
380 SeparatorType eSepType
= SeparatorType::SEMICOLON_BASE
) const;
382 /** Check pCurrentFactorToken for nParam's (0-based) ForceArray types and
383 set ForceArray at rCurr if so. Set nParam+1 as 1-based
384 nCurrentFactorParam for subsequent ForceArrayOperator() calls.
386 void CheckSetForceArrayParameter( FormulaTokenRef
const & rCurr
, sal_uInt8 nParam
);
388 void ForceArrayOperator( FormulaTokenRef
const & rCurr
);
392 FormulaTokenRef pPrevFac
;
393 sal_uInt16 nPrevParam
;
394 FormulaCompiler
* pCompiler
;
395 CurrentFactor( const CurrentFactor
& ) = delete;
396 CurrentFactor
& operator=( const CurrentFactor
& ) = delete;
398 explicit CurrentFactor( FormulaCompiler
* pComp
)
399 : pPrevFac( pComp
->pCurrentFactorToken
)
400 , nPrevParam( pComp
->nCurrentFactorParam
)
405 pCompiler
->pCurrentFactorToken
= pPrevFac
;
406 pCompiler
->nCurrentFactorParam
= nPrevParam
;
408 // yes, this operator= may modify the RValue
409 void operator=( FormulaTokenRef
const & r
)
411 pCompiler
->ForceArrayOperator( r
);
412 pCompiler
->pCurrentFactorToken
= r
;
413 pCompiler
->nCurrentFactorParam
= 0;
415 void operator=( FormulaToken
* p
)
417 FormulaTokenRef
xTemp( p
);
420 operator FormulaTokenRef
&()
421 { return pCompiler
->pCurrentFactorToken
; }
422 FormulaToken
* operator->()
423 { return pCompiler
->pCurrentFactorToken
.operator->(); }
424 operator FormulaToken
*()
425 { return operator->(); }
429 mutable NonConstOpCodeMapPtr mxSymbolsODFF
; // ODFF symbols
430 mutable NonConstOpCodeMapPtr mxSymbolsPODF
; // ODF 1.1 symbols
431 mutable NonConstOpCodeMapPtr mxSymbolsAPI
; // XFunctionAccess API symbols
432 mutable NonConstOpCodeMapPtr mxSymbolsNative
; // native symbols
433 mutable NonConstOpCodeMapPtr mxSymbolsEnglish
; // English symbols
434 mutable NonConstOpCodeMapPtr mxSymbolsEnglishXL
; // English Excel symbols (for VBA formula parsing)
435 mutable NonConstOpCodeMapPtr mxSymbolsOOXML
; // Excel OOXML symbols
437 static FormulaTokenArray smDummyTokenArray
;
443 #endif // INCLUDED_FORMULA_FORMULACOMPILER_HXX
446 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */