1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_FORMULA_FORMULACOMPILER_HXX
21 #define INCLUDED_FORMULA_FORMULACOMPILER_HXX
24 #include <unordered_map>
27 #include <com/sun/star/uno/Sequence.hxx>
28 #include <formula/formuladllapi.h>
29 #include <formula/grammar.hxx>
30 #include <formula/opcode.hxx>
31 #include <formula/token.hxx>
32 #include <formula/types.hxx>
33 #include <rtl/ustrbuf.hxx>
34 #include <rtl/ustring.hxx>
35 #include <sal/log.hxx>
36 #include <sal/types.h>
37 #include <tools/debug.hxx>
/* Compile-time limits of the formula compiler. The stray numeric tokens that
   preceded each directive made the lines ill-formed and have been removed. */
#define FORMULA_MAXJUMPCOUNT    32   /* maximum number of jumps (ocChoose) */
#define FORMULA_MAXTOKENS     8192   /* maximum number of tokens in formula */
#define FORMULA_MAXPARAMS      255   /* maximum number of parameters per function (byte) */
44 namespace com
{ namespace sun
{ namespace star
{
46 struct FormulaOpCodeMapEntry
;
52 enum class FormulaError
: sal_uInt16
;
56 class FormulaTokenArray
;
58 struct FormulaArrayStack
60 FormulaArrayStack
* pNext
;
61 FormulaTokenArray
* pArr
;
62 FormulaTokenRef mpLastToken
;
67 typedef std::unordered_map
< OUString
, OpCode
, OUStringHash
> OpCodeHashMap
;
68 typedef std::unordered_map
< OUString
, OUString
, OUStringHash
> ExternalHashMap
;
70 class FORMULA_DLLPUBLIC FormulaCompiler
73 FormulaCompiler(const FormulaCompiler
&) = delete;
74 FormulaCompiler
& operator=(const FormulaCompiler
&) = delete;
77 FormulaCompiler(FormulaTokenArray
& _rArr
);
78 virtual ~FormulaCompiler();
80 /** Mappings from strings to OpCodes and vice versa. */
81 class FORMULA_DLLPUBLIC OpCodeMap final
83 OpCodeHashMap
* mpHashMap
; /// Hash map of symbols, OUString -> OpCode
84 OUString
* mpTable
; /// Array of symbols, OpCode -> OUString, offset==OpCode
85 ExternalHashMap
* mpExternalHashMap
; /// Hash map of ocExternal, Filter String -> AddIn String
86 ExternalHashMap
* mpReverseExternalHashMap
; /// Hash map of ocExternal, AddIn String -> Filter String
87 FormulaGrammar::Grammar meGrammar
; /// Grammar, language and reference convention
88 sal_uInt16 mnSymbols
; /// Count of OpCode symbols
89 bool mbCore
: 1; /// If mapping was setup by core, not filters
90 bool mbEnglish
: 1; /// If English symbols and external names
92 OpCodeMap( const OpCodeMap
& ) = delete;
93 OpCodeMap
& operator=( const OpCodeMap
& ) = delete;
// Constructs a map for nSymbols OpCodes: allocates the symbol hash map
// (constructed with nSymbols as its argument), an OUString table of nSymbols
// entries and the two external-name hash maps, stores eGrammar and nSymbols,
// and sets mbEnglish from FormulaGrammar::isEnglish( meGrammar ).
// NOTE(review): in this copy the initializer list ends in a dangling comma,
// bCore is never used and the body braces are absent -- presumably an
// "mbCore( bCore )" initializer and the braces were lost; confirm against
// the upstream header before building.
97 OpCodeMap(sal_uInt16 nSymbols
, bool bCore
, FormulaGrammar::Grammar eGrammar
) :
98 mpHashMap( new OpCodeHashMap( nSymbols
)),
99 mpTable( new OUString
[ nSymbols
]),
100 mpExternalHashMap( new ExternalHashMap
),
101 mpReverseExternalHashMap( new ExternalHashMap
),
102 meGrammar( eGrammar
),
103 mnSymbols( nSymbols
),
106 mbEnglish
= FormulaGrammar::isEnglish( meGrammar
);
110 /** Copy mappings from r into this map, effectively replacing this map.
112 Override known legacy bad function names with
113 correct ones if the conditions can be derived from the
116 void copyFrom( const OpCodeMap
& r
);
118 /// Get the symbol String -> OpCode hash map for finds.
119 inline const OpCodeHashMap
* getHashMap() const { return mpHashMap
; }
121 /// Get the symbol String -> AddIn String hash map for finds.
122 inline const ExternalHashMap
* getExternalHashMap() const { return mpExternalHashMap
; }
124 /// Get the AddIn String -> symbol String hash map for finds.
125 inline const ExternalHashMap
* getReverseExternalHashMap() const { return mpReverseExternalHashMap
; }
// Asserts (DBG_ASSERT) that eOp is below mnSymbols and, when it is, returns
// the table entry mpTable[ eOp ]. For the out-of-range case a function-local
// static empty string s_sEmpty is declared.
// NOTE(review): the statement returning s_sEmpty and the body braces are
// missing in this copy -- presumably "return s_sEmpty;"; confirm against the
// upstream header before building.
127 /// Get the symbol string matching an OpCode.
128 inline const OUString
& getSymbol( const OpCode eOp
) const
130 DBG_ASSERT( sal_uInt16(eOp
) < mnSymbols
, "OpCodeMap::getSymbol: OpCode out of range");
131 if (sal_uInt16(eOp
) < mnSymbols
)
132 return mpTable
[ eOp
];
133 static OUString s_sEmpty
;
137 /// Get the first character of the symbol string matching an OpCode.
138 inline sal_Unicode
getSymbolChar( const OpCode eOp
) const { return getSymbol(eOp
)[0]; };
141 inline FormulaGrammar::Grammar
getGrammar() const { return meGrammar
; }
143 /// Get the symbol count.
144 inline sal_uInt16
getSymbolCount() const { return mnSymbols
; }
146 /** Are these English symbols, as opposed to native language (which may
147 be English as well)? */
148 inline bool isEnglish() const { return mbEnglish
; }
150 /// Is it an ODF 1.1 compatibility mapping?
151 inline bool isPODF() const { return FormulaGrammar::isPODF( meGrammar
); }
153 /* TODO: add isAPI() once a FormulaLanguage was added. */
155 /// Is it an ODFF / ODF 1.2 mapping?
156 inline bool isODFF() const { return FormulaGrammar::isODFF( meGrammar
); }
158 /// Is it an OOXML mapping?
159 inline bool isOOXML() const { return FormulaGrammar::isOOXML( meGrammar
); }
161 /// Does it have external symbol/name mappings?
162 inline bool hasExternals() const { return !mpExternalHashMap
->empty(); }
164 /// Put entry of symbol String and OpCode pair.
165 void putOpCode( const OUString
& rStr
, const OpCode eOp
, const CharClass
* pCharClass
);
167 /// Put entry of symbol String and AddIn international String pair.
168 void putExternal( const OUString
& rSymbol
, const OUString
& rAddIn
);
170 /** Put entry of symbol String and AddIn international String pair,
171 failing silently if rAddIn name already exists. */
172 void putExternalSoftly( const OUString
& rSymbol
, const OUString
& rAddIn
);
174 /// Core implementation of XFormulaOpCodeMapper::getMappings()
175 css::uno::Sequence
< css::sheet::FormulaToken
>
176 createSequenceOfFormulaTokens(const FormulaCompiler
& _rCompiler
,
177 const css::uno::Sequence
< OUString
>& rNames
) const;
179 /// Core implementation of XFormulaOpCodeMapper::getAvailableMappings()
180 css::uno::Sequence
< css::sheet::FormulaOpCodeMapEntry
>
181 createSequenceOfAvailableMappings( const FormulaCompiler
& _rCompiler
,const sal_Int32 nGroup
) const;
183 /** The value used in createSequenceOfAvailableMappings() and thus in
184 XFormulaOpCodeMapper::getMappings() for an unknown symbol. */
185 static sal_Int32
getOpCodeUnknown();
189 /** Conditionally put a mapping in copyFrom() context.
191 Does NOT check eOp range!
193 void putCopyOpCode( const OUString
& rSymbol
, OpCode eOp
);
197 typedef std::shared_ptr
< const OpCodeMap
> OpCodeMapPtr
;
198 typedef std::shared_ptr
< OpCodeMap
> NonConstOpCodeMapPtr
;
200 /** Get OpCodeMap for formula language.
202 One of css::sheet::FormulaLanguage constants.
203 @return Map for nLanguage. If nLanguage is unknown, a NULL map is returned.
205 OpCodeMapPtr
GetOpCodeMap( const sal_Int32 nLanguage
) const;
// Static factory declaration: takes the API mapping rMapping (a sequence of
// css::sheet::FormulaOpCodeMapEntry) and yields an OpCodeMapPtr.
// NOTE(review): this copy is truncated -- the doc comment is never closed
// and the parameter list stops after the comma following rMapping. The
// "Use English number parser / formatter instead of native." text presumably
// documents a further (boolean) parameter that was lost; confirm against the
// upstream header before building.
207 /** Create an internal symbol map from API mapping.
209 Use English number parser / formatter instead of native.
211 static OpCodeMapPtr
CreateOpCodeMap(
212 const css::uno::Sequence
< const css::sheet::FormulaOpCodeMapEntry
> & rMapping
,
215 /** Get current OpCodeMap in effect. */
216 const OpCodeMapPtr
& GetCurrentOpCodeMap() const { return mxSymbols
; }
218 /** Get OpCode for English symbol.
219 Used in XFunctionAccess to create token array.
221 Symbol to lookup. MUST be upper case.
223 OpCode
GetEnglishOpCode( const OUString
& rName
) const;
225 FormulaError
GetErrorConstant( const OUString
& rName
) const;
227 void EnableJumpCommandReorder( bool bEnable
);
228 void EnableStopOnError( bool bEnable
);
230 static bool IsOpCodeVolatile( OpCode eOp
);
231 static bool IsOpCodeJumpCommand( OpCode eOp
);
233 static bool DeQuote( OUString
& rStr
);
236 static const OUString
& GetNativeSymbol( OpCode eOp
);
237 static sal_Unicode
GetNativeSymbolChar( OpCode eOp
);
238 static bool IsMatrixFunction(OpCode _eOpCode
); // if a function _always_ returns a Matrix
240 short GetNumFormatType() const { return nNumFmt
; }
241 bool CompileTokenArray();
243 void CreateStringFromTokenArray( OUString
& rFormula
);
244 void CreateStringFromTokenArray( OUStringBuffer
& rBuffer
);
245 const FormulaToken
* CreateStringFromToken( OUString
& rFormula
, const FormulaToken
* pToken
);
246 const FormulaToken
* CreateStringFromToken( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
,
247 bool bAllowArrAdvance
= false );
249 void AppendBoolean( OUStringBuffer
& rBuffer
, bool bVal
) const;
250 void AppendDouble( OUStringBuffer
& rBuffer
, double fVal
) const;
251 static void AppendString( OUStringBuffer
& rBuffer
, const OUString
& rStr
);
253 /** Set symbol map corresponding to one of predefined formula::FormulaGrammar::Grammar,
254 including an address reference convention. */
255 inline FormulaGrammar::Grammar
GetGrammar() const { return meGrammar
; }
257 /** Whether current symbol set and grammar need transformation of Table
258 structured references to A1 style references when writing / exporting
261 bool NeedsTableRefTransformation() const;
263 static void UpdateSeparatorsNative( const OUString
& rSep
, const OUString
& rArrayColSep
, const OUString
& rArrayRowSep
);
264 static void ResetNativeSymbols();
265 static void SetNativeSymbols( const OpCodeMapPtr
& xMap
);
/** Separators mapped when loading opcodes from the resource; values other
    than RESOURCE_BASE may override the resource strings. Used by OpCodeList
    implementation via loadSymbols().
 */
279 virtual OUString
FindAddInFunction( const OUString
& rUpperName
, bool bLocalFirst
) const;
280 virtual void fillFromAddInCollectionUpperName( const NonConstOpCodeMapPtr
& xMap
) const;
281 virtual void fillFromAddInMap( const NonConstOpCodeMapPtr
& xMap
, FormulaGrammar::Grammar _eGrammar
) const;
282 virtual void fillFromAddInCollectionEnglishName( const NonConstOpCodeMapPtr
& xMap
) const;
283 virtual void fillAddInToken(::std::vector
< css::sheet::FormulaOpCodeMapEntry
>& _rVec
, bool _bIsEnglish
) const;
285 virtual void SetError(FormulaError nError
);
286 virtual FormulaTokenRef
ExtendRangeReference( FormulaToken
& rTok1
, FormulaToken
& rTok2
);
287 virtual bool HandleExternalReference(const FormulaToken
& _aToken
);
288 virtual bool HandleRange();
289 virtual bool HandleColRowName();
290 virtual bool HandleDbData();
291 virtual bool HandleTableRef();
293 virtual void CreateStringFromExternal( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
) const;
294 virtual void CreateStringFromSingleRef( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
) const;
295 virtual void CreateStringFromDoubleRef( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
) const;
296 virtual void CreateStringFromMatrix( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
) const;
297 virtual void CreateStringFromIndex( OUStringBuffer
& rBuffer
, const FormulaToken
* pToken
) const;
298 virtual void LocalizeString( OUString
& rName
) const; // modify rName - input: exact name
300 /** Whether parameter nParam (0-based) is forced to array for OpCode eOp.
301 Calc: ForceArray or ReferenceOrForceArray type. */
302 virtual bool IsForceArrayParameter( const FormulaToken
* pToken
, sal_uInt16 nParam
) const;
304 void AppendErrorConstant( OUStringBuffer
& rBuffer
, FormulaError nError
) const;
308 void PutCode( FormulaTokenRef
& );
312 void IntersectionLine();
322 void PopTokenArray();
323 void PushTokenArray( FormulaTokenArray
*, bool );
325 bool MergeRangeReference( FormulaToken
* * const pCode1
, FormulaToken
* const * const pCode2
);
327 OUString aCorrectedFormula
; // autocorrected Formula
328 OUString aCorrectedSymbol
; // autocorrected Symbol
330 OpCodeMapPtr mxSymbols
; // which symbols are used
332 FormulaTokenRef mpToken
; // current token
333 FormulaTokenRef pCurrentFactorToken
; // current factor token (of Factor() method)
334 sal_uInt16 nCurrentFactorParam
; // current factor token's parameter, 1-based
335 FormulaTokenArray
* pArr
;
336 FormulaTokenRef mpLastToken
; // last token
338 FormulaToken
** pCode
;
339 FormulaArrayStack
* pStack
;
342 short nRecursion
; // GetToken() recursions
343 short nNumFmt
; // set during CompileTokenArray()
344 sal_uInt16 pc
; // program counter
346 FormulaGrammar::Grammar meGrammar
; // The grammar used, language plus convention.
348 bool bAutoCorrect
; // whether to apply AutoCorrection
349 bool bCorrected
; // AutoCorrection was applied
350 bool glSubTotal
; // if code contains one or more subtotal functions
352 bool mbJumpCommandReorder
; /// Whether or not to reorder RPN for jump commands.
353 bool mbStopOnError
; /// Whether to stop compilation on first encountered error.
356 void InitSymbolsNative() const; /// only SymbolsNative, on first document creation
357 void InitSymbolsEnglish() const; /// only SymbolsEnglish, maybe later
358 void InitSymbolsPODF() const; /// only SymbolsPODF, on demand
359 void InitSymbolsAPI() const; /// only SymbolsAPI, on demand
360 void InitSymbolsODFF() const; /// only SymbolsODFF, on demand
361 void InitSymbolsEnglishXL() const; /// only SymbolsEnglishXL, on demand
362 void InitSymbolsOOXML() const; /// only SymbolsOOXML, on demand
364 void loadSymbols( sal_uInt16 nSymbols
, FormulaGrammar::Grammar eGrammar
, NonConstOpCodeMapPtr
& rxMap
,
365 SeparatorType eSepType
= SEMICOLON_BASE
) const;
367 /** Check pCurrentFactorToken for nParam's (0-based) ForceArray types and
368 set ForceArray at rCurr if so. Set nParam+1 as 1-based
369 nCurrentFactorParam for subsequent ForceArrayOperator() calls.
371 void CheckSetForceArrayParameter( FormulaTokenRef
& rCurr
, sal_uInt8 nParam
);
373 void ForceArrayOperator( FormulaTokenRef
& rCurr
);
377 FormulaTokenRef pPrevFac
;
378 sal_uInt16 nPrevParam
;
379 FormulaCompiler
* pCompiler
;
380 CurrentFactor( const CurrentFactor
& ) = delete;
381 CurrentFactor
& operator=( const CurrentFactor
& ) = delete;
// The constructor saves pComp's pCurrentFactorToken and nCurrentFactorParam
// in pPrevFac and nPrevParam. The two assignments after it write the saved
// values back into the compiler.
// NOTE(review): this copy is missing the pCompiler initializer, the
// constructor body and the header of the function that holds the two
// restoring assignments (presumably the destructor); confirm against the
// upstream header before building.
383 explicit CurrentFactor( FormulaCompiler
* pComp
)
384 : pPrevFac( pComp
->pCurrentFactorToken
)
385 , nPrevParam( pComp
->nCurrentFactorParam
)
390 pCompiler
->pCurrentFactorToken
= pPrevFac
;
391 pCompiler
->nCurrentFactorParam
= nPrevParam
;
393 // yes, this operator= may modify the RValue
394 void operator=( FormulaTokenRef
& r
)
396 pCompiler
->ForceArrayOperator( r
);
397 pCompiler
->pCurrentFactorToken
= r
;
398 pCompiler
->nCurrentFactorParam
= 0;
// Assignment from a raw token pointer: wraps p in a FormulaTokenRef xTemp.
// NOTE(review): the rest of the body and its braces are missing in this
// copy -- presumably it forwards xTemp to operator=( FormulaTokenRef& );
// confirm against the upstream header before building.
400 void operator=( FormulaToken
* p
)
402 FormulaTokenRef
xTemp( p
);
405 operator FormulaTokenRef
&()
406 { return pCompiler
->pCurrentFactorToken
; }
407 FormulaToken
* operator->()
408 { return pCompiler
->pCurrentFactorToken
.operator->(); }
409 operator FormulaToken
*()
410 { return operator->(); }
414 mutable NonConstOpCodeMapPtr mxSymbolsODFF
; // ODFF symbols
415 mutable NonConstOpCodeMapPtr mxSymbolsPODF
; // ODF 1.1 symbols
416 mutable NonConstOpCodeMapPtr mxSymbolsAPI
; // XFunctionAccess API symbols
417 mutable NonConstOpCodeMapPtr mxSymbolsNative
; // native symbols
418 mutable NonConstOpCodeMapPtr mxSymbolsEnglish
; // English symbols
419 mutable NonConstOpCodeMapPtr mxSymbolsEnglishXL
; // English Excel symbols (for VBA formula parsing)
420 mutable NonConstOpCodeMapPtr mxSymbolsOOXML
; // Excel OOXML symbols
426 #endif // INCLUDED_FORMULA_FORMULACOMPILER_HXX
429 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */