Update ooo320-m1
[ooovba.git] / sc / inc / compiler.hxx
blob20b1e643f415aa5fc0a26e03c4e4330781257069
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: compiler.hxx,v $
10 * $Revision: 1.36.30.4 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #ifndef SC_COMPILER_HXX
32 #define SC_COMPILER_HXX
34 #ifndef INCLUDED_STRING_H
35 #include <string.h>
36 #define INCLUDED_STRING_H
37 #endif
38 #include <tools/mempool.hxx>
39 #include "scdllapi.h"
40 #include "global.hxx"
41 #include "refdata.hxx"
42 #include "formula/token.hxx"
43 #include "formula/intruref.hxx"
44 #include "formula/grammar.hxx"
45 #include <unotools/charclass.hxx>
46 #include <rtl/ustrbuf.hxx>
47 #include <com/sun/star/sheet/ExternalLinkInfo.hpp>
48 #include <vector>
50 #include <formula/FormulaCompiler.hxx>
53 #ifndef BOOST_SHARED_PTR_HPP_INCLUDED
54 #include <boost/shared_ptr.hpp>
55 #endif
57 #ifndef INCLUDED_HASH_MAP
58 #include <hash_map>
59 #define INCLUDED_HASH_MAP
60 #endif
62 //-----------------------------------------------
64 // constants and data types also for external modules (ScInterpreter et al)
// Size limits shared with external modules.
// MAXSTRLEN sizes the string buffers declared in this header (cStr, cTabName
// and cName in ScRawToken as MAXSTRLEN+1; cSymbol in ScCompiler as MAXSTRLEN).
// MAXJUMPCOUNT sizes ScRawToken::nJump (MAXJUMPCOUNT+1 entries).
66 #define MAXCODE 512 /* maximum number of tokens in formula */
67 #define MAXSTRLEN 1024 /* maximum length of input string of one symbol */
68 #define MAXJUMPCOUNT 32 /* maximum number of jumps (ocChose) */
70 // flag values of CharTable
// Every non-zero value below is a single distinct bit, so values can be
// combined. Entries of Convention::mpCharTable are tested against these with
// bitwise AND (see IsCharWordChar, IsWordChar and IsCharFlagAllConventions in
// ScCompiler, which look up characters below 128).
71 #define SC_COMPILER_C_ILLEGAL 0x00000000
72 #define SC_COMPILER_C_CHAR 0x00000001
73 #define SC_COMPILER_C_CHAR_BOOL 0x00000002
74 #define SC_COMPILER_C_CHAR_WORD 0x00000004
75 #define SC_COMPILER_C_CHAR_VALUE 0x00000008
76 #define SC_COMPILER_C_CHAR_STRING 0x00000010
77 #define SC_COMPILER_C_CHAR_DONTCARE 0x00000020
78 #define SC_COMPILER_C_BOOL 0x00000040
79 #define SC_COMPILER_C_WORD 0x00000080
80 #define SC_COMPILER_C_WORD_SEP 0x00000100
81 #define SC_COMPILER_C_VALUE 0x00000200
82 #define SC_COMPILER_C_VALUE_SEP 0x00000400
83 #define SC_COMPILER_C_VALUE_EXP 0x00000800
84 #define SC_COMPILER_C_VALUE_SIGN 0x00001000
85 #define SC_COMPILER_C_VALUE_VALUE 0x00002000
86 #define SC_COMPILER_C_STRING_SEP 0x00004000
87 #define SC_COMPILER_C_NAME_SEP 0x00008000 // there can be only one! '\''
88 #define SC_COMPILER_C_CHAR_IDENT 0x00010000 // identifier (built-in function) or reference start
89 #define SC_COMPILER_C_IDENT 0x00020000 // identifier or reference continuation
90 #define SC_COMPILER_C_ODF_LBRACKET 0x00040000 // ODF '[' reference bracket
91 #define SC_COMPILER_C_ODF_RBRACKET 0x00080000 // ODF ']' reference bracket
92 #define SC_COMPILER_C_ODF_LABEL_OP 0x00100000 // ODF '!!' automatic intersection of labels
93 #define SC_COMPILER_C_ODF_NAME_MARKER 0x00200000 // ODF '$$' marker that starts a defined (range) name
94 #define SC_COMPILER_C_CHAR_NAME 0x00400000 // start character of a defined name
95 #define SC_COMPILER_C_NAME 0x00800000 // continuation character of a defined name
// Separator character between a document name and a sheet name.
97 #define SC_COMPILER_FILE_TAB_SEP '#' // 'Doc'#Tab
100 class ScDocument;
101 class ScMatrix;
102 class ScRangeData;
103 class ScExternalRefManager;
104 class ScTokenArray;
106 // constants and data types internal to compiler
108 #if 0
110 OpCode eOp; // OpCode
111 formula::StackVar eType; // type of data
112 USHORT nRefCnt; // reference count
113 BOOL bRaw; // not cloned yet and trimmed to real size
115 #endif
// Leading members expanded into both ScDoubleRawToken and ScRawToken:
//   eOp     - OpCode
//   eType   - type of data
//   nRefCnt - reference count
//   bRaw    - not cloned yet and trimmed to real size
// Presumably kept in one macro so both structs start with the same layout.
117 #define SC_TOKEN_FIX_MEMBERS \
118 OpCode eOp; \
119 formula::StackVar eType; \
120 USHORT nRefCnt; \
121 BOOL bRaw;
// Raw token that carries only the fixed header members plus the
// double / sbyte part of the payload. It has none of the reference, string,
// matrix or jump members that ScRawToken declares.
123 struct ScDoubleRawToken
125 private:
126 SC_TOKEN_FIX_MEMBERS
127 public:
128 union
129 { // union only to assure alignment identical to ScRawToken
130 double nValue;
131 struct {
132 BYTE cByte;
133 bool bHasForceArray;
134 } sbyte;
// NOTE(review): presumably declares pool-based operator new/delete for this
// type (macro is not defined in this header) - confirm in tools/mempool.hxx.
136 DECL_FIXEDMEMPOOL_NEWDEL( ScDoubleRawToken );
// Raw formula token: the fixed header members (SC_TOKEN_FIX_MEMBERS) followed
// by a single union holding the payload. Which union member is valid is
// presumably determined by eType / eOp - confirm against the implementation.
// The destructor is private; instances are released through Delete(), which
// DecRef() calls once the reference count drops to zero.
139 struct ScRawToken
141 friend class ScCompiler;
142 // Friends that use a temporary ScRawToken on the stack (and therefore need
143 // the private dtor) and know what they're doing..
144 friend class ScTokenArray;
145 friend USHORT lcl_ScRawTokenOffset();
146 private:
147 SC_TOKEN_FIX_MEMBERS
148 public:
149 union {
150 double nValue;
151 struct {
152 BYTE cByte;
153 bool bHasForceArray;
154 } sbyte;
155 ScComplexRefData aRef;
// External reference payload: file id, sheet name buffer and the reference.
156 struct {
157 sal_uInt16 nFileId;
158 sal_Unicode cTabName[MAXSTRLEN+1];
159 ScComplexRefData aRef;
160 } extref;
// External name payload: file id and name buffer.
161 struct {
162 sal_uInt16 nFileId;
163 sal_Unicode cName[MAXSTRLEN+1];
164 } extname;
165 ScMatrix* pMat;
166 USHORT nIndex; // index into name collection
167 sal_Unicode cStr[ MAXSTRLEN+1 ]; // string (up to MAXSTRLEN characters + 0)
168 short nJump[MAXJUMPCOUNT+1]; // If/Chose token
// Only bRaw is set here; eOp, eType, nRefCnt and the union stay uninitialized.
171 //! other members not initialized
172 ScRawToken() : bRaw( TRUE ) {}
173 private:
174 ~ScRawToken() {} //! only delete via Delete()
175 public:
176 DECL_FIXEDMEMPOOL_NEWDEL( ScRawToken );
177 formula::StackVar GetType() const { return (formula::StackVar) eType; }
178 OpCode GetOpCode() const { return (OpCode) eOp; }
// Overwrites eOp only; no other member is touched.
179 void NewOpCode( OpCode e ) { eOp = e; }
180 void IncRef() { nRefCnt++; }
// Pre-decrements the count and calls Delete() when it reaches zero.
181 void DecRef() { if( !--nRefCnt ) Delete(); }
182 USHORT GetRef() const { return nRefCnt; }
183 SC_DLLPUBLIC void Delete();
185 // Use these methods only on tokens that are not part of a token array,
186 // since the reference count is cleared!
187 void SetOpCode( OpCode eCode );
188 void SetString( const sal_Unicode* pStr );
189 void SetSingleReference( const ScSingleRefData& rRef );
190 void SetDoubleReference( const ScComplexRefData& rRef );
191 void SetDouble( double fVal );
192 //UNUSED2008-05 void SetInt( int nVal );
193 //UNUSED2008-05 void SetMatrix( ScMatrix* p );
195 // These methods are ok to use, reference count not cleared.
196 //UNUSED2008-05 ScComplexRefData& GetReference();
197 //UNUSED2008-05 void SetReference( ScComplexRefData& rRef );
198 void SetName( USHORT n );
199 void SetExternalSingleRef( sal_uInt16 nFileId, const String& rTabName, const ScSingleRefData& rRef );
200 void SetExternalDoubleRef( sal_uInt16 nFileId, const String& rTabName, const ScComplexRefData& rRef );
201 void SetExternalName( sal_uInt16 nFileId, const String& rName );
202 void SetMatrix( ScMatrix* p );
203 void SetExternal(const sal_Unicode* pStr);
205 ScRawToken* Clone() const; // real copy!
206 formula::FormulaToken* CreateToken() const; // create typified token
207 void Load( SvStream&, USHORT nVer );
209 static xub_StrLen GetStrLen( const sal_Unicode* pStr ); // as long as a "string" is an array
// Size in bytes of nLen sal_Unicode elements; no terminator is added here.
210 static size_t GetStrLenBytes( xub_StrLen nLen )
211 { return nLen * sizeof(sal_Unicode); }
// Same, with the length obtained from GetStrLen( pStr ).
212 static size_t GetStrLenBytes( const sal_Unicode* pStr )
213 { return GetStrLenBytes( GetStrLen( pStr ) ); }
// Reference-holder type for ScRawToken; ScCompiler keeps its current raw
// token in a member of this type (pRawToken).
// NOTE(review): presumably drives ScRawToken::IncRef()/DecRef() - confirm in formula/intruref.hxx.
217 typedef formula::SimpleIntrusiveReference< struct ScRawToken > ScRawTokenRef;
219 class SC_DLLPUBLIC ScCompiler : public formula::FormulaCompiler
221 public:
// URL encoding mode. The current value is stored in meEncodeUrlMode and is
// accessed through SetEncodeUrlMode() / GetEncodeUrlMode(). How each value is
// interpreted is decided in the implementation, which is not in this header.
223 enum EncodeUrlMode
225 ENCODE_BY_GRAMMAR,
226 ENCODE_ALWAYS,
227 ENCODE_NEVER,
// Abstract interface for one address convention. All operations except the
// constructor and destructor are pure virtual. ScCompiler keeps instances in
// the static pConventions array (indexed by AddressConvention value) and the
// active one in pConv.
230 struct Convention
232 const formula::FormulaGrammar::AddressConvention meConv;
// Flag table looked up with characters below 128; entries are combinations
// of the SC_COMPILER_C_* bits.
233 const ULONG* mpCharTable;
236 Convention( formula::FormulaGrammar::AddressConvention eConvP );
237 virtual ~Convention();
239 virtual void MakeRefStr( rtl::OUStringBuffer& rBuffer,
240 const ScCompiler& rCompiler,
241 const ScComplexRefData& rRef,
242 BOOL bSingleRef ) const = 0;
243 virtual ::com::sun::star::i18n::ParseResult
244 parseAnyToken( const String& rFormula,
245 xub_StrLen nSrcPos,
246 const CharClass* pCharClass) const = 0;
249 * Parse the symbol string and pick up the file name and the external
250 * range name.
252 * @return true on successful parse, or false otherwise.
254 virtual bool parseExternalName( const String& rSymbol, String& rFile, String& rName,
255 const ScDocument* pDoc,
256 const ::com::sun::star::uno::Sequence<
257 const ::com::sun::star::sheet::ExternalLinkInfo > * pExternalLinks ) const = 0;
259 virtual String makeExternalNameStr( const String& rFile, const String& rName ) const = 0;
// Two overloads: one takes a single reference, the other a complex (range)
// reference; the remaining parameters are identical.
261 virtual void makeExternalRefStr( ::rtl::OUStringBuffer& rBuffer, const ScCompiler& rCompiler,
262 sal_uInt16 nFileId, const String& rTabName, const ScSingleRefData& rRef,
263 ScExternalRefManager* pRefMgr ) const = 0;
265 virtual void makeExternalRefStr( ::rtl::OUStringBuffer& rBuffer, const ScCompiler& rCompiler,
266 sal_uInt16 nFileId, const String& rTabName, const ScComplexRefData& rRef,
267 ScExternalRefManager* pRefMgr ) const = 0;
269 enum SpecialSymbolType
272 * Character between sheet name and address. In OOO A1 this is
273 * '.', while XL A1 and XL R1C1 this is '!'.
275 SHEET_SEPARATOR,
278 * In OOO A1, a sheet name may be prefixed with '$' to indicate an
279 * absolute sheet position.
281 ABS_SHEET_PREFIX
// Returns the convention-specific character for the requested symbol kind.
283 virtual sal_Unicode getSpecialSymbol( SpecialSymbolType eSymType ) const = 0;
285 friend struct Convention;
287 private:
290 static CharClass *pCharClassEnglish; // character classification for en_US locale
// One slot per address convention, CONV_LAST slots in total. A slot may be
// null: the inline character tests in this class check for that before use.
291 static const Convention *pConventions[ formula::FormulaGrammar::CONV_LAST ];
// Named convention instances; both the pointers and the pointees are const.
293 static const Convention * const pConvOOO_A1;
294 static const Convention * const pConvOOO_A1_ODF;
295 static const Convention * const pConvXL_A1;
296 static const Convention * const pConvXL_R1C1;
297 static const Convention * const pConvXL_OOX;
// One entry of the static add-in name table maAddInMap. The array bound is
// not given here; GetAddInMapCount() reports the number of entries and
// GetAddInMap() gives access to the table.
299 static struct AddInMap
// NOTE(review): pODFF / pEnglish look like the ODFF and English spellings of
// the add-in function name - confirm against the table definition.
301 const char* pODFF;
302 const char* pEnglish;
303 bool bMapDupToInternal; // when writing ODFF
304 const char* pOriginal; // programmatical name
305 const char* pUpper; // upper case programmatical name
306 } maAddInMap[];
307 static const AddInMap* GetAddInMap();
308 static size_t GetAddInMapCount();
// Per-instance compiler state.
// pDoc and aPos are exposed read-only through GetDoc() and GetPos().
310 ScDocument* pDoc;
311 ScAddress aPos;
313 // For CONV_XL_OOX, may be set via API by MOOXML filter.
314 ::com::sun::star::uno::Sequence< const ::com::sun::star::sheet::ExternalLinkInfo > maExternalLinks;
// NOTE(review): sized MAXSTRLEN, whereas the ScRawToken string buffers are
// MAXSTRLEN+1 - confirm the tokenizer leaves room for the terminator.
316 sal_Unicode cSymbol[MAXSTRLEN]; // current Symbol
317 String aFormula; // formula source code
318 xub_StrLen nSrcPos; // tokenizer position (source code)
319 ScRawTokenRef pRawToken;
321 const CharClass* pCharClass; // which character classification is used for parseAnyToken
322 USHORT mnPredetectedReference; // reference when reading ODF, 0 (none), 1 (single) or 2 (double)
323 SCsTAB nMaxTab; // last sheet in document
324 sal_Int32 mnRangeOpPosInSymbol; // if and where a range operator is in symbol
// Active convention; GetCharTableFlags() dereferences it without a null check.
325 const Convention *pConv;
326 EncodeUrlMode meEncodeUrlMode;
327 bool mbCloseBrackets; // whether to close open brackets automatically, default TRUE
// Written by SetExtendedErrorDetection().
328 bool mbExtendedErrorDetection;
329 bool mbRewind; // whether symbol is to be rewound to some step during lexical analysis
// Private tokenizer and symbol-classification helpers. Only declarations are
// visible in this header; what each one accepts is defined in the
// implementation file.
// Note the mixed return types: most Is*() helpers return BOOL, while
// IsExternalNamedRange() returns bool.
331 BOOL NextNewToken(bool bInArray = false);
333 virtual void SetError(USHORT nError);
334 xub_StrLen NextSymbol(bool bInArray);
335 BOOL IsValue( const String& );
336 BOOL IsOpCode( const String&, bool bInArray );
337 BOOL IsOpCode2( const String& );
338 BOOL IsString();
339 BOOL IsReference( const String& );
340 BOOL IsSingleReference( const String& );
341 BOOL IsPredetectedReference( const String& );
342 BOOL IsDoubleReference( const String& );
343 BOOL IsMacro( const String& );
344 BOOL IsNamedRange( const String& );
345 bool IsExternalNamedRange( const String& rSymbol );
346 BOOL IsDBRange( const String& );
347 BOOL IsColRowName( const String& );
348 BOOL IsBoolean( const String& );
349 void AutoCorrectParsedSymbol();
351 void SetRelNameReference();
353 static void InitCharClassEnglish();
355 public:
// Two constructors: document + position, optionally with a token array.
356 ScCompiler( ScDocument* pDocument, const ScAddress&);
358 ScCompiler( ScDocument* pDocument, const ScAddress&,ScTokenArray& rArr);
360 public:
361 static void DeInit(); /// all
363 // for ScAddress::Format()
364 static void CheckTabQuotes( String& aTabName,
365 const formula::FormulaGrammar::AddressConvention eConv = formula::FormulaGrammar::CONV_OOO );
367 static BOOL EnQuote( String& rStr );
368 sal_Unicode GetNativeAddressSymbol( Convention::SpecialSymbolType eType ) const;
371 // Check if it is a valid English function name
372 bool IsEnglishSymbol( const String& rName );
374 //! _either_ CompileForFAP _or_ AutoCorrection, _not_ both
375 // #i101512# SetCompileForFAP is in formula::FormulaCompiler
// Sets both bAutoCorrect and bIgnoreErrors to bVal. Neither member is
// declared in this class; presumably both come from formula::FormulaCompiler.
376 void SetAutoCorrection( BOOL bVal )
377 { bAutoCorrect = bVal; bIgnoreErrors = bVal; }
378 void SetCloseBrackets( bool bVal ) { mbCloseBrackets = bVal; }
// Two overloads: by convention object or by AddressConvention value.
379 void SetRefConvention( const Convention *pConvP );
380 void SetRefConvention( const formula::FormulaGrammar::AddressConvention eConv );
382 /// Set symbol map if not empty.
383 void SetFormulaLanguage( const OpCodeMapPtr & xMap );
385 void SetGrammar( const formula::FormulaGrammar::Grammar eGrammar );
387 void SetEncodeUrlMode( EncodeUrlMode eMode );
388 EncodeUrlMode GetEncodeUrlMode() const;
389 private:
390 /** Set grammar and reference convention from within SetFormulaLanguage()
391 or SetGrammar().
393 @param eNewGrammar
394 The new grammar to be set and the associated reference convention.
396 @param eOldGrammar
397 The previous grammar that was active before SetFormulaLanguage().
398 */
// Private helper taking the new and the previous grammar.
399 void SetGrammarAndRefConvention(
400 const formula::FormulaGrammar::Grammar eNewGrammar,
401 const formula::FormulaGrammar::Grammar eOldGrammar );
402 public:
404 /// Set external link info for the CONV_XL_OOX address convention.
// Copies rLinks into the maExternalLinks member.
405 inline void SetExternalLinks(
406 const ::com::sun::star::uno::Sequence<
407 const ::com::sun::star::sheet::ExternalLinkInfo > & rLinks )
409 maExternalLinks = rLinks;
412 void CreateStringFromXMLTokenArray( String& rFormula, String& rFormulaNmsp );
414 void SetExtendedErrorDetection( bool bVal ) { mbExtendedErrorDetection = bVal; }
// NOTE(review): both accessors only read a member but are not declared const.
// bCorrected and aCorrectedFormula are not declared in this class;
// presumably they come from formula::FormulaCompiler.
416 BOOL IsCorrected() { return bCorrected; }
417 const String& GetCorrectedFormula() { return aCorrectedFormula; }
419 // Use convention from this->aPos by default
420 ScTokenArray* CompileString( const String& rFormula );
421 ScTokenArray* CompileString( const String& rFormula, const String& rFormulaNmsp );
// Read-only access to the document pointer and the position member.
422 const ScDocument* GetDoc() const { return pDoc; }
423 const ScAddress& GetPos() const { return aPos; }
// Reference-adjustment API. Only declarations are visible here; the exact
// semantics of each operation live in the implementation file.
// MoveRelWrap exists as a member and as a static overload that is given the
// token array, document and position explicitly.
425 void MoveRelWrap( SCCOL nMaxCol, SCROW nMaxRow );
426 static void MoveRelWrap( ScTokenArray& rArr, ScDocument* pDoc, const ScAddress& rPos,
427 SCCOL nMaxCol, SCROW nMaxRow );
// rChanged is a non-const reference, i.e. an output flag of the call.
429 BOOL UpdateNameReference( UpdateRefMode eUpdateRefMode,
430 const ScRange&,
431 SCsCOL nDx, SCsROW nDy, SCsTAB nDz,
432 BOOL& rChanged, BOOL bSharedFormula = FALSE);
// rChanged and rRefSizeChanged are non-const references (output flags).
// NOTE(review): the meaning of the returned ScRangeData* (and whether it can
// be null) is not visible in this header - confirm in the implementation.
434 ScRangeData* UpdateReference( UpdateRefMode eUpdateRefMode,
435 const ScAddress& rOldPos, const ScRange&,
436 SCsCOL nDx, SCsROW nDy, SCsTAB nDz,
437 BOOL& rChanged, BOOL& rRefSizeChanged );
439 /// Only once for converted shared formulas,
440 /// token array has to be compiled afterwards.
441 void UpdateSharedFormulaReference( UpdateRefMode eUpdateRefMode,
442 const ScAddress& rOldPos, const ScRange&,
443 SCsCOL nDx, SCsROW nDy, SCsTAB nDz );
// Sheet insert / delete / move notifications; bCompile in UpdateDeleteTab is
// a non-const reference (output flag).
445 ScRangeData* UpdateInsertTab(SCTAB nTable, BOOL bIsName );
446 ScRangeData* UpdateDeleteTab(SCTAB nTable, BOOL bIsMove, BOOL bIsName, BOOL& bCompile);
447 ScRangeData* UpdateMoveTab(SCTAB nOldPos, SCTAB nNewPos, BOOL bIsName );
449 BOOL HasModifiedRange();
451 /** If the character is allowed as first character in sheet names or
452 references, includes '$' and '?'. */
// Characters below 128 are looked up in the char table of the convention
// selected by eConv and must carry the SC_COMPILER_C_CHAR_WORD bit; a null
// convention slot yields FALSE. All other characters are delegated to
// ScGlobal::pCharClass->isLetterNumeric().
// NOTE(review): eConv indexes pConventions (CONV_LAST slots) without a range
// check - confirm callers never pass a value outside that range.
453 static inline BOOL IsCharWordChar( String const & rStr,
454 xub_StrLen nPos,
455 const formula::FormulaGrammar::AddressConvention eConv = formula::FormulaGrammar::CONV_OOO )
457 sal_Unicode c = rStr.GetChar( nPos );
458 if (c < 128)
460 return pConventions[eConv] ? static_cast<BOOL>(
461 (pConventions[eConv]->mpCharTable[ UINT8(c) ] & SC_COMPILER_C_CHAR_WORD) == SC_COMPILER_C_CHAR_WORD) :
462 FALSE; // no convention => assume invalid
464 else
465 return ScGlobal::pCharClass->isLetterNumeric( rStr, nPos );
468 /** If the character is allowed in sheet names, thus may be part of a
469 reference, includes '$' and '?' and such. */
// Same structure as IsCharWordChar, but tests the SC_COMPILER_C_WORD bit.
// Characters below 128 use the char table of the convention selected by
// eConv (a null slot yields FALSE); all others are delegated to
// ScGlobal::pCharClass->isLetterNumeric().
// NOTE(review): eConv indexes pConventions (CONV_LAST slots) without a range
// check - confirm callers never pass a value outside that range.
470 static inline BOOL IsWordChar( String const & rStr,
471 xub_StrLen nPos,
472 const formula::FormulaGrammar::AddressConvention eConv = formula::FormulaGrammar::CONV_OOO )
474 sal_Unicode c = rStr.GetChar( nPos );
475 if (c < 128)
477 return pConventions[eConv] ? static_cast<BOOL>(
478 (pConventions[eConv]->mpCharTable[ UINT8(c) ] & SC_COMPILER_C_WORD) == SC_COMPILER_C_WORD) :
479 FALSE; // convention not known => assume invalid
481 else
482 return ScGlobal::pCharClass->isLetterNumeric( rStr, nPos );
485 /** If the character is allowed as tested by nFlags (SC_COMPILER_C_...
486 bits) for all known address conventions. If more than one bit is given
487 in nFlags, all bits must match. If bTestLetterNumeric is FALSE and
488 char>=128, no LetterNumeric test is done and FALSE is returned. */
// Unlike IsCharWordChar / IsWordChar, a null convention slot is skipped here
// rather than treated as a failure.
489 static inline bool IsCharFlagAllConventions( String const & rStr,
490 xub_StrLen nPos,
491 ULONG nFlags,
492 bool bTestLetterNumeric = true )
494 sal_Unicode c = rStr.GetChar( nPos );
495 if (c < 128)
// The increment happens inside the loop condition, so the first index tested
// is CONV_UNSPECIFIED + 1 and the last is CONV_LAST - 1.
497 for ( int nConv = formula::FormulaGrammar::CONV_UNSPECIFIED;
498 ++nConv < formula::FormulaGrammar::CONV_LAST; )
// Fail as soon as one existing convention lacks any of the requested bits.
500 if (pConventions[nConv] &&
501 ((pConventions[nConv]->mpCharTable[ UINT8(c) ] & nFlags) != nFlags))
502 return false;
503 // convention not known => assume valid
505 return true;
507 else if (bTestLetterNumeric)
508 return ScGlobal::pCharClass->isLetterNumeric( rStr, nPos );
509 else
510 return false;
513 private:
514 // FormulaCompiler
// Virtual functions grouped under the FormulaCompiler heading; presumably
// overrides of hooks declared in the base class formula::FormulaCompiler -
// confirm against formula/FormulaCompiler.hxx. Declarations only.
515 virtual String FindAddInFunction( const String& rUpperName, BOOL bLocalFirst ) const;
516 virtual void fillFromAddInCollectionUpperName( NonConstOpCodeMapPtr xMap ) const;
517 virtual void fillFromAddInCollectionEnglishName( NonConstOpCodeMapPtr xMap ) const;
518 virtual void fillFromAddInMap( NonConstOpCodeMapPtr xMap, formula::FormulaGrammar::Grammar _eGrammar ) const;
519 virtual void fillAddInToken(::std::vector< ::com::sun::star::sheet::FormulaOpCodeMapEntry >& _rVec,bool _bIsEnglish) const;
521 virtual BOOL HandleExternalReference(const formula::FormulaToken& _aToken);
522 virtual BOOL HandleRange();
523 virtual BOOL HandleSingleRef();
524 virtual BOOL HandleDbData();
526 virtual formula::FormulaTokenRef ExtendRangeReference( formula::FormulaToken & rTok1, formula::FormulaToken & rTok2, bool bReuseDoubleRef );
// String-building hooks: each receives the output buffer and one token.
527 virtual void CreateStringFromExternal(rtl::OUStringBuffer& rBuffer, formula::FormulaToken* pTokenP);
528 virtual void CreateStringFromSingleRef(rtl::OUStringBuffer& rBuffer,formula::FormulaToken* _pTokenP);
529 virtual void CreateStringFromDoubleRef(rtl::OUStringBuffer& rBuffer,formula::FormulaToken* _pTokenP);
530 virtual void CreateStringFromMatrix( rtl::OUStringBuffer& rBuffer, formula::FormulaToken* _pTokenP);
531 virtual void CreateStringFromIndex(rtl::OUStringBuffer& rBuffer,formula::FormulaToken* _pTokenP);
532 virtual void LocalizeString( String& rName ); // modify rName - input: exact name
533 virtual BOOL IsImportingXML() const;
535 /// Access the CharTable flags
// Returns the active convention's table entry for characters below 128 and 0
// for every other character. pConv is dereferenced without a null check, and
// the function is not declared const although it only reads.
536 inline ULONG GetCharTableFlags( sal_Unicode c )
537 { return c < 128 ? pConv->mpCharTable[ UINT8(c) ] : 0; }
// Free function exported with SC_DLLPUBLIC; takes an OpCode and returns a String.
// NOTE(review): presumably the native (localized) symbol for eOp - confirm in the implementation.
540 SC_DLLPUBLIC String GetScCompilerNativeSymbol( OpCode eOp ); //CHINA001
542 #endif