bump product version to 5.0.4.1
[LibreOffice.git] / starmath / source / parse.cxx
blobcde1214f2072bfdb1fe5d8b8d23c150ac647361d
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/i18n/UnicodeType.hpp>
21 #include <i18nlangtag/lang.h>
22 #include <unotools/charclass.hxx>
23 #include <editeng/unolingu.hxx>
24 #include <unotools/syslocale.hxx>
25 #include <sal/macros.h>
26 #include <vcl/settings.hxx>
27 #include "parse.hxx"
28 #include "starmath.hrc"
29 #include "smdll.hxx"
30 #include "smmod.hxx"
31 #include "config.hxx"
33 using namespace ::com::sun::star;
34 using namespace ::com::sun::star::i18n;
37 SmToken::SmToken() :
38 eType (TUNKNOWN),
39 cMathChar ('\0')
41 nGroup = nCol = nRow = nLevel = 0;
44 SmToken::SmToken(SmTokenType eTokenType,
45 sal_Unicode cMath,
46 const sal_Char* pText,
47 sal_uLong nTokenGroup,
48 sal_uInt16 nTokenLevel) {
49 eType = eTokenType;
50 cMathChar = cMath;
51 aText = OUString::createFromAscii(pText);
52 nGroup = nTokenGroup;
53 nLevel = nTokenLevel;
54 nCol = nRow = 0;
60 static const SmTokenTableEntry aTokenTable[] =
62 { "Im" , TIM, MS_IM, TGSTANDALONE, 5 },
63 { "MZ23", TDEBUG, '\0', TGATTRIBUT, 0 },
64 { "Re" , TRE, MS_RE, TGSTANDALONE, 5 },
65 { "abs", TABS, '\0', TGUNOPER, 13 },
66 { "arcosh", TACOSH, '\0', TGFUNCTION, 5 },
67 { "arcoth", TACOTH, '\0', TGFUNCTION, 5 },
68 { "acute", TACUTE, MS_ACUTE, TGATTRIBUT, 5 },
69 { "aleph" , TALEPH, MS_ALEPH, TGSTANDALONE, 5 },
70 { "alignb", TALIGNC, '\0', TGALIGN | TGDISCARDED, 0},
71 { "alignc", TALIGNC, '\0', TGALIGN, 0},
72 { "alignl", TALIGNL, '\0', TGALIGN, 0},
73 { "alignm", TALIGNC, '\0', TGALIGN | TGDISCARDED, 0},
74 { "alignr", TALIGNR, '\0', TGALIGN, 0},
75 { "alignt", TALIGNC, '\0', TGALIGN | TGDISCARDED, 0},
76 { "and", TAND, MS_AND, TGPRODUCT, 0},
77 { "approx", TAPPROX, MS_APPROX, TGRELATION, 0},
78 { "aqua", TAQUA, '\0', TGCOLOR, 0},
79 { "arccos", TACOS, '\0', TGFUNCTION, 5},
80 { "arccot", TACOT, '\0', TGFUNCTION, 5},
81 { "arcsin", TASIN, '\0', TGFUNCTION, 5},
82 { "arctan", TATAN, '\0', TGFUNCTION, 5},
83 { "arsinh", TASINH, '\0', TGFUNCTION, 5},
84 { "artanh", TATANH, '\0', TGFUNCTION, 5},
85 { "backepsilon" , TBACKEPSILON, MS_BACKEPSILON, TGSTANDALONE, 5},
86 { "bar", TBAR, MS_BAR, TGATTRIBUT, 5},
87 { "binom", TBINOM, '\0', 0, 5 },
88 { "black", TBLACK, '\0', TGCOLOR, 0},
89 { "blue", TBLUE, '\0', TGCOLOR, 0},
90 { "bold", TBOLD, '\0', TGFONTATTR, 5},
91 { "boper", TBOPER, '\0', TGPRODUCT, 0},
92 { "breve", TBREVE, MS_BREVE, TGATTRIBUT, 5},
93 { "bslash", TBACKSLASH, MS_BACKSLASH, TGPRODUCT, 0 },
94 { "cdot", TCDOT, MS_CDOT, TGPRODUCT, 0},
95 { "check", TCHECK, MS_CHECK, TGATTRIBUT, 5},
96 { "circ" , TCIRC, MS_CIRC, TGSTANDALONE, 5},
97 { "circle", TCIRCLE, MS_CIRCLE, TGATTRIBUT, 5},
98 { "color", TCOLOR, '\0', TGFONTATTR, 5},
99 { "coprod", TCOPROD, MS_COPROD, TGOPER, 5},
100 { "cos", TCOS, '\0', TGFUNCTION, 5},
101 { "cosh", TCOSH, '\0', TGFUNCTION, 5},
102 { "cot", TCOT, '\0', TGFUNCTION, 5},
103 { "coth", TCOTH, '\0', TGFUNCTION, 5},
104 { "csub", TCSUB, '\0', TGPOWER, 0},
105 { "csup", TCSUP, '\0', TGPOWER, 0},
106 { "cyan", TCYAN, '\0', TGCOLOR, 0},
107 { "dddot", TDDDOT, MS_DDDOT, TGATTRIBUT, 5},
108 { "ddot", TDDOT, MS_DDOT, TGATTRIBUT, 5},
109 { "def", TDEF, MS_DEF, TGRELATION, 0},
110 { "div", TDIV, MS_DIV, TGPRODUCT, 0},
111 { "divides", TDIVIDES, MS_LINE, TGRELATION, 0},
112 { "dlarrow" , TDLARROW, MS_DLARROW, TGSTANDALONE, 5},
113 { "dlrarrow" , TDLRARROW, MS_DLRARROW, TGSTANDALONE, 5},
114 { "dot", TDOT, MS_DOT, TGATTRIBUT, 5},
115 { "dotsaxis", TDOTSAXIS, MS_DOTSAXIS, TGSTANDALONE, 5}, // 5 to continue expression
116 { "dotsdiag", TDOTSDIAG, MS_DOTSUP, TGSTANDALONE, 5},
117 { "dotsdown", TDOTSDOWN, MS_DOTSDOWN, TGSTANDALONE, 5},
118 { "dotslow", TDOTSLOW, MS_DOTSLOW, TGSTANDALONE, 5},
119 { "dotsup", TDOTSUP, MS_DOTSUP, TGSTANDALONE, 5},
120 { "dotsvert", TDOTSVERT, MS_DOTSVERT, TGSTANDALONE, 5},
121 { "downarrow" , TDOWNARROW, MS_DOWNARROW, TGSTANDALONE, 5},
122 { "drarrow" , TDRARROW, MS_DRARROW, TGSTANDALONE, 5},
123 { "emptyset" , TEMPTYSET, MS_EMPTYSET, TGSTANDALONE, 5},
124 { "equiv", TEQUIV, MS_EQUIV, TGRELATION, 0},
125 { "exists", TEXISTS, MS_EXISTS, TGSTANDALONE, 5},
126 { "notexists", TNOTEXISTS, MS_NOTEXISTS, TGSTANDALONE, 5},
127 { "exp", TEXP, '\0', TGFUNCTION, 5},
128 { "fact", TFACT, MS_FACT, TGUNOPER, 5},
129 { "fixed", TFIXED, '\0', TGFONT, 0},
130 { "font", TFONT, '\0', TGFONTATTR, 5},
131 { "forall", TFORALL, MS_FORALL, TGSTANDALONE, 5},
132 { "from", TFROM, '\0', TGLIMIT, 0},
133 { "fuchsia", TFUCHSIA, '\0', TGCOLOR, 0},
134 { "func", TFUNC, '\0', TGFUNCTION, 5},
135 { "ge", TGE, MS_GE, TGRELATION, 0},
136 { "geslant", TGESLANT, MS_GESLANT, TGRELATION, 0 },
137 { "gg", TGG, MS_GG, TGRELATION, 0},
138 { "grave", TGRAVE, MS_GRAVE, TGATTRIBUT, 5},
139 { "gray", TGRAY, '\0', TGCOLOR, 0},
140 { "green", TGREEN, '\0', TGCOLOR, 0},
141 { "gt", TGT, MS_GT, TGRELATION, 0},
142 { "hat", THAT, MS_HAT, TGATTRIBUT, 5},
143 { "hbar" , THBAR, MS_HBAR, TGSTANDALONE, 5},
144 { "iiint", TIIINT, MS_IIINT, TGOPER, 5},
145 { "iint", TIINT, MS_IINT, TGOPER, 5},
146 { "in", TIN, MS_IN, TGRELATION, 0},
147 { "infinity" , TINFINITY, MS_INFINITY, TGSTANDALONE, 5},
148 { "infty" , TINFINITY, MS_INFINITY, TGSTANDALONE, 5},
149 { "int", TINT, MS_INT, TGOPER, 5},
150 { "intd", TINTD, MS_INT, TGUNOPER, 5},
151 { "intersection", TINTERSECT, MS_INTERSECT, TGPRODUCT, 0},
152 { "ital", TITALIC, '\0', TGFONTATTR, 5},
153 { "italic", TITALIC, '\0', TGFONTATTR, 5},
154 { "lambdabar" , TLAMBDABAR, MS_LAMBDABAR, TGSTANDALONE, 5},
155 { "langle", TLANGLE, MS_LMATHANGLE, TGLBRACES, 5},
156 { "lbrace", TLBRACE, MS_LBRACE, TGLBRACES, 5},
157 { "lceil", TLCEIL, MS_LCEIL, TGLBRACES, 5},
158 { "ldbracket", TLDBRACKET, MS_LDBRACKET, TGLBRACES, 5},
159 { "ldline", TLDLINE, MS_DVERTLINE, TGLBRACES, 5},
160 { "le", TLE, MS_LE, TGRELATION, 0},
161 { "left", TLEFT, '\0', 0, 5},
162 { "leftarrow" , TLEFTARROW, MS_LEFTARROW, TGSTANDALONE, 5},
163 { "leslant", TLESLANT, MS_LESLANT, TGRELATION, 0 },
164 { "lfloor", TLFLOOR, MS_LFLOOR, TGLBRACES, 5},
165 { "lim", TLIM, '\0', TGOPER, 5},
166 { "lime", TLIME, '\0', TGCOLOR, 0},
167 { "liminf", TLIMINF, '\0', TGOPER, 5},
168 { "limsup", TLIMSUP, '\0', TGOPER, 5},
169 { "lint", TLINT, MS_LINT, TGOPER, 5},
170 { "ll", TLL, MS_LL, TGRELATION, 0},
171 { "lline", TLLINE, MS_VERTLINE, TGLBRACES, 5},
172 { "llint", TLLINT, MS_LLINT, TGOPER, 5},
173 { "lllint", TLLLINT, MS_LLLINT, TGOPER, 5},
174 { "ln", TLN, '\0', TGFUNCTION, 5},
175 { "log", TLOG, '\0', TGFUNCTION, 5},
176 { "lsub", TLSUB, '\0', TGPOWER, 0},
177 { "lsup", TLSUP, '\0', TGPOWER, 0},
178 { "lt", TLT, MS_LT, TGRELATION, 0},
179 { "magenta", TMAGENTA, '\0', TGCOLOR, 0},
180 { "maroon", TMAROON, '\0', TGCOLOR, 0},
181 { "matrix", TMATRIX, '\0', 0, 5},
182 { "minusplus", TMINUSPLUS, MS_MINUSPLUS, TGUNOPER | TGSUM, 5},
183 { "mline", TMLINE, MS_VERTLINE, 0, 0}, //! not in TGRBRACES, Level 0
184 { "nabla", TNABLA, MS_NABLA, TGSTANDALONE, 5},
185 { "navy", TNAVY, '\0', TGCOLOR, 0},
186 { "nbold", TNBOLD, '\0', TGFONTATTR, 5},
187 { "ndivides", TNDIVIDES, MS_NDIVIDES, TGRELATION, 0},
188 { "neg", TNEG, MS_NEG, TGUNOPER, 5 },
189 { "neq", TNEQ, MS_NEQ, TGRELATION, 0},
190 { "newline", TNEWLINE, '\0', 0, 0},
191 { "ni", TNI, MS_NI, TGRELATION, 0},
192 { "nitalic", TNITALIC, '\0', TGFONTATTR, 5},
193 { "none", TNONE, '\0', TGLBRACES | TGRBRACES, 0},
194 { "nospace", TNOSPACE, '\0', TGSTANDALONE, 5},
195 { "notin", TNOTIN, MS_NOTIN, TGRELATION, 0},
196 { "nroot", TNROOT, MS_SQRT, TGUNOPER, 5},
197 { "nsubset", TNSUBSET, MS_NSUBSET, TGRELATION, 0 },
198 { "nsupset", TNSUPSET, MS_NSUPSET, TGRELATION, 0 },
199 { "nsubseteq", TNSUBSETEQ, MS_NSUBSETEQ, TGRELATION, 0 },
200 { "nsupseteq", TNSUPSETEQ, MS_NSUPSETEQ, TGRELATION, 0 },
201 { "odivide", TODIVIDE, MS_ODIVIDE, TGPRODUCT, 0},
202 { "odot", TODOT, MS_ODOT, TGPRODUCT, 0},
203 { "olive", TOLIVE, '\0', TGCOLOR, 0},
204 { "ominus", TOMINUS, MS_OMINUS, TGSUM, 0},
205 { "oper", TOPER, '\0', TGOPER, 5},
206 { "oplus", TOPLUS, MS_OPLUS, TGSUM, 0},
207 { "or", TOR, MS_OR, TGSUM, 0},
208 { "ortho", TORTHO, MS_ORTHO, TGRELATION, 0},
209 { "otimes", TOTIMES, MS_OTIMES, TGPRODUCT, 0},
210 { "over", TOVER, '\0', TGPRODUCT, 0},
211 { "overbrace", TOVERBRACE, MS_OVERBRACE, TGPRODUCT, 5},
212 { "overline", TOVERLINE, '\0', TGATTRIBUT, 5},
213 { "overstrike", TOVERSTRIKE, '\0', TGATTRIBUT, 5},
214 { "owns", TNI, MS_NI, TGRELATION, 0},
215 { "parallel", TPARALLEL, MS_DLINE, TGRELATION, 0},
216 { "partial", TPARTIAL, MS_PARTIAL, TGSTANDALONE, 5 },
217 { "phantom", TPHANTOM, '\0', TGFONTATTR, 5},
218 { "plusminus", TPLUSMINUS, MS_PLUSMINUS, TGUNOPER | TGSUM, 5},
219 { "prec", TPRECEDES, MS_PRECEDES, TGRELATION, 0 },
220 { "preccurlyeq", TPRECEDESEQUAL, MS_PRECEDESEQUAL, TGRELATION, 0 },
221 { "precsim", TPRECEDESEQUIV, MS_PRECEDESEQUIV, TGRELATION, 0 },
222 { "nprec", TNOTPRECEDES, MS_NOTPRECEDES, TGRELATION, 0 },
223 { "prod", TPROD, MS_PROD, TGOPER, 5},
224 { "prop", TPROP, MS_PROP, TGRELATION, 0},
225 { "purple", TPURPLE, '\0', TGCOLOR, 0},
226 { "rangle", TRANGLE, MS_RMATHANGLE, TGRBRACES, 0}, //! 0 to terminate expression
227 { "rbrace", TRBRACE, MS_RBRACE, TGRBRACES, 0},
228 { "rceil", TRCEIL, MS_RCEIL, TGRBRACES, 0},
229 { "rdbracket", TRDBRACKET, MS_RDBRACKET, TGRBRACES, 0},
230 { "rdline", TRDLINE, MS_DVERTLINE, TGRBRACES, 0},
231 { "red", TRED, '\0', TGCOLOR, 0},
232 { "rfloor", TRFLOOR, MS_RFLOOR, TGRBRACES, 0}, //! 0 to terminate expression
233 { "right", TRIGHT, '\0', 0, 0},
234 { "rightarrow" , TRIGHTARROW, MS_RIGHTARROW, TGSTANDALONE, 5},
235 { "rline", TRLINE, MS_VERTLINE, TGRBRACES, 0}, //! 0 to terminate expression
236 { "rsub", TRSUB, '\0', TGPOWER, 0},
237 { "rsup", TRSUP, '\0', TGPOWER, 0},
238 { "sans", TSANS, '\0', TGFONT, 0},
239 { "serif", TSERIF, '\0', TGFONT, 0},
240 { "setC" , TSETC, MS_SETC, TGSTANDALONE, 5},
241 { "setN" , TSETN, MS_SETN, TGSTANDALONE, 5},
242 { "setQ" , TSETQ, MS_SETQ, TGSTANDALONE, 5},
243 { "setR" , TSETR, MS_SETR, TGSTANDALONE, 5},
244 { "setZ" , TSETZ, MS_SETZ, TGSTANDALONE, 5},
245 { "setminus", TBACKSLASH, MS_BACKSLASH, TGPRODUCT, 0 },
246 { "silver", TSILVER, '\0', TGCOLOR, 0},
247 { "sim", TSIM, MS_SIM, TGRELATION, 0},
248 { "simeq", TSIMEQ, MS_SIMEQ, TGRELATION, 0},
249 { "sin", TSIN, '\0', TGFUNCTION, 5},
250 { "sinh", TSINH, '\0', TGFUNCTION, 5},
251 { "size", TSIZE, '\0', TGFONTATTR, 5},
252 { "slash", TSLASH, MS_SLASH, TGPRODUCT, 0 },
253 { "sqrt", TSQRT, MS_SQRT, TGUNOPER, 5},
254 { "stack", TSTACK, '\0', 0, 5},
255 { "sub", TRSUB, '\0', TGPOWER, 0},
256 { "subset", TSUBSET, MS_SUBSET, TGRELATION, 0},
257 { "succ", TSUCCEEDS, MS_SUCCEEDS, TGRELATION, 0 },
258 { "succcurlyeq", TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TGRELATION, 0 },
259 { "succsim", TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TGRELATION, 0 },
260 { "nsucc", TNOTSUCCEEDS, MS_NOTSUCCEEDS, TGRELATION, 0 },
261 { "subseteq", TSUBSETEQ, MS_SUBSETEQ, TGRELATION, 0},
262 { "sum", TSUM, MS_SUM, TGOPER, 5},
263 { "sup", TRSUP, '\0', TGPOWER, 0},
264 { "supset", TSUPSET, MS_SUPSET, TGRELATION, 0},
265 { "supseteq", TSUPSETEQ, MS_SUPSETEQ, TGRELATION, 0},
266 { "tan", TTAN, '\0', TGFUNCTION, 5},
267 { "tanh", TTANH, '\0', TGFUNCTION, 5},
268 { "teal", TTEAL, '\0', TGCOLOR, 0},
269 { "tilde", TTILDE, MS_TILDE, TGATTRIBUT, 5},
270 { "times", TTIMES, MS_TIMES, TGPRODUCT, 0},
271 { "to", TTO, '\0', TGLIMIT, 0},
272 { "toward", TTOWARD, MS_RIGHTARROW, TGRELATION, 0},
273 { "transl", TTRANSL, MS_TRANSL, TGRELATION, 0},
274 { "transr", TTRANSR, MS_TRANSR, TGRELATION, 0},
275 { "underbrace", TUNDERBRACE, MS_UNDERBRACE, TGPRODUCT, 5},
276 { "underline", TUNDERLINE, '\0', TGATTRIBUT, 5},
277 { "union", TUNION, MS_UNION, TGSUM, 0},
278 { "uoper", TUOPER, '\0', TGUNOPER, 5},
279 { "uparrow" , TUPARROW, MS_UPARROW, TGSTANDALONE, 5},
280 { "vec", TVEC, MS_VEC, TGATTRIBUT, 5},
281 { "white", TWHITE, '\0', TGCOLOR, 0},
282 { "widebslash", TWIDEBACKSLASH, MS_BACKSLASH, TGPRODUCT, 0 },
283 { "widehat", TWIDEHAT, MS_HAT, TGATTRIBUT, 5},
284 { "widetilde", TWIDETILDE, MS_TILDE, TGATTRIBUT, 5},
285 { "wideslash", TWIDESLASH, MS_SLASH, TGPRODUCT, 0 },
286 { "widevec", TWIDEVEC, MS_VEC, TGATTRIBUT, 5},
287 { "wp" , TWP, MS_WP, TGSTANDALONE, 5},
288 { "yellow", TYELLOW, '\0', TGCOLOR, 0},
289 { "", TEND, '\0', 0, 0}
292 const SmTokenTableEntry * SmParser::GetTokenTableEntry( const OUString &rName )
294 const SmTokenTableEntry * pRes = 0;
295 if (!rName.isEmpty())
297 for (size_t i = 0; i < SAL_N_ELEMENTS(aTokenTable); ++i)
299 if (rName.equalsIgnoreAsciiCase( OUString::createFromAscii(aTokenTable[i].pIdent) ))
301 pRes = &aTokenTable[i];
302 break;
307 return pRes;
#if OSL_DEBUG_LEVEL > 1

// Characters treated as token delimiters by IsDelimiter() (debug builds only).
static const sal_Unicode aDelimiterTable[] =
{
    ' ',  '\t', '\n', '\r', '+',  '-',  '*',  '/',  '=',  '#',
    '%',  '\\', '"',  '~',  '`',  '>',  '<',  '&',  '|',  '(',
    ')',  '{',  '}',  '[',  ']',  '^',  '_',
    '\0'    // end of list symbol
};

// Returns true if nPos is the end of rTxt, or if the character at nPos is
// listed in aDelimiterTable, or if its Unicode type is SPACE_SEPARATOR or
// CONTROL.
bool SmParser::IsDelimiter( const OUString &rTxt, sal_Int32 nPos )
{
    assert(nPos <= rTxt.getLength()); //index out of range

    if (nPos == rTxt.getLength())
        return true;

    const sal_Unicode cCurrent = rTxt[nPos];

    // scan the table up to (not including) its '\0' terminator
    bool bInTable = false;
    for (const sal_Unicode *pEntry = aDelimiterTable; *pEntry != 0 && !bInTable; ++pEntry)
        bInTable = (*pEntry == cCurrent);

    const sal_Int16 nCharType =
        SM_MOD()->GetSysLocale().GetCharClass().getType( rTxt, nPos );

    return bInTable
        || nCharType == com::sun::star::i18n::UnicodeType::SPACE_SEPARATOR
        || nCharType == com::sun::star::i18n::UnicodeType::CONTROL;
}

#endif
350 void SmParser::Replace( sal_Int32 nPos, sal_Int32 nLen, const OUString &rText )
352 OSL_ENSURE( nPos + nLen <= m_aBufferString.getLength(), "argument mismatch" );
354 m_aBufferString = m_aBufferString.replaceAt( nPos, nLen, rText );
355 sal_Int32 nChg = rText.getLength() - nLen;
356 m_nBufferIndex = m_nBufferIndex + nChg;
357 m_nTokenIndex = m_nTokenIndex + nChg;
361 // First character may be any alphabetic
362 const sal_Int32 coStartFlags =
363 KParseTokens::ANY_LETTER |
364 KParseTokens::IGNORE_LEADING_WS;
366 // Continuing characters may be any alphabetic
367 const sal_Int32 coContFlags =
368 (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
369 | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
371 // user-defined char continuing characters may be any alphanumeric or dot.
372 const sal_Int32 coUserDefinedCharContFlags =
373 ((KParseTokens::ANY_LETTER_OR_NUMBER | KParseTokens::IGNORE_LEADING_WS | KParseTokens::ASC_DOT)
374 & ~KParseTokens::IGNORE_LEADING_WS)
375 | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
377 // First character for numbers, may be any numeric or dot
378 const sal_Int32 coNumStartFlags =
379 KParseTokens::ASC_DIGIT |
380 KParseTokens::ASC_DOT |
381 KParseTokens::IGNORE_LEADING_WS;
382 // Continuing characters for numbers, may be any numeric or dot.
383 const sal_Int32 coNumContFlags =
384 coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS;
386 void SmParser::NextToken()
388 static const OUString aEmptyStr;
390 sal_Int32 nBufLen = m_aBufferString.getLength();
391 ParseResult aRes;
392 sal_Int32 nRealStart;
393 bool bCont;
394 CharClass aCC(SM_MOD()->GetSysLocale().GetLanguageTag());
397 // skip white spaces
398 while (UnicodeType::SPACE_SEPARATOR ==
399 aCC.getType( m_aBufferString, m_nBufferIndex ))
400 ++m_nBufferIndex;
402 // Try to parse a number. This should be independent from the locale
403 // setting, so temporarily set the language to English.
404 // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
405 LanguageTag aOldLoc(aCC.getLanguageTag());
406 aCC.setLanguageTag(LanguageTag(m_aDotLoc));
407 aRes = aCC.parsePredefinedToken(KParseType::ASC_NUMBER,
408 m_aBufferString, m_nBufferIndex,
409 coNumStartFlags, aEmptyStr,
410 coNumContFlags, aEmptyStr);
411 aCC.setLanguageTag(aOldLoc);
413 if (aRes.TokenType == 0)
415 // Try again with the default token parsing.
416 aRes = aCC.parseAnyToken(m_aBufferString, m_nBufferIndex,
417 coStartFlags, aEmptyStr,
418 coContFlags, aEmptyStr);
421 nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
422 m_nBufferIndex = nRealStart;
424 bCont = false;
425 if ( aRes.TokenType == 0 &&
426 nRealStart < nBufLen &&
427 '\n' == m_aBufferString[ nRealStart ] )
429 // keep data needed for tokens row and col entry up to date
430 ++m_Row;
431 m_nBufferIndex = m_nColOff = nRealStart + 1;
432 bCont = true;
434 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
436 if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
438 //SkipComment
439 m_nBufferIndex = nRealStart + 2;
440 while (m_nBufferIndex < nBufLen &&
441 '\n' != m_aBufferString[ m_nBufferIndex ])
442 ++m_nBufferIndex;
443 bCont = true;
447 } while (bCont);
449 // set index of current token
450 m_nTokenIndex = m_nBufferIndex;
452 m_aCurToken.nRow = m_Row;
453 m_aCurToken.nCol = nRealStart - m_nColOff + 1;
455 bool bHandled = true;
456 if (nRealStart >= nBufLen)
458 m_aCurToken.eType = TEND;
459 m_aCurToken.cMathChar = '\0';
460 m_aCurToken.nGroup = 0;
461 m_aCurToken.nLevel = 0;
462 m_aCurToken.aText.clear();
464 else if (aRes.TokenType & KParseType::ANY_NUMBER)
466 sal_Int32 n = aRes.EndPos - nRealStart;
467 OSL_ENSURE( n >= 0, "length < 0" );
468 m_aCurToken.eType = TNUMBER;
469 m_aCurToken.cMathChar = '\0';
470 m_aCurToken.nGroup = 0;
471 m_aCurToken.nLevel = 5;
472 m_aCurToken.aText = m_aBufferString.copy( nRealStart, n );
474 #if OSL_DEBUG_LEVEL > 1
475 if (!IsDelimiter( m_aBufferString, aRes.EndPos ))
476 SAL_WARN( "starmath", "identifier really finished? (compatibility!)" );
477 #endif
479 else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING)
481 m_aCurToken.eType = TTEXT;
482 m_aCurToken.cMathChar = '\0';
483 m_aCurToken.nGroup = 0;
484 m_aCurToken.nLevel = 5;
485 m_aCurToken.aText = aRes.DequotedNameOrString;
486 m_aCurToken.nRow = m_Row;
487 m_aCurToken.nCol = nRealStart - m_nColOff + 2;
489 else if (aRes.TokenType & KParseType::IDENTNAME)
491 sal_Int32 n = aRes.EndPos - nRealStart;
492 OSL_ENSURE( n >= 0, "length < 0" );
493 OUString aName( m_aBufferString.copy( nRealStart, n ) );
494 const SmTokenTableEntry *pEntry = GetTokenTableEntry( aName );
496 if (pEntry)
498 m_aCurToken.eType = pEntry->eType;
499 m_aCurToken.cMathChar = pEntry->cMathChar;
500 m_aCurToken.nGroup = pEntry->nGroup;
501 m_aCurToken.nLevel = pEntry->nLevel;
502 m_aCurToken.aText = OUString::createFromAscii( pEntry->pIdent );
504 else
506 m_aCurToken.eType = TIDENT;
507 m_aCurToken.cMathChar = '\0';
508 m_aCurToken.nGroup = 0;
509 m_aCurToken.nLevel = 5;
510 m_aCurToken.aText = aName;
512 #if OSL_DEBUG_LEVEL > 1
513 if (!IsDelimiter( m_aBufferString, aRes.EndPos ))
514 SAL_WARN( "starmath", "identifier really finished? (compatibility!)" );
515 #endif
518 else if (aRes.TokenType == 0 && '_' == m_aBufferString[ nRealStart ])
520 m_aCurToken.eType = TRSUB;
521 m_aCurToken.cMathChar = '\0';
522 m_aCurToken.nGroup = TGPOWER;
523 m_aCurToken.nLevel = 0;
524 m_aCurToken.aText = "_";
526 aRes.EndPos = nRealStart + 1;
528 else if (aRes.TokenType & KParseType::BOOLEAN)
530 sal_Int32 &rnEndPos = aRes.EndPos;
531 if (rnEndPos - nRealStart <= 2)
533 sal_Unicode ch = m_aBufferString[ nRealStart ];
534 switch (ch)
536 case '<':
538 if (m_aBufferString.match("<<", nRealStart))
540 m_aCurToken.eType = TLL;
541 m_aCurToken.cMathChar = MS_LL;
542 m_aCurToken.nGroup = TGRELATION;
543 m_aCurToken.nLevel = 0;
544 m_aCurToken.aText = "<<";
546 rnEndPos = nRealStart + 2;
548 else if (m_aBufferString.match("<=", nRealStart))
550 m_aCurToken.eType = TLE;
551 m_aCurToken.cMathChar = MS_LE;
552 m_aCurToken.nGroup = TGRELATION;
553 m_aCurToken.nLevel = 0;
554 m_aCurToken.aText = "<=";
556 rnEndPos = nRealStart + 2;
558 else if (m_aBufferString.match("<-", nRealStart))
560 m_aCurToken.eType = TLEFTARROW;
561 m_aCurToken.cMathChar = MS_LEFTARROW;
562 m_aCurToken.nGroup = TGSTANDALONE;
563 m_aCurToken.nLevel = 5;
564 m_aCurToken.aText = "<-";
566 rnEndPos = nRealStart + 2;
568 else if (m_aBufferString.match("<>", nRealStart))
570 m_aCurToken.eType = TNEQ;
571 m_aCurToken.cMathChar = MS_NEQ;
572 m_aCurToken.nGroup = TGRELATION;
573 m_aCurToken.nLevel = 0;
574 m_aCurToken.aText = "<>";
576 rnEndPos = nRealStart + 2;
578 else if (m_aBufferString.match("<?>", nRealStart))
580 m_aCurToken.eType = TPLACE;
581 m_aCurToken.cMathChar = MS_PLACE;
582 m_aCurToken.nGroup = 0;
583 m_aCurToken.nLevel = 5;
584 m_aCurToken.aText = "<?>";
586 rnEndPos = nRealStart + 3;
588 else
590 m_aCurToken.eType = TLT;
591 m_aCurToken.cMathChar = MS_LT;
592 m_aCurToken.nGroup = TGRELATION;
593 m_aCurToken.nLevel = 0;
594 m_aCurToken.aText = "<";
597 break;
598 case '>':
600 if (m_aBufferString.match(">=", nRealStart))
602 m_aCurToken.eType = TGE;
603 m_aCurToken.cMathChar = MS_GE;
604 m_aCurToken.nGroup = TGRELATION;
605 m_aCurToken.nLevel = 0;
606 m_aCurToken.aText = ">=";
608 rnEndPos = nRealStart + 2;
610 else if (m_aBufferString.match(">>", nRealStart))
612 m_aCurToken.eType = TGG;
613 m_aCurToken.cMathChar = MS_GG;
614 m_aCurToken.nGroup = TGRELATION;
615 m_aCurToken.nLevel = 0;
616 m_aCurToken.aText = ">>";
618 rnEndPos = nRealStart + 2;
620 else
622 m_aCurToken.eType = TGT;
623 m_aCurToken.cMathChar = MS_GT;
624 m_aCurToken.nGroup = TGRELATION;
625 m_aCurToken.nLevel = 0;
626 m_aCurToken.aText = ">";
629 break;
630 default:
631 bHandled = false;
635 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
637 sal_Int32 &rnEndPos = aRes.EndPos;
638 if (rnEndPos - nRealStart == 1)
640 sal_Unicode ch = m_aBufferString[ nRealStart ];
641 switch (ch)
643 case '%':
645 //! modifies aRes.EndPos
647 OSL_ENSURE( rnEndPos >= nBufLen ||
648 '%' != m_aBufferString[ rnEndPos ],
649 "unexpected comment start" );
651 // get identifier of user-defined character
652 ParseResult aTmpRes = aCC.parseAnyToken(
653 m_aBufferString, rnEndPos,
654 KParseTokens::ANY_LETTER,
655 aEmptyStr,
656 coUserDefinedCharContFlags,
657 aEmptyStr );
659 sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace;
661 // default setting for the case that no identifier
662 // i.e. a valid symbol-name is following the '%'
663 // character
664 m_aCurToken.eType = TTEXT;
665 m_aCurToken.cMathChar = '\0';
666 m_aCurToken.nGroup = 0;
667 m_aCurToken.nLevel = 5;
668 m_aCurToken.aText.clear();
669 m_aCurToken.nRow = m_Row;
670 m_aCurToken.nCol = nTmpStart - m_nColOff;
672 if (aTmpRes.TokenType & KParseType::IDENTNAME)
675 sal_Int32 n = aTmpRes.EndPos - nTmpStart;
676 m_aCurToken.eType = TSPECIAL;
677 m_aCurToken.aText = m_aBufferString.copy( nTmpStart-1, n+1 );
679 OSL_ENSURE( aTmpRes.EndPos > rnEndPos,
680 "empty identifier" );
681 if (aTmpRes.EndPos > rnEndPos)
682 rnEndPos = aTmpRes.EndPos;
683 else
684 ++rnEndPos;
687 // if no symbol-name was found we start-over with
688 // finding the next token right afer the '%' sign.
689 // I.e. we leave rnEndPos unmodified.
691 break;
692 case '[':
694 m_aCurToken.eType = TLBRACKET;
695 m_aCurToken.cMathChar = MS_LBRACKET;
696 m_aCurToken.nGroup = TGLBRACES;
697 m_aCurToken.nLevel = 5;
698 m_aCurToken.aText = "[";
700 break;
701 case '\\':
703 m_aCurToken.eType = TESCAPE;
704 m_aCurToken.cMathChar = '\0';
705 m_aCurToken.nGroup = 0;
706 m_aCurToken.nLevel = 5;
707 m_aCurToken.aText = "\\";
709 break;
710 case ']':
712 m_aCurToken.eType = TRBRACKET;
713 m_aCurToken.cMathChar = MS_RBRACKET;
714 m_aCurToken.nGroup = TGRBRACES;
715 m_aCurToken.nLevel = 0;
716 m_aCurToken.aText = "]";
718 break;
719 case '^':
721 m_aCurToken.eType = TRSUP;
722 m_aCurToken.cMathChar = '\0';
723 m_aCurToken.nGroup = TGPOWER;
724 m_aCurToken.nLevel = 0;
725 m_aCurToken.aText = "^";
727 break;
728 case '`':
730 m_aCurToken.eType = TSBLANK;
731 m_aCurToken.cMathChar = '\0';
732 m_aCurToken.nGroup = TGBLANK;
733 m_aCurToken.nLevel = 5;
734 m_aCurToken.aText = "`";
736 break;
737 case '{':
739 m_aCurToken.eType = TLGROUP;
740 m_aCurToken.cMathChar = MS_LBRACE;
741 m_aCurToken.nGroup = 0;
742 m_aCurToken.nLevel = 5;
743 m_aCurToken.aText = "{";
745 break;
746 case '|':
748 m_aCurToken.eType = TOR;
749 m_aCurToken.cMathChar = MS_OR;
750 m_aCurToken.nGroup = TGSUM;
751 m_aCurToken.nLevel = 0;
752 m_aCurToken.aText = "|";
754 break;
755 case '}':
757 m_aCurToken.eType = TRGROUP;
758 m_aCurToken.cMathChar = MS_RBRACE;
759 m_aCurToken.nGroup = 0;
760 m_aCurToken.nLevel = 0;
761 m_aCurToken.aText = "}";
763 break;
764 case '~':
766 m_aCurToken.eType = TBLANK;
767 m_aCurToken.cMathChar = '\0';
768 m_aCurToken.nGroup = TGBLANK;
769 m_aCurToken.nLevel = 5;
770 m_aCurToken.aText = "~";
772 break;
773 case '#':
775 if (m_aBufferString.match("##", nRealStart))
777 m_aCurToken.eType = TDPOUND;
778 m_aCurToken.cMathChar = '\0';
779 m_aCurToken.nGroup = 0;
780 m_aCurToken.nLevel = 0;
781 m_aCurToken.aText = "##";
783 rnEndPos = nRealStart + 2;
785 else
787 m_aCurToken.eType = TPOUND;
788 m_aCurToken.cMathChar = '\0';
789 m_aCurToken.nGroup = 0;
790 m_aCurToken.nLevel = 0;
791 m_aCurToken.aText = "#";
794 break;
795 case '&':
797 m_aCurToken.eType = TAND;
798 m_aCurToken.cMathChar = MS_AND;
799 m_aCurToken.nGroup = TGPRODUCT;
800 m_aCurToken.nLevel = 0;
801 m_aCurToken.aText = "&";
803 break;
804 case '(':
806 m_aCurToken.eType = TLPARENT;
807 m_aCurToken.cMathChar = MS_LPARENT;
808 m_aCurToken.nGroup = TGLBRACES;
809 m_aCurToken.nLevel = 5; //! 0 to continue expression
810 m_aCurToken.aText = "(";
812 break;
813 case ')':
815 m_aCurToken.eType = TRPARENT;
816 m_aCurToken.cMathChar = MS_RPARENT;
817 m_aCurToken.nGroup = TGRBRACES;
818 m_aCurToken.nLevel = 0; //! 0 to terminate expression
819 m_aCurToken.aText = ")";
821 break;
822 case '*':
824 m_aCurToken.eType = TMULTIPLY;
825 m_aCurToken.cMathChar = MS_MULTIPLY;
826 m_aCurToken.nGroup = TGPRODUCT;
827 m_aCurToken.nLevel = 0;
828 m_aCurToken.aText = "*";
830 break;
831 case '+':
833 if (m_aBufferString.match("+-", nRealStart))
835 m_aCurToken.eType = TPLUSMINUS;
836 m_aCurToken.cMathChar = MS_PLUSMINUS;
837 m_aCurToken.nGroup = TGUNOPER | TGSUM;
838 m_aCurToken.nLevel = 5;
839 m_aCurToken.aText = "+-";
841 rnEndPos = nRealStart + 2;
843 else
845 m_aCurToken.eType = TPLUS;
846 m_aCurToken.cMathChar = MS_PLUS;
847 m_aCurToken.nGroup = TGUNOPER | TGSUM;
848 m_aCurToken.nLevel = 5;
849 m_aCurToken.aText = "+";
852 break;
853 case '-':
855 if (m_aBufferString.match("-+", nRealStart))
857 m_aCurToken.eType = TMINUSPLUS;
858 m_aCurToken.cMathChar = MS_MINUSPLUS;
859 m_aCurToken.nGroup = TGUNOPER | TGSUM;
860 m_aCurToken.nLevel = 5;
861 m_aCurToken.aText = "-+";
863 rnEndPos = nRealStart + 2;
865 else if (m_aBufferString.match("->", nRealStart))
867 m_aCurToken.eType = TRIGHTARROW;
868 m_aCurToken.cMathChar = MS_RIGHTARROW;
869 m_aCurToken.nGroup = TGSTANDALONE;
870 m_aCurToken.nLevel = 5;
871 m_aCurToken.aText = "->";
873 rnEndPos = nRealStart + 2;
875 else
877 m_aCurToken.eType = TMINUS;
878 m_aCurToken.cMathChar = MS_MINUS;
879 m_aCurToken.nGroup = TGUNOPER | TGSUM;
880 m_aCurToken.nLevel = 5;
881 m_aCurToken.aText = "-";
884 break;
885 case '.':
887 // Only one character? Then it can't be a number.
888 if (m_nBufferIndex < m_aBufferString.getLength() - 1)
890 // for compatibility with SO5.2
891 // texts like .34 ...56 ... h ...78..90
892 // will be treated as numbers
893 m_aCurToken.eType = TNUMBER;
894 m_aCurToken.cMathChar = '\0';
895 m_aCurToken.nGroup = 0;
896 m_aCurToken.nLevel = 5;
898 sal_Int32 nTxtStart = m_nBufferIndex;
899 sal_Unicode cChar;
900 // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only
903 cChar = m_aBufferString[ ++m_nBufferIndex ];
905 while ( (cChar == '.' || rtl::isAsciiDigit( cChar )) &&
906 ( m_nBufferIndex < m_aBufferString.getLength() - 1 ) );
908 m_aCurToken.aText = m_aBufferString.copy( nTxtStart, m_nBufferIndex - nTxtStart );
909 aRes.EndPos = m_nBufferIndex;
911 else
912 bHandled = false;
914 break;
915 case '/':
917 m_aCurToken.eType = TDIVIDEBY;
918 m_aCurToken.cMathChar = MS_SLASH;
919 m_aCurToken.nGroup = TGPRODUCT;
920 m_aCurToken.nLevel = 0;
921 m_aCurToken.aText = "/";
923 break;
924 case '=':
926 m_aCurToken.eType = TASSIGN;
927 m_aCurToken.cMathChar = MS_ASSIGN;
928 m_aCurToken.nGroup = TGRELATION;
929 m_aCurToken.nLevel = 0;
930 m_aCurToken.aText = "=";
932 break;
933 default:
934 bHandled = false;
938 else
939 bHandled = false;
941 if (!bHandled)
943 m_aCurToken.eType = TCHARACTER;
944 m_aCurToken.cMathChar = '\0';
945 m_aCurToken.nGroup = 0;
946 m_aCurToken.nLevel = 5;
947 m_aCurToken.aText = m_aBufferString.copy( nRealStart, 1 );
949 aRes.EndPos = nRealStart + 1;
952 if (TEND != m_aCurToken.eType)
953 m_nBufferIndex = aRes.EndPos;
958 // grammar
962 void SmParser::Table()
964 SmNodeArray LineArray;
966 Line();
967 while (m_aCurToken.eType == TNEWLINE)
969 NextToken();
970 Line();
973 if (m_aCurToken.eType != TEND)
974 Error(PE_UNEXPECTED_CHAR);
976 auto n = m_aNodeStack.size();
978 LineArray.resize(n);
980 for (size_t i = 0; i < n; i++)
982 auto pNode = m_aNodeStack.pop_front();
983 LineArray[n - (i + 1)] = pNode.release();
986 SmStructureNode *pSNode = new SmTableNode(m_aCurToken);
987 pSNode->SetSubNodes(LineArray);
988 m_aNodeStack.push_front(pSNode);
992 void SmParser::Align()
993 // parse alignment info (if any), then go on with rest of expression
995 SmStructureNode *pSNode = 0;
997 if (TokenInGroup(TGALIGN))
999 pSNode = new SmAlignNode(m_aCurToken);
1001 NextToken();
1003 // allow for just one align statement in 5.0
1004 if (TokenInGroup(TGALIGN))
1006 Error(PE_DOUBLE_ALIGN);
1007 delete pSNode;
1008 return;
1012 Expression();
1014 if (pSNode)
1016 pSNode->SetSubNodes(popOrZero(m_aNodeStack), 0);
1017 m_aNodeStack.push_front(pSNode);
1022 void SmParser::Line()
1024 sal_uInt16 n = 0;
1025 SmNodeArray ExpressionArray;
1027 ExpressionArray.resize(n);
1029 // start with single expression that may have an alignment statement
1030 // (and go on with expressions that must not have alignment
1031 // statements in 'while' loop below. See also 'Expression()'.)
1032 if (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
1033 { Align();
1034 ExpressionArray.resize(++n);
1035 ExpressionArray[n - 1] = popOrZero(m_aNodeStack);
1038 while (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
1040 Expression();
1041 ExpressionArray.resize(++n);
1042 ExpressionArray[n - 1] = popOrZero(m_aNodeStack);
1045 //If there's no expression, add an empty one.
1046 //this is to avoid a formula tree without any caret
1047 //positions, in visual formula editor.
1048 if(ExpressionArray.empty())
1050 SmToken aTok = SmToken();
1051 aTok.eType = TNEWLINE;
1052 ExpressionArray.push_back(new SmExpressionNode(aTok));
1055 SmStructureNode *pSNode = new SmLineNode(m_aCurToken);
1056 pSNode->SetSubNodes(ExpressionArray);
1057 m_aNodeStack.push_front(pSNode);
1061 void SmParser::Expression()
1063 bool bUseExtraSpaces = true;
1064 if (!m_aNodeStack.empty())
1066 auto pNode = m_aNodeStack.pop_front();
1067 if (pNode->GetToken().eType == TNOSPACE)
1068 bUseExtraSpaces = false;
1069 else
1070 m_aNodeStack.push_front(pNode.release()); // push the node from above again (now to be used as argument to this current 'nospace' node)
1073 sal_uInt16 n = 0;
1074 SmNodeArray RelationArray;
1076 RelationArray.resize(n);
1078 Relation();
1079 RelationArray.resize(++n);
1080 RelationArray[n - 1] = popOrZero(m_aNodeStack);
1082 while (m_aCurToken.nLevel >= 4)
1083 { Relation();
1084 RelationArray.resize(++n);
1085 RelationArray[n - 1] = popOrZero(m_aNodeStack);
1088 if (n > 1)
1090 SmExpressionNode *pSNode = new SmExpressionNode(m_aCurToken);
1091 pSNode->SetSubNodes(RelationArray);
1092 pSNode->SetUseExtraSpaces(bUseExtraSpaces);
1093 m_aNodeStack.push_front(pSNode);
1095 else
1097 // This expression has only one node so just push this node.
1098 m_aNodeStack.push_front(RelationArray[0]);
1103 void SmParser::Relation()
1105 Sum();
1106 while (TokenInGroup(TGRELATION))
1108 SmStructureNode *pSNode = new SmBinHorNode(m_aCurToken);
1109 SmNode *pFirst = popOrZero(m_aNodeStack);
1111 OpSubSup();
1112 SmNode *pSecond = popOrZero(m_aNodeStack);
1114 Sum();
1116 pSNode->SetSubNodes(pFirst, pSecond, popOrZero(m_aNodeStack));
1117 m_aNodeStack.push_front(pSNode);
1122 void SmParser::Sum()
1124 Product();
1125 while (TokenInGroup(TGSUM))
1127 SmStructureNode *pSNode = new SmBinHorNode(m_aCurToken);
1128 SmNode *pFirst = popOrZero(m_aNodeStack);
1130 OpSubSup();
1131 SmNode *pSecond = popOrZero(m_aNodeStack);
1133 Product();
1135 pSNode->SetSubNodes(pFirst, pSecond, popOrZero(m_aNodeStack));
1136 m_aNodeStack.push_front(pSNode);
1141 void SmParser::Product()
1143 Power();
1145 while (TokenInGroup(TGPRODUCT))
1146 { SmStructureNode *pSNode;
1147 SmNode *pFirst = popOrZero(m_aNodeStack),
1148 *pOper;
1149 bool bSwitchArgs = false;
1151 SmTokenType eType = m_aCurToken.eType;
1152 switch (eType)
1154 case TOVER:
1155 pSNode = new SmBinVerNode(m_aCurToken);
1156 pOper = new SmRectangleNode(m_aCurToken);
1157 NextToken();
1158 break;
1160 case TBOPER:
1161 pSNode = new SmBinHorNode(m_aCurToken);
1163 NextToken();
1165 //Let the glyph node know it's a binary operation
1166 m_aCurToken.eType = TBOPER;
1167 m_aCurToken.nGroup = TGPRODUCT;
1169 GlyphSpecial();
1170 pOper = popOrZero(m_aNodeStack);
1171 break;
1173 case TOVERBRACE :
1174 case TUNDERBRACE :
1175 pSNode = new SmVerticalBraceNode(m_aCurToken);
1176 pOper = new SmMathSymbolNode(m_aCurToken);
1178 NextToken();
1179 break;
1181 case TWIDEBACKSLASH:
1182 case TWIDESLASH:
1184 SmBinDiagonalNode *pSTmp = new SmBinDiagonalNode(m_aCurToken);
1185 pSTmp->SetAscending(eType == TWIDESLASH);
1186 pSNode = pSTmp;
1188 pOper = new SmPolyLineNode(m_aCurToken);
1189 NextToken();
1191 bSwitchArgs = true;
1192 break;
1195 default:
1196 pSNode = new SmBinHorNode(m_aCurToken);
1198 OpSubSup();
1199 pOper = popOrZero(m_aNodeStack);
1202 Power();
1204 if (bSwitchArgs)
1206 //! vgl siehe SmBinDiagonalNode::Arrange
1207 pSNode->SetSubNodes(pFirst, popOrZero(m_aNodeStack), pOper);
1209 else
1211 pSNode->SetSubNodes(pFirst, pOper, popOrZero(m_aNodeStack));
1213 m_aNodeStack.push_front(pSNode);
1218 void SmParser::SubSup(sal_uLong nActiveGroup)
1220 OSL_ENSURE(nActiveGroup == TGPOWER || nActiveGroup == TGLIMIT,
1221 "Sm: wrong token group");
1223 if (!TokenInGroup(nActiveGroup))
1224 // already finish
1225 return;
1227 SmSubSupNode *pNode = new SmSubSupNode(m_aCurToken);
1228 //! Of course 'm_aCurToken' is just the first sub-/supscript token.
1229 //! It should be of no further interest. The positions of the
1230 //! sub-/supscripts will be identified by the corresponding subnodes
1231 //! index in the 'aSubNodes' array (enum value from 'SmSubSup').
1233 pNode->SetUseLimits(nActiveGroup == TGLIMIT);
1235 // initialize subnodes array
1236 SmNodeArray aSubNodes;
1237 aSubNodes.resize(1 + SUBSUP_NUM_ENTRIES);
1238 aSubNodes[0] = popOrZero(m_aNodeStack);
1239 for (sal_uInt16 i = 1; i < aSubNodes.size(); i++)
1240 aSubNodes[i] = NULL;
1242 // process all sub-/supscripts
1243 int nIndex = 0;
1244 while (TokenInGroup(nActiveGroup))
1245 { SmTokenType eType (m_aCurToken.eType);
1247 // skip sub-/supscript token
1248 NextToken();
1250 // get sub-/supscript node on top of stack
1251 if (eType == TFROM || eType == TTO)
1253 // parse limits in old 4.0 and 5.0 style
1254 Relation();
1256 else
1257 Term(true);
1259 switch (eType)
1260 { case TRSUB : nIndex = (int) RSUB; break;
1261 case TRSUP : nIndex = (int) RSUP; break;
1262 case TFROM :
1263 case TCSUB : nIndex = (int) CSUB; break;
1264 case TTO :
1265 case TCSUP : nIndex = (int) CSUP; break;
1266 case TLSUB : nIndex = (int) LSUB; break;
1267 case TLSUP : nIndex = (int) LSUP; break;
1268 default :
1269 SAL_WARN( "starmath", "unknown case");
1271 nIndex++;
1272 OSL_ENSURE(1 <= nIndex && nIndex <= 1 + SUBSUP_NUM_ENTRIES,
1273 "SmParser::Power() : sub-/supscript index falsch");
1275 // set sub-/supscript if not already done
1276 if (aSubNodes[nIndex] != NULL)
1277 Error(PE_DOUBLE_SUBSUPSCRIPT);
1278 aSubNodes[nIndex] = popOrZero(m_aNodeStack);
1281 pNode->SetSubNodes(aSubNodes);
1282 m_aNodeStack.push_front(pNode);
1286 void SmParser::OpSubSup()
1288 // push operator symbol
1289 m_aNodeStack.push_front(new SmMathSymbolNode(m_aCurToken));
1290 // skip operator token
1291 NextToken();
1292 // get sub- supscripts if any
1293 if (TokenInGroup(TGPOWER))
1294 SubSup(TGPOWER);
1298 void SmParser::Power()
1300 // get body for sub- supscripts on top of stack
1301 Term(false);
1303 SubSup(TGPOWER);
1307 void SmParser::Blank()
1309 OSL_ENSURE(TokenInGroup(TGBLANK), "Sm : wrong token");
1310 SmBlankNode *pBlankNode = new SmBlankNode(m_aCurToken);
1312 while (TokenInGroup(TGBLANK))
1314 pBlankNode->IncreaseBy(m_aCurToken);
1315 NextToken();
1318 // Ignore trailing spaces, if corresponding option is set
1319 if ( m_aCurToken.eType == TNEWLINE ||
1320 (m_aCurToken.eType == TEND && SM_MOD()->GetConfig()->IsIgnoreSpacesRight()) )
1322 pBlankNode->Clear();
1325 m_aNodeStack.push_front(pBlankNode);
1329 void SmParser::Term(bool bGroupNumberIdent)
1331 switch (m_aCurToken.eType)
1333 case TESCAPE :
1334 Escape();
1335 break;
1337 case TNOSPACE :
1338 case TLGROUP :
1340 bool bNoSpace = m_aCurToken.eType == TNOSPACE;
1341 if (bNoSpace) // push 'no space' node and continue to parse expression
1343 m_aNodeStack.push_front(new SmExpressionNode(m_aCurToken));
1344 NextToken();
1346 if (m_aCurToken.eType != TLGROUP)
1348 m_aNodeStack.pop_front(); // get rid of the 'no space' node pushed above
1349 Term(false);
1351 else
1353 NextToken();
1355 // allow for empty group
1356 if (m_aCurToken.eType == TRGROUP)
1358 if (bNoSpace) // get rid of the 'no space' node pushed above
1359 m_aNodeStack.pop_front();
1360 SmStructureNode *pSNode = new SmExpressionNode(m_aCurToken);
1361 pSNode->SetSubNodes(NULL, NULL);
1362 m_aNodeStack.push_front(pSNode);
1364 NextToken();
1366 else // go as usual
1368 Align();
1369 if (m_aCurToken.eType != TRGROUP)
1370 Error(PE_RGROUP_EXPECTED);
1371 else
1372 NextToken();
1376 break;
1378 case TLEFT :
1379 Brace();
1380 break;
1382 case TBLANK :
1383 case TSBLANK :
1384 Blank();
1385 break;
1387 case TTEXT :
1388 m_aNodeStack.push_front(new SmTextNode(m_aCurToken, FNT_TEXT));
1389 NextToken();
1390 break;
1391 case TCHARACTER :
1392 m_aNodeStack.push_front(new SmTextNode(m_aCurToken, FNT_VARIABLE));
1393 NextToken();
1394 break;
1395 case TIDENT :
1396 case TNUMBER :
1398 m_aNodeStack.push_front(new SmTextNode(m_aCurToken,
1399 m_aCurToken.eType == TNUMBER ?
1400 FNT_NUMBER :
1401 FNT_VARIABLE));
1402 if (!bGroupNumberIdent)
1404 NextToken();
1406 else
1408 // Some people want to be able to write "x_2n" for "x_{2n}"
1409 // although e.g. LaTeX or AsciiMath interpret that as "x_2 n".
1410 // The tokenizer skips whitespaces so we need some additional
1411 // work to distinguish from "x_2 n".
1412 // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and
1413 // https://bugs.libreoffice.org/show_bug.cgi?id=55853
1414 sal_Int32 nBufLen = m_aBufferString.getLength();
1415 CharClass aCC(SM_MOD()->GetSysLocale().GetLanguageTag());
1416 sal_Int32 nTokens = 1;
1418 // We need to be careful to call NextToken() only after having
1419 // tested for a whitespace separator (otherwise it will be
1420 // skipped!)
1421 bool moveToNextToken = true;
1422 while (m_nBufferIndex < nBufLen &&
1423 aCC.getType(m_aBufferString, m_nBufferIndex) !=
1424 UnicodeType::SPACE_SEPARATOR)
1426 NextToken();
1427 if (m_aCurToken.eType != TNUMBER &&
1428 m_aCurToken.eType != TIDENT)
1430 // Neither a number nor an indentifier. We just moved to
1431 // the next token, so no need to do that again.
1432 moveToNextToken = false;
1433 break;
1435 m_aNodeStack.push_front(new SmTextNode(m_aCurToken,
1436 m_aCurToken.eType ==
1437 TNUMBER ?
1438 FNT_NUMBER :
1439 FNT_VARIABLE));
1440 nTokens++;
1442 if (moveToNextToken) NextToken();
1443 if (nTokens > 1)
1445 // We have several concatenated identifiers and numbers.
1446 // Let's group them into one SmExpressionNode.
1447 SmNodeArray nodeArray;
1448 nodeArray.resize(nTokens);
1449 while (nTokens > 0)
1451 nodeArray[nTokens-1] = popOrZero(m_aNodeStack);
1452 nTokens--;
1454 SmExpressionNode* pNode = new SmExpressionNode(SmToken());
1455 pNode->SetSubNodes(nodeArray);
1456 m_aNodeStack.push_front(pNode);
1459 break;
1461 case TLEFTARROW :
1462 case TRIGHTARROW :
1463 case TUPARROW :
1464 case TDOWNARROW :
1465 case TCIRC :
1466 case TDRARROW :
1467 case TDLARROW :
1468 case TDLRARROW :
1469 case TEXISTS :
1470 case TNOTEXISTS :
1471 case TFORALL :
1472 case TPARTIAL :
1473 case TNABLA :
1474 case TTOWARD :
1475 case TDOTSAXIS :
1476 case TDOTSDIAG :
1477 case TDOTSDOWN :
1478 case TDOTSLOW :
1479 case TDOTSUP :
1480 case TDOTSVERT :
1481 m_aNodeStack.push_front(new SmMathSymbolNode(m_aCurToken));
1482 NextToken();
1483 break;
1485 case TSETN :
1486 case TSETZ :
1487 case TSETQ :
1488 case TSETR :
1489 case TSETC :
1490 case THBAR :
1491 case TLAMBDABAR :
1492 case TBACKEPSILON :
1493 case TALEPH :
1494 case TIM :
1495 case TRE :
1496 case TWP :
1497 case TEMPTYSET :
1498 case TINFINITY :
1499 m_aNodeStack.push_front(new SmMathIdentifierNode(m_aCurToken));
1500 NextToken();
1501 break;
1503 case TPLACE:
1504 m_aNodeStack.push_front(new SmPlaceNode(m_aCurToken));
1505 NextToken();
1506 break;
1508 case TSPECIAL:
1509 Special();
1510 break;
1512 case TBINOM:
1513 Binom();
1514 break;
1516 case TSTACK:
1517 Stack();
1518 break;
1520 case TMATRIX:
1521 Matrix();
1522 break;
1524 default:
1525 if (TokenInGroup(TGLBRACES))
1526 { Brace();
1528 else if (TokenInGroup(TGOPER))
1529 { Operator();
1531 else if (TokenInGroup(TGUNOPER))
1532 { UnOper();
1534 else if ( TokenInGroup(TGATTRIBUT)
1535 || TokenInGroup(TGFONTATTR))
1536 { SmStructureNodeArray aArray;
1538 bool bIsAttr;
1539 sal_uInt16 n = 0;
1540 while ( (bIsAttr = TokenInGroup(TGATTRIBUT))
1541 || TokenInGroup(TGFONTATTR))
1542 { aArray.resize(n + 1);
1544 if (bIsAttr)
1545 Attribut();
1546 else
1547 FontAttribut();
1549 SmNode* pTmp = popOrZero(m_aNodeStack);
1551 // check if casting in following line is ok
1552 OSL_ENSURE(pTmp && !pTmp->IsVisible(), "Sm : Ooops...");
1554 aArray[n] = static_cast<SmStructureNode *>(pTmp);
1555 n++;
1558 Power();
1560 SmNode *pFirstNode = popOrZero(m_aNodeStack);
1561 while (n > 0)
1562 { aArray[n - 1]->SetSubNodes(0, pFirstNode);
1563 pFirstNode = aArray[n - 1];
1564 n--;
1566 m_aNodeStack.push_front(pFirstNode);
1568 else if (TokenInGroup(TGFUNCTION))
1570 Function();
1572 else
1573 Error(PE_UNEXPECTED_CHAR);
1578 void SmParser::Escape()
1580 NextToken();
1582 switch (m_aCurToken.eType)
1584 case TLPARENT :
1585 case TRPARENT :
1586 case TLBRACKET :
1587 case TRBRACKET :
1588 case TLDBRACKET :
1589 case TRDBRACKET :
1590 case TLBRACE :
1591 case TLGROUP :
1592 case TRBRACE :
1593 case TRGROUP :
1594 case TLANGLE :
1595 case TRANGLE :
1596 case TLCEIL :
1597 case TRCEIL :
1598 case TLFLOOR :
1599 case TRFLOOR :
1600 case TLLINE :
1601 case TRLINE :
1602 case TLDLINE :
1603 case TRDLINE :
1604 break;
1605 default:
1606 Error(PE_UNEXPECTED_TOKEN);
1609 SmNode *pNode = new SmMathSymbolNode(m_aCurToken);
1610 m_aNodeStack.push_front(pNode);
1612 NextToken();
1616 void SmParser::Operator()
1618 if (TokenInGroup(TGOPER))
1619 { SmStructureNode *pSNode = new SmOperNode(m_aCurToken);
1621 // put operator on top of stack
1622 Oper();
1624 if (TokenInGroup(TGLIMIT) || TokenInGroup(TGPOWER))
1625 SubSup(m_aCurToken.nGroup);
1626 SmNode *pOperator = popOrZero(m_aNodeStack);
1628 // get argument
1629 Power();
1631 pSNode->SetSubNodes(pOperator, popOrZero(m_aNodeStack));
1632 m_aNodeStack.push_front(pSNode);
1637 void SmParser::Oper()
1639 SmTokenType eType (m_aCurToken.eType);
1640 SmNode *pNode = NULL;
1642 switch (eType)
1644 case TSUM :
1645 case TPROD :
1646 case TCOPROD :
1647 case TINT :
1648 case TIINT :
1649 case TIIINT :
1650 case TLINT :
1651 case TLLINT :
1652 case TLLLINT :
1653 pNode = new SmMathSymbolNode(m_aCurToken);
1654 break;
1656 case TLIM :
1657 case TLIMSUP :
1658 case TLIMINF :
1660 const sal_Char* pLim = 0;
1661 switch (eType)
1663 case TLIM : pLim = "lim"; break;
1664 case TLIMSUP : pLim = "lim sup"; break;
1665 case TLIMINF : pLim = "lim inf"; break;
1666 default:
1667 break;
1669 if( pLim )
1670 m_aCurToken.aText = OUString::createFromAscii(pLim);
1671 pNode = new SmTextNode(m_aCurToken, FNT_TEXT);
1673 break;
1675 case TOPER :
1676 NextToken();
1678 OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token");
1679 pNode = new SmGlyphSpecialNode(m_aCurToken);
1680 break;
1682 default :
1683 assert(false && "unknown case");
1685 m_aNodeStack.push_front(pNode);
1687 NextToken();
1691 void SmParser::UnOper()
1693 OSL_ENSURE(TokenInGroup(TGUNOPER), "Sm: wrong token");
1695 SmToken aNodeToken = m_aCurToken;
1696 SmTokenType eType = m_aCurToken.eType;
1697 bool bIsPostfix = eType == TFACT;
1699 SmStructureNode *pSNode;
1700 SmNode *pOper = 0,
1701 *pExtra = 0,
1702 *pArg;
1704 switch (eType)
1706 case TABS :
1707 case TSQRT :
1708 /* Dynamic integrals are handled as unary operators so we can wrap
1709 the symbol together with the body in a upper level node and make
1710 proper graphic arrangements */
1711 case TINTD:
1712 NextToken();
1713 break;
1715 case TNROOT :
1716 NextToken();
1717 Power();
1718 pExtra = popOrZero(m_aNodeStack);
1719 break;
1721 case TUOPER :
1722 NextToken();
1723 //Let the glyph know what it is...
1724 m_aCurToken.eType = TUOPER;
1725 m_aCurToken.nGroup = TGUNOPER;
1726 GlyphSpecial();
1727 pOper = popOrZero(m_aNodeStack);
1728 break;
1730 case TPLUS :
1731 case TMINUS :
1732 case TPLUSMINUS :
1733 case TMINUSPLUS :
1734 case TNEG :
1735 case TFACT :
1736 OpSubSup();
1737 pOper = popOrZero(m_aNodeStack);
1738 break;
1740 default :
1741 Error(PE_UNOPER_EXPECTED);
1744 // get argument
1745 Power();
1746 pArg = popOrZero(m_aNodeStack);
1748 if (eType == TABS)
1749 { pSNode = new SmBraceNode(aNodeToken);
1750 pSNode->SetScaleMode(SCALE_HEIGHT);
1752 // build nodes for left & right lines
1753 // (text, group, level of the used token are of no interrest here)
1754 // we'll use row & column of the keyword for abs
1755 aNodeToken.eType = TABS;
1757 aNodeToken.cMathChar = MS_VERTLINE;
1758 SmNode* pLeft = new SmMathSymbolNode(aNodeToken);
1760 aNodeToken.cMathChar = MS_VERTLINE;
1761 SmNode* pRight = new SmMathSymbolNode(aNodeToken);
1763 pSNode->SetSubNodes(pLeft, pArg, pRight);
1765 else if (eType == TSQRT || eType == TNROOT)
1766 { pSNode = new SmRootNode(aNodeToken);
1767 pOper = new SmRootSymbolNode(aNodeToken);
1768 pSNode->SetSubNodes(pExtra, pOper, pArg);
1770 else if(eType == TINTD)
1771 { pSNode = new SmDynIntegralNode(aNodeToken);
1772 pOper = new SmDynIntegralSymbolNode(aNodeToken);
1773 pSNode->SetSubNodes(pOper, pArg);
1775 else
1776 { pSNode = new SmUnHorNode(aNodeToken);
1778 if (bIsPostfix)
1779 pSNode->SetSubNodes(pArg, pOper);
1780 else
1781 // prefix operator
1782 pSNode->SetSubNodes(pOper, pArg);
1785 m_aNodeStack.push_front(pSNode);
1789 void SmParser::Attribut()
1791 OSL_ENSURE(TokenInGroup(TGATTRIBUT), "Sm: wrong token group");
1793 SmStructureNode *pSNode = new SmAttributNode(m_aCurToken);
1794 SmNode *pAttr;
1795 SmScaleMode eScaleMode = SCALE_NONE;
1797 // get appropriate node for the attribute itself
1798 switch (m_aCurToken.eType)
1799 { case TUNDERLINE :
1800 case TOVERLINE :
1801 case TOVERSTRIKE :
1802 pAttr = new SmRectangleNode(m_aCurToken);
1803 eScaleMode = SCALE_WIDTH;
1804 break;
1806 case TWIDEVEC :
1807 case TWIDEHAT :
1808 case TWIDETILDE :
1809 pAttr = new SmMathSymbolNode(m_aCurToken);
1810 eScaleMode = SCALE_WIDTH;
1811 break;
1813 default :
1814 pAttr = new SmMathSymbolNode(m_aCurToken);
1817 NextToken();
1819 pSNode->SetSubNodes(pAttr, 0);
1820 pSNode->SetScaleMode(eScaleMode);
1821 m_aNodeStack.push_front(pSNode);
1825 void SmParser::FontAttribut()
1827 OSL_ENSURE(TokenInGroup(TGFONTATTR), "Sm: wrong token group");
1829 switch (m_aCurToken.eType)
1831 case TITALIC :
1832 case TNITALIC :
1833 case TBOLD :
1834 case TNBOLD :
1835 case TPHANTOM :
1836 m_aNodeStack.push_front(new SmFontNode(m_aCurToken));
1837 NextToken();
1838 break;
1840 case TSIZE :
1841 FontSize();
1842 break;
1844 case TFONT :
1845 Font();
1846 break;
1848 case TCOLOR :
1849 Color();
1850 break;
1852 default :
1853 SAL_WARN("starmath", "unknown case");
1858 void SmParser::Color()
1860 OSL_ENSURE(m_aCurToken.eType == TCOLOR, "Sm : Ooops...");
1862 // last color rules, get that one
1863 SmToken aToken;
1865 { NextToken();
1867 if (TokenInGroup(TGCOLOR))
1868 { aToken = m_aCurToken;
1869 NextToken();
1871 else
1872 Error(PE_COLOR_EXPECTED);
1873 } while (m_aCurToken.eType == TCOLOR);
1875 m_aNodeStack.push_front(new SmFontNode(aToken));
1879 void SmParser::Font()
1881 OSL_ENSURE(m_aCurToken.eType == TFONT, "Sm : Ooops...");
1883 // last font rules, get that one
1884 SmToken aToken;
1886 { NextToken();
1888 if (TokenInGroup(TGFONT))
1889 { aToken = m_aCurToken;
1890 NextToken();
1892 else
1893 Error(PE_FONT_EXPECTED);
1894 } while (m_aCurToken.eType == TFONT);
1896 m_aNodeStack.push_front(new SmFontNode(aToken));
1900 // gets number used as arguments in Math formulas (e.g. 'size' command)
1901 // Format: no negative numbers, must start with a digit, no exponent notation, ...
1902 static bool lcl_IsNumber(const OUString& rText)
1904 bool bPoint = false;
1905 const sal_Unicode* pBuffer = rText.getStr();
1906 for(sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
1908 const sal_Unicode cChar = *pBuffer;
1909 if(cChar == '.')
1911 if(bPoint)
1912 return false;
1913 else
1914 bPoint = true;
1916 else if ( !rtl::isAsciiDigit( cChar ) )
1917 return false;
1919 return true;
1922 void SmParser::FontSize()
1924 OSL_ENSURE(m_aCurToken.eType == TSIZE, "Sm : Ooops...");
1926 FontSizeType Type;
1927 SmFontNode *pFontNode = new SmFontNode(m_aCurToken);
1929 NextToken();
1931 switch (m_aCurToken.eType)
1933 case TNUMBER: Type = FontSizeType::ABSOLUT; break;
1934 case TPLUS: Type = FontSizeType::PLUS; break;
1935 case TMINUS: Type = FontSizeType::MINUS; break;
1936 case TMULTIPLY: Type = FontSizeType::MULTIPLY; break;
1937 case TDIVIDEBY: Type = FontSizeType::DIVIDE; break;
1939 default:
1940 delete pFontNode;
1941 Error(PE_SIZE_EXPECTED);
1942 return;
1945 if (Type != FontSizeType::ABSOLUT)
1947 NextToken();
1948 if (m_aCurToken.eType != TNUMBER)
1950 delete pFontNode;
1951 Error(PE_SIZE_EXPECTED);
1952 return;
1956 // get number argument
1957 Fraction aValue( 1L );
1958 if (lcl_IsNumber( m_aCurToken.aText ))
1960 double fTmp = OUString(m_aCurToken.aText).toDouble();
1961 if (fTmp != 0.0)
1963 aValue = fTmp;
1965 //!! keep the numerator and denominator from being to large
1966 //!! otherwise ongoing multiplications may result in overflows
1967 //!! (for example in SmNode::SetFontSize the font size calculated
1968 //!! may become 0 because of this!!! Happens e.g. for ftmp = 2.9 with Linux
1969 //!! or ftmp = 1.11111111111111111... (11/9) on every platform.)
1970 if (aValue.GetDenominator() > 1000)
1972 long nNum = aValue.GetNumerator();
1973 long nDenom = aValue.GetDenominator();
1974 while (nDenom > 1000)
1976 nNum /= 10;
1977 nDenom /= 10;
1979 aValue = Fraction( nNum, nDenom );
1984 NextToken();
1986 pFontNode->SetSizeParameter(aValue, Type);
1987 m_aNodeStack.push_front(pFontNode);
1991 void SmParser::Brace()
1993 OSL_ENSURE(m_aCurToken.eType == TLEFT || TokenInGroup(TGLBRACES),
1994 "Sm: kein Klammer Ausdruck");
1996 SmStructureNode *pSNode = new SmBraceNode(m_aCurToken);
1997 SmNode *pBody = 0,
1998 *pLeft = 0,
1999 *pRight = 0;
2000 SmScaleMode eScaleMode = SCALE_NONE;
2001 SmParseError eError = PE_NONE;
2003 if (m_aCurToken.eType == TLEFT)
2004 { NextToken();
2006 eScaleMode = SCALE_HEIGHT;
2008 // check for left bracket
2009 if (TokenInGroup(TGLBRACES) || TokenInGroup(TGRBRACES))
2011 pLeft = new SmMathSymbolNode(m_aCurToken);
2013 NextToken();
2014 Bracebody(true);
2015 pBody = popOrZero(m_aNodeStack);
2017 if (m_aCurToken.eType == TRIGHT)
2018 { NextToken();
2020 // check for right bracket
2021 if (TokenInGroup(TGLBRACES) || TokenInGroup(TGRBRACES))
2023 pRight = new SmMathSymbolNode(m_aCurToken);
2024 NextToken();
2026 else
2027 eError = PE_RBRACE_EXPECTED;
2029 else
2030 eError = PE_RIGHT_EXPECTED;
2032 else
2033 eError = PE_LBRACE_EXPECTED;
2035 else
2037 if (TokenInGroup(TGLBRACES))
2039 pLeft = new SmMathSymbolNode(m_aCurToken);
2041 NextToken();
2042 Bracebody(false);
2043 pBody = popOrZero(m_aNodeStack);
2045 SmTokenType eExpectedType = TUNKNOWN;
2046 switch (pLeft->GetToken().eType)
2047 { case TLPARENT : eExpectedType = TRPARENT; break;
2048 case TLBRACKET : eExpectedType = TRBRACKET; break;
2049 case TLBRACE : eExpectedType = TRBRACE; break;
2050 case TLDBRACKET : eExpectedType = TRDBRACKET; break;
2051 case TLLINE : eExpectedType = TRLINE; break;
2052 case TLDLINE : eExpectedType = TRDLINE; break;
2053 case TLANGLE : eExpectedType = TRANGLE; break;
2054 case TLFLOOR : eExpectedType = TRFLOOR; break;
2055 case TLCEIL : eExpectedType = TRCEIL; break;
2056 default :
2057 SAL_WARN("starmath", "unknown case");
2060 if (m_aCurToken.eType == eExpectedType)
2062 pRight = new SmMathSymbolNode(m_aCurToken);
2063 NextToken();
2065 else
2066 eError = PE_PARENT_MISMATCH;
2068 else
2069 eError = PE_LBRACE_EXPECTED;
2072 if (eError == PE_NONE)
2073 { OSL_ENSURE(pLeft, "Sm: NULL pointer");
2074 OSL_ENSURE(pRight, "Sm: NULL pointer");
2075 pSNode->SetSubNodes(pLeft, pBody, pRight);
2076 pSNode->SetScaleMode(eScaleMode);
2077 m_aNodeStack.push_front(pSNode);
2079 else
2080 { delete pSNode;
2081 delete pBody;
2082 delete pLeft;
2083 delete pRight;
2085 Error(eError);
2090 void SmParser::Bracebody(bool bIsLeftRight)
2092 SmStructureNode *pBody = new SmBracebodyNode(m_aCurToken);
2093 SmNodeArray aNodes;
2094 sal_uInt16 nNum = 0;
2096 // get body if any
2097 if (bIsLeftRight)
2101 if (m_aCurToken.eType == TMLINE)
2103 m_aNodeStack.push_front(new SmMathSymbolNode(m_aCurToken));
2104 NextToken();
2105 nNum++;
2107 else if (m_aCurToken.eType != TRIGHT)
2108 { Align();
2109 nNum++;
2111 if (m_aCurToken.eType != TMLINE && m_aCurToken.eType != TRIGHT)
2112 Error(PE_RIGHT_EXPECTED);
2114 } while (m_aCurToken.eType != TEND && m_aCurToken.eType != TRIGHT);
2116 else
2120 if (m_aCurToken.eType == TMLINE)
2122 m_aNodeStack.push_front(new SmMathSymbolNode(m_aCurToken));
2123 NextToken();
2124 nNum++;
2126 else if (!TokenInGroup(TGRBRACES))
2127 { Align();
2128 nNum++;
2130 if (m_aCurToken.eType != TMLINE && !TokenInGroup(TGRBRACES))
2131 Error(PE_RBRACE_EXPECTED);
2133 } while (m_aCurToken.eType != TEND && !TokenInGroup(TGRBRACES));
2136 // build argument vector in parsing order
2137 aNodes.resize(nNum);
2138 for (sal_uInt16 i = 0; i < nNum; i++)
2140 aNodes[nNum - 1 - i] = popOrZero(m_aNodeStack);
2143 pBody->SetSubNodes(aNodes);
2144 pBody->SetScaleMode(bIsLeftRight ? SCALE_HEIGHT : SCALE_NONE);
2145 m_aNodeStack.push_front(pBody);
2149 void SmParser::Function()
2151 switch (m_aCurToken.eType)
2153 case TFUNC:
2154 NextToken(); // skip "FUNC"-statement
2155 // fall through
2157 case TSIN :
2158 case TCOS :
2159 case TTAN :
2160 case TCOT :
2161 case TASIN :
2162 case TACOS :
2163 case TATAN :
2164 case TACOT :
2165 case TSINH :
2166 case TCOSH :
2167 case TTANH :
2168 case TCOTH :
2169 case TASINH :
2170 case TACOSH :
2171 case TATANH :
2172 case TACOTH :
2173 case TLN :
2174 case TLOG :
2175 case TEXP :
2176 m_aNodeStack.push_front(new SmTextNode(m_aCurToken, FNT_FUNCTION));
2177 NextToken();
2178 break;
2180 default:
2181 Error(PE_FUNC_EXPECTED);
2186 void SmParser::Binom()
2188 SmNodeArray ExpressionArray;
2189 SmStructureNode *pSNode = new SmTableNode(m_aCurToken);
2191 NextToken();
2193 Sum();
2194 Sum();
2196 ExpressionArray.resize(2);
2198 for (int i = 0; i < 2; i++)
2200 ExpressionArray[2 - (i + 1)] = popOrZero(m_aNodeStack);
2203 pSNode->SetSubNodes(ExpressionArray);
2204 m_aNodeStack.push_front(pSNode);
2208 void SmParser::Stack()
2210 SmNodeArray ExpressionArray;
2211 NextToken();
2212 if (m_aCurToken.eType == TLGROUP)
2214 sal_uInt16 n = 0;
2218 NextToken();
2219 Align();
2220 n++;
2222 while (m_aCurToken.eType == TPOUND);
2224 ExpressionArray.resize(n);
2226 for (sal_uInt16 i = 0; i < n; i++)
2228 ExpressionArray[n - (i + 1)] = popOrZero(m_aNodeStack);
2231 if (m_aCurToken.eType != TRGROUP)
2232 Error(PE_RGROUP_EXPECTED);
2234 NextToken();
2236 //We need to let the table node know it context
2237 //it's used in SmNodeToTextVisitor
2238 SmToken aTok = m_aCurToken;
2239 aTok.eType = TSTACK;
2240 SmStructureNode *pSNode = new SmTableNode(aTok);
2241 pSNode->SetSubNodes(ExpressionArray);
2242 m_aNodeStack.push_front(pSNode);
2244 else
2245 Error(PE_LGROUP_EXPECTED);
2249 void SmParser::Matrix()
2251 SmNodeArray ExpressionArray;
2253 NextToken();
2254 if (m_aCurToken.eType == TLGROUP)
2256 sal_uInt16 c = 0;
2260 NextToken();
2261 Align();
2262 c++;
2264 while (m_aCurToken.eType == TPOUND);
2266 sal_uInt16 r = 1;
2268 while (m_aCurToken.eType == TDPOUND)
2270 NextToken();
2271 for (sal_uInt16 i = 0; i < c; i++)
2273 Align();
2274 if (i < (c - 1))
2276 if (m_aCurToken.eType == TPOUND)
2278 NextToken();
2280 else
2281 Error(PE_POUND_EXPECTED);
2285 r++;
2288 size_t nRC = static_cast<size_t>(r) * c;
2290 ExpressionArray.resize(nRC);
2292 for (size_t i = 0; i < (nRC); ++i)
2294 ExpressionArray[(nRC) - (i + 1)] = popOrZero(m_aNodeStack);
2297 if (m_aCurToken.eType != TRGROUP)
2298 Error(PE_RGROUP_EXPECTED);
2300 NextToken();
2302 SmMatrixNode *pMNode = new SmMatrixNode(m_aCurToken);
2303 pMNode->SetSubNodes(ExpressionArray);
2304 pMNode->SetRowCol(r, c);
2305 m_aNodeStack.push_front(pMNode);
2307 else
2308 Error(PE_LGROUP_EXPECTED);
2312 void SmParser::Special()
2314 bool bReplace = false;
2315 OUString &rName = m_aCurToken.aText;
2316 OUString aNewName;
2318 // conversion of symbol names for 6.0 (XML) file format
2319 // (name change on import / export.
2320 // UI uses localized names XML file format does not.)
2321 if( rName.startsWith("%") )
2323 if (IsImportSymbolNames())
2325 aNewName = SmLocalizedSymbolData::GetUiSymbolName(rName.copy(1));
2326 bReplace = true;
2328 else if (IsExportSymbolNames())
2330 aNewName = SmLocalizedSymbolData::GetExportSymbolName(rName.copy(1));
2331 bReplace = true;
2334 if (!aNewName.isEmpty())
2335 aNewName = "%" + aNewName;
2338 if (bReplace && !aNewName.isEmpty() && rName != aNewName)
2340 Replace(GetTokenIndex(), rName.getLength(), aNewName);
2341 rName = aNewName;
2344 // add symbol name to list of used symbols
2345 const OUString aSymbolName(m_aCurToken.aText.copy(1));
2346 if (!aSymbolName.isEmpty())
2347 AddToUsedSymbols( aSymbolName );
2349 m_aNodeStack.push_front(new SmSpecialNode(m_aCurToken));
2350 NextToken();
2354 void SmParser::GlyphSpecial()
2356 m_aNodeStack.push_front(new SmGlyphSpecialNode(m_aCurToken));
2357 NextToken();
2361 void SmParser::Error(SmParseError eError)
2363 SmStructureNode *pSNode = new SmExpressionNode(m_aCurToken);
2364 SmErrorNode *pErr = new SmErrorNode(eError, m_aCurToken);
2365 pSNode->SetSubNodes(pErr, 0);
2367 //! put a structure node on the stack (instead of the error node itself)
2368 //! because sometimes such a node is expected in order to attach some
2369 //! subnodes
2370 m_aNodeStack.push_front(pSNode);
2372 AddError(eError, pSNode);
2374 NextToken();
2378 // end grammar
2381 SmParser::SmParser()
2382 : m_nCurError( 0 )
2383 , m_nBufferIndex( 0 )
2384 , m_nTokenIndex( 0 )
2385 , m_Row( 0 )
2386 , m_nColOff( 0 )
2387 , m_aDotLoc( LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) )
2389 bImportSymNames = m_bExportSymNames = false;
2390 m_nLang = Application::GetSettings().GetUILanguageTag().getLanguageType();
2393 SmNode *SmParser::Parse(const OUString &rBuffer)
2395 ClearUsedSymbols();
2397 m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2398 m_nBufferIndex = 0;
2399 m_nTokenIndex = 0;
2400 m_Row = 1;
2401 m_nColOff = 0;
2402 m_nCurError = -1;
2404 m_aErrDescList.clear();
2406 m_aNodeStack.clear();
2408 SetLanguage( Application::GetSettings().GetUILanguageTag().getLanguageType() );
2409 NextToken();
2410 Table();
2412 SmNode* result = popOrZero(m_aNodeStack);
2413 return result;
2416 SmNode *SmParser::ParseExpression(const OUString &rBuffer)
2418 m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2419 m_nBufferIndex = 0;
2420 m_nTokenIndex = 0;
2421 m_Row = 1;
2422 m_nColOff = 0;
2423 m_nCurError = -1;
2425 m_aErrDescList.clear();
2427 m_aNodeStack.clear();
2429 SetLanguage( Application::GetSettings().GetUILanguageTag().getLanguageType() );
2430 NextToken();
2431 Expression();
2433 SmNode* result = popOrZero(m_aNodeStack);
2434 return result;
2438 size_t SmParser::AddError(SmParseError Type, SmNode *pNode)
2440 SmErrorDesc *pErrDesc = new SmErrorDesc;
2442 pErrDesc->Type = Type;
2443 pErrDesc->pNode = pNode;
2444 pErrDesc->Text = SM_RESSTR(RID_ERR_IDENT);
2446 sal_uInt16 nRID;
2447 switch (Type)
2449 case PE_UNEXPECTED_CHAR: nRID = RID_ERR_UNEXPECTEDCHARACTER; break;
2450 case PE_LGROUP_EXPECTED: nRID = RID_ERR_LGROUPEXPECTED; break;
2451 case PE_RGROUP_EXPECTED: nRID = RID_ERR_RGROUPEXPECTED; break;
2452 case PE_LBRACE_EXPECTED: nRID = RID_ERR_LBRACEEXPECTED; break;
2453 case PE_RBRACE_EXPECTED: nRID = RID_ERR_RBRACEEXPECTED; break;
2454 case PE_FUNC_EXPECTED: nRID = RID_ERR_FUNCEXPECTED; break;
2455 case PE_UNOPER_EXPECTED: nRID = RID_ERR_UNOPEREXPECTED; break;
2456 case PE_BINOPER_EXPECTED: nRID = RID_ERR_BINOPEREXPECTED; break;
2457 case PE_SYMBOL_EXPECTED: nRID = RID_ERR_SYMBOLEXPECTED; break;
2458 case PE_IDENTIFIER_EXPECTED: nRID = RID_ERR_IDENTEXPECTED; break;
2459 case PE_POUND_EXPECTED: nRID = RID_ERR_POUNDEXPECTED; break;
2460 case PE_COLOR_EXPECTED: nRID = RID_ERR_COLOREXPECTED; break;
2461 case PE_RIGHT_EXPECTED: nRID = RID_ERR_RIGHTEXPECTED; break;
2463 default:
2464 nRID = RID_ERR_UNKNOWN;
2466 pErrDesc->Text += SM_RESSTR(nRID);
2468 m_aErrDescList.push_back( pErrDesc );
2470 return m_aErrDescList.size()-1;
2474 const SmErrorDesc *SmParser::NextError()
2476 if ( !m_aErrDescList.empty() )
2477 if (m_nCurError > 0) return &m_aErrDescList[ --m_nCurError ];
2478 else
2480 m_nCurError = 0;
2481 return &m_aErrDescList[ m_nCurError ];
2483 else return NULL;
2487 const SmErrorDesc *SmParser::PrevError()
2489 if ( !m_aErrDescList.empty() )
2490 if (m_nCurError < (int) (m_aErrDescList.size() - 1)) return &m_aErrDescList[ ++m_nCurError ];
2491 else
2493 m_nCurError = (int) (m_aErrDescList.size() - 1);
2494 return &m_aErrDescList[ m_nCurError ];
2496 else return NULL;
2500 const SmErrorDesc *SmParser::GetError(size_t i)
2502 if ( i < m_aErrDescList.size() )
2503 return &m_aErrDescList[ i ];
2505 if ( (size_t)m_nCurError < m_aErrDescList.size() )
2506 return &m_aErrDescList[ m_nCurError ];
2508 return NULL;
2511 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */