Version 6.1.4.1, tag libreoffice-6.1.4.1
[LibreOffice.git] / starmath / source / parse.cxx
blob232a5273f3bcd95f5a5aaa337267388110d4e58d
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <memory>
21 #include <com/sun/star/i18n/UnicodeType.hpp>
22 #include <i18nlangtag/lang.h>
23 #include <tools/lineend.hxx>
24 #include <unotools/configmgr.hxx>
25 #include <unotools/syslocale.hxx>
26 #include <o3tl/make_unique.hxx>
27 #include <parse.hxx>
28 #include <strings.hrc>
29 #include <smmod.hxx>
30 #include "cfgitem.hxx"
31 #include <cassert>
32 #include <stack>
34 using namespace ::com::sun::star::i18n;
37 SmToken::SmToken()
38 : eType(TUNKNOWN)
39 , cMathChar('\0')
40 , nGroup(TG::NONE)
41 , nLevel(0)
42 , nRow(0)
43 , nCol(0)
47 SmToken::SmToken(SmTokenType eTokenType,
48 sal_Unicode cMath,
49 const sal_Char* pText,
50 TG nTokenGroup,
51 sal_uInt16 nTokenLevel)
52 : aText(OUString::createFromAscii(pText))
53 , eType(eTokenType)
54 , cMathChar(cMath)
55 , nGroup(nTokenGroup)
56 , nLevel(nTokenLevel)
57 , nRow(0)
58 , nCol(0)
// Keyword table searched by SmParser::GetTokenTableEntry().
//
// Each row appears to be { identifier, token type, math character ('\0' when
// there is none), token group(s), level }.
// NOTE(review): the column meaning is inferred from the values and from how
// NextToken() copies pIdent/eType/cMathChar/nGroup/nLevel out of an entry --
// confirm against the SmTokenTableEntry declaration.
//
// The lookup is a linear scan that compares identifiers ignoring ASCII case
// and returns the first hit, so the row order only matters if two identifiers
// are equal when case is ignored. Several identifiers deliberately share one
// token type (e.g. "infinity"/"infty", "ital"/"italic", "sub"/"rsub").
63 static const SmTokenTableEntry aTokenTable[] =
65 { "Im" , TIM, MS_IM, TG::Standalone, 5 },
66 { "Re" , TRE, MS_RE, TG::Standalone, 5 },
67 { "abs", TABS, '\0', TG::UnOper, 13 },
68 { "arcosh", TACOSH, '\0', TG::Function, 5 },
69 { "arcoth", TACOTH, '\0', TG::Function, 5 },
70 { "acute", TACUTE, MS_ACUTE, TG::Attribute, 5 },
71 { "aleph" , TALEPH, MS_ALEPH, TG::Standalone, 5 },
72 { "alignb", TALIGNC, '\0', TG::Align, 0},
73 { "alignc", TALIGNC, '\0', TG::Align, 0},
74 { "alignl", TALIGNL, '\0', TG::Align, 0},
75 { "alignm", TALIGNC, '\0', TG::Align, 0},
76 { "alignr", TALIGNR, '\0', TG::Align, 0},
77 { "alignt", TALIGNC, '\0', TG::Align, 0},
78 { "and", TAND, MS_AND, TG::Product, 0},
79 { "approx", TAPPROX, MS_APPROX, TG::Relation, 0},
80 { "aqua", TAQUA, '\0', TG::Color, 0},
81 { "arccos", TACOS, '\0', TG::Function, 5},
82 { "arccot", TACOT, '\0', TG::Function, 5},
83 { "arcsin", TASIN, '\0', TG::Function, 5},
84 { "arctan", TATAN, '\0', TG::Function, 5},
85 { "arsinh", TASINH, '\0', TG::Function, 5},
86 { "artanh", TATANH, '\0', TG::Function, 5},
87 { "backepsilon" , TBACKEPSILON, MS_BACKEPSILON, TG::Standalone, 5},
88 { "bar", TBAR, MS_BAR, TG::Attribute, 5},
89 { "binom", TBINOM, '\0', TG::NONE, 5 },
90 { "black", TBLACK, '\0', TG::Color, 0},
91 { "blue", TBLUE, '\0', TG::Color, 0},
92 { "bold", TBOLD, '\0', TG::FontAttr, 5},
93 { "boper", TBOPER, '\0', TG::Product, 0},
94 { "breve", TBREVE, MS_BREVE, TG::Attribute, 5},
95 { "bslash", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
96 { "cdot", TCDOT, MS_CDOT, TG::Product, 0},
97 { "check", TCHECK, MS_CHECK, TG::Attribute, 5},
98 { "circ" , TCIRC, MS_CIRC, TG::Standalone, 5},
99 { "circle", TCIRCLE, MS_CIRCLE, TG::Attribute, 5},
100 { "color", TCOLOR, '\0', TG::FontAttr, 5},
101 { "coprod", TCOPROD, MS_COPROD, TG::Oper, 5},
102 { "cos", TCOS, '\0', TG::Function, 5},
103 { "cosh", TCOSH, '\0', TG::Function, 5},
104 { "cot", TCOT, '\0', TG::Function, 5},
105 { "coth", TCOTH, '\0', TG::Function, 5},
106 { "csub", TCSUB, '\0', TG::Power, 0},
107 { "csup", TCSUP, '\0', TG::Power, 0},
108 { "cyan", TCYAN, '\0', TG::Color, 0},
109 { "dddot", TDDDOT, MS_DDDOT, TG::Attribute, 5},
110 { "ddot", TDDOT, MS_DDOT, TG::Attribute, 5},
111 { "def", TDEF, MS_DEF, TG::Relation, 0},
112 { "div", TDIV, MS_DIV, TG::Product, 0},
113 { "divides", TDIVIDES, MS_LINE, TG::Relation, 0},
114 { "dlarrow" , TDLARROW, MS_DLARROW, TG::Standalone, 5},
115 { "dlrarrow" , TDLRARROW, MS_DLRARROW, TG::Standalone, 5},
116 { "dot", TDOT, MS_DOT, TG::Attribute, 5},
117 { "dotsaxis", TDOTSAXIS, MS_DOTSAXIS, TG::Standalone, 5}, // 5 to continue expression
118 { "dotsdiag", TDOTSDIAG, MS_DOTSUP, TG::Standalone, 5},
119 { "dotsdown", TDOTSDOWN, MS_DOTSDOWN, TG::Standalone, 5},
120 { "dotslow", TDOTSLOW, MS_DOTSLOW, TG::Standalone, 5},
121 { "dotsup", TDOTSUP, MS_DOTSUP, TG::Standalone, 5},
122 { "dotsvert", TDOTSVERT, MS_DOTSVERT, TG::Standalone, 5},
123 { "downarrow" , TDOWNARROW, MS_DOWNARROW, TG::Standalone, 5},
124 { "drarrow" , TDRARROW, MS_DRARROW, TG::Standalone, 5},
125 { "emptyset" , TEMPTYSET, MS_EMPTYSET, TG::Standalone, 5},
126 { "equiv", TEQUIV, MS_EQUIV, TG::Relation, 0},
127 { "exists", TEXISTS, MS_EXISTS, TG::Standalone, 5},
128 { "notexists", TNOTEXISTS, MS_NOTEXISTS, TG::Standalone, 5},
129 { "exp", TEXP, '\0', TG::Function, 5},
130 { "fact", TFACT, MS_FACT, TG::UnOper, 5},
131 { "fixed", TFIXED, '\0', TG::Font, 0},
132 { "font", TFONT, '\0', TG::FontAttr, 5},
133 { "forall", TFORALL, MS_FORALL, TG::Standalone, 5},
134 { "from", TFROM, '\0', TG::Limit, 0},
135 { "fuchsia", TFUCHSIA, '\0', TG::Color, 0},
136 { "func", TFUNC, '\0', TG::Function, 5},
137 { "ge", TGE, MS_GE, TG::Relation, 0},
138 { "geslant", TGESLANT, MS_GESLANT, TG::Relation, 0 },
139 { "gg", TGG, MS_GG, TG::Relation, 0},
140 { "grave", TGRAVE, MS_GRAVE, TG::Attribute, 5},
141 { "gray", TGRAY, '\0', TG::Color, 0},
142 { "green", TGREEN, '\0', TG::Color, 0},
143 { "gt", TGT, MS_GT, TG::Relation, 0},
144 { "hat", THAT, MS_HAT, TG::Attribute, 5},
145 { "hbar" , THBAR, MS_HBAR, TG::Standalone, 5},
146 { "iiint", TIIINT, MS_IIINT, TG::Oper, 5},
147 { "iint", TIINT, MS_IINT, TG::Oper, 5},
148 { "in", TIN, MS_IN, TG::Relation, 0},
149 { "infinity" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
150 { "infty" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
151 { "int", TINT, MS_INT, TG::Oper, 5},
152 { "intd", TINTD, MS_INT, TG::Oper, 5},
153 { "intersection", TINTERSECT, MS_INTERSECT, TG::Product, 0},
154 { "ital", TITALIC, '\0', TG::FontAttr, 5},
155 { "italic", TITALIC, '\0', TG::FontAttr, 5},
156 { "lambdabar" , TLAMBDABAR, MS_LAMBDABAR, TG::Standalone, 5},
157 { "langle", TLANGLE, MS_LMATHANGLE, TG::LBrace, 5},
158 { "lbrace", TLBRACE, MS_LBRACE, TG::LBrace, 5},
159 { "lceil", TLCEIL, MS_LCEIL, TG::LBrace, 5},
160 { "ldbracket", TLDBRACKET, MS_LDBRACKET, TG::LBrace, 5},
161 { "ldline", TLDLINE, MS_DVERTLINE, TG::LBrace, 5},
162 { "le", TLE, MS_LE, TG::Relation, 0},
163 { "left", TLEFT, '\0', TG::NONE, 5},
164 { "leftarrow" , TLEFTARROW, MS_LEFTARROW, TG::Standalone, 5},
165 { "leslant", TLESLANT, MS_LESLANT, TG::Relation, 0 },
166 { "lfloor", TLFLOOR, MS_LFLOOR, TG::LBrace, 5},
167 { "lim", TLIM, '\0', TG::Oper, 5},
168 { "lime", TLIME, '\0', TG::Color, 0},
169 { "liminf", TLIMINF, '\0', TG::Oper, 5},
170 { "limsup", TLIMSUP, '\0', TG::Oper, 5},
171 { "lint", TLINT, MS_LINT, TG::Oper, 5},
172 { "ll", TLL, MS_LL, TG::Relation, 0},
173 { "lline", TLLINE, MS_VERTLINE, TG::LBrace, 5},
174 { "llint", TLLINT, MS_LLINT, TG::Oper, 5},
175 { "lllint", TLLLINT, MS_LLLINT, TG::Oper, 5},
176 { "ln", TLN, '\0', TG::Function, 5},
177 { "log", TLOG, '\0', TG::Function, 5},
178 { "lsub", TLSUB, '\0', TG::Power, 0},
179 { "lsup", TLSUP, '\0', TG::Power, 0},
180 { "lt", TLT, MS_LT, TG::Relation, 0},
181 { "magenta", TMAGENTA, '\0', TG::Color, 0},
182 { "maroon", TMAROON, '\0', TG::Color, 0},
183 { "matrix", TMATRIX, '\0', TG::NONE, 5},
184 { "minusplus", TMINUSPLUS, MS_MINUSPLUS, TG::UnOper | TG::Sum, 5},
185 { "mline", TMLINE, MS_VERTLINE, TG::NONE, 0}, //! not in TG::RBrace, Level 0
186 { "nabla", TNABLA, MS_NABLA, TG::Standalone, 5},
187 { "navy", TNAVY, '\0', TG::Color, 0},
188 { "nbold", TNBOLD, '\0', TG::FontAttr, 5},
189 { "ndivides", TNDIVIDES, MS_NDIVIDES, TG::Relation, 0},
190 { "neg", TNEG, MS_NEG, TG::UnOper, 5 },
191 { "neq", TNEQ, MS_NEQ, TG::Relation, 0},
192 { "newline", TNEWLINE, '\0', TG::NONE, 0},
193 { "ni", TNI, MS_NI, TG::Relation, 0},
194 { "nitalic", TNITALIC, '\0', TG::FontAttr, 5},
195 { "none", TNONE, '\0', TG::LBrace | TG::RBrace, 0},
196 { "nospace", TNOSPACE, '\0', TG::Standalone, 5},
197 { "notin", TNOTIN, MS_NOTIN, TG::Relation, 0},
198 { "nroot", TNROOT, MS_SQRT, TG::UnOper, 5},
199 { "nsubset", TNSUBSET, MS_NSUBSET, TG::Relation, 0 },
200 { "nsupset", TNSUPSET, MS_NSUPSET, TG::Relation, 0 },
201 { "nsubseteq", TNSUBSETEQ, MS_NSUBSETEQ, TG::Relation, 0 },
202 { "nsupseteq", TNSUPSETEQ, MS_NSUPSETEQ, TG::Relation, 0 },
203 { "odivide", TODIVIDE, MS_ODIVIDE, TG::Product, 0},
204 { "odot", TODOT, MS_ODOT, TG::Product, 0},
205 { "olive", TOLIVE, '\0', TG::Color, 0},
206 { "ominus", TOMINUS, MS_OMINUS, TG::Sum, 0},
207 { "oper", TOPER, '\0', TG::Oper, 5},
208 { "oplus", TOPLUS, MS_OPLUS, TG::Sum, 0},
209 { "or", TOR, MS_OR, TG::Sum, 0},
210 { "ortho", TORTHO, MS_ORTHO, TG::Relation, 0},
211 { "otimes", TOTIMES, MS_OTIMES, TG::Product, 0},
212 { "over", TOVER, '\0', TG::Product, 0},
213 { "overbrace", TOVERBRACE, MS_OVERBRACE, TG::Product, 5},
214 { "overline", TOVERLINE, '\0', TG::Attribute, 5},
215 { "overstrike", TOVERSTRIKE, '\0', TG::Attribute, 5},
216 { "owns", TNI, MS_NI, TG::Relation, 0},
217 { "parallel", TPARALLEL, MS_DLINE, TG::Relation, 0},
218 { "partial", TPARTIAL, MS_PARTIAL, TG::Standalone, 5 },
219 { "phantom", TPHANTOM, '\0', TG::FontAttr, 5},
220 { "plusminus", TPLUSMINUS, MS_PLUSMINUS, TG::UnOper | TG::Sum, 5},
221 { "prec", TPRECEDES, MS_PRECEDES, TG::Relation, 0 },
222 { "preccurlyeq", TPRECEDESEQUAL, MS_PRECEDESEQUAL, TG::Relation, 0 },
223 { "precsim", TPRECEDESEQUIV, MS_PRECEDESEQUIV, TG::Relation, 0 },
224 { "nprec", TNOTPRECEDES, MS_NOTPRECEDES, TG::Relation, 0 },
225 { "prod", TPROD, MS_PROD, TG::Oper, 5},
226 { "prop", TPROP, MS_PROP, TG::Relation, 0},
227 { "purple", TPURPLE, '\0', TG::Color, 0},
228 { "rangle", TRANGLE, MS_RMATHANGLE, TG::RBrace, 0}, //! 0 to terminate expression
229 { "rbrace", TRBRACE, MS_RBRACE, TG::RBrace, 0},
230 { "rceil", TRCEIL, MS_RCEIL, TG::RBrace, 0},
231 { "rdbracket", TRDBRACKET, MS_RDBRACKET, TG::RBrace, 0},
232 { "rdline", TRDLINE, MS_DVERTLINE, TG::RBrace, 0},
233 { "red", TRED, '\0', TG::Color, 0},
234 { "rfloor", TRFLOOR, MS_RFLOOR, TG::RBrace, 0}, //! 0 to terminate expression
235 { "right", TRIGHT, '\0', TG::NONE, 0},
236 { "rightarrow" , TRIGHTARROW, MS_RIGHTARROW, TG::Standalone, 5},
237 { "rline", TRLINE, MS_VERTLINE, TG::RBrace, 0}, //! 0 to terminate expression
238 { "rsub", TRSUB, '\0', TG::Power, 0},
239 { "rsup", TRSUP, '\0', TG::Power, 0},
240 { "sans", TSANS, '\0', TG::Font, 0},
241 { "serif", TSERIF, '\0', TG::Font, 0},
242 { "setC" , TSETC, MS_SETC, TG::Standalone, 5},
243 { "setN" , TSETN, MS_SETN, TG::Standalone, 5},
244 { "setQ" , TSETQ, MS_SETQ, TG::Standalone, 5},
245 { "setR" , TSETR, MS_SETR, TG::Standalone, 5},
246 { "setZ" , TSETZ, MS_SETZ, TG::Standalone, 5},
247 { "setminus", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
248 { "silver", TSILVER, '\0', TG::Color, 0},
249 { "sim", TSIM, MS_SIM, TG::Relation, 0},
250 { "simeq", TSIMEQ, MS_SIMEQ, TG::Relation, 0},
251 { "sin", TSIN, '\0', TG::Function, 5},
252 { "sinh", TSINH, '\0', TG::Function, 5},
253 { "size", TSIZE, '\0', TG::FontAttr, 5},
254 { "slash", TSLASH, MS_SLASH, TG::Product, 0 },
255 { "sqrt", TSQRT, MS_SQRT, TG::UnOper, 5},
256 { "stack", TSTACK, '\0', TG::NONE, 5},
257 { "sub", TRSUB, '\0', TG::Power, 0},
258 { "subset", TSUBSET, MS_SUBSET, TG::Relation, 0},
259 { "succ", TSUCCEEDS, MS_SUCCEEDS, TG::Relation, 0 },
260 { "succcurlyeq", TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TG::Relation, 0 },
261 { "succsim", TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TG::Relation, 0 },
262 { "nsucc", TNOTSUCCEEDS, MS_NOTSUCCEEDS, TG::Relation, 0 },
263 { "subseteq", TSUBSETEQ, MS_SUBSETEQ, TG::Relation, 0},
264 { "sum", TSUM, MS_SUM, TG::Oper, 5},
265 { "sup", TRSUP, '\0', TG::Power, 0},
266 { "supset", TSUPSET, MS_SUPSET, TG::Relation, 0},
267 { "supseteq", TSUPSETEQ, MS_SUPSETEQ, TG::Relation, 0},
268 { "tan", TTAN, '\0', TG::Function, 5},
269 { "tanh", TTANH, '\0', TG::Function, 5},
270 { "teal", TTEAL, '\0', TG::Color, 0},
271 { "tilde", TTILDE, MS_TILDE, TG::Attribute, 5},
272 { "times", TTIMES, MS_TIMES, TG::Product, 0},
273 { "to", TTO, '\0', TG::Limit, 0},
274 { "toward", TTOWARD, MS_RIGHTARROW, TG::Relation, 0},
275 { "transl", TTRANSL, MS_TRANSL, TG::Relation, 0},
276 { "transr", TTRANSR, MS_TRANSR, TG::Relation, 0},
277 { "underbrace", TUNDERBRACE, MS_UNDERBRACE, TG::Product, 5},
278 { "underline", TUNDERLINE, '\0', TG::Attribute, 5},
279 { "union", TUNION, MS_UNION, TG::Sum, 0},
280 { "uoper", TUOPER, '\0', TG::UnOper, 5},
281 { "uparrow" , TUPARROW, MS_UPARROW, TG::Standalone, 5},
282 { "vec", TVEC, MS_VEC, TG::Attribute, 5},
283 { "white", TWHITE, '\0', TG::Color, 0},
284 { "widebslash", TWIDEBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
285 { "widehat", TWIDEHAT, MS_HAT, TG::Attribute, 5},
286 { "widetilde", TWIDETILDE, MS_TILDE, TG::Attribute, 5},
287 { "wideslash", TWIDESLASH, MS_SLASH, TG::Product, 0 },
288 { "widevec", TWIDEVEC, MS_VEC, TG::Attribute, 5},
289 { "wp" , TWP, MS_WP, TG::Standalone, 5},
290 { "yellow", TYELLOW, '\0', TG::Color, 0}
293 const SmTokenTableEntry * SmParser::GetTokenTableEntry( const OUString &rName )
295 if (!rName.isEmpty())
297 for (auto const &token : aTokenTable)
299 if (rName.equalsIgnoreAsciiCaseAscii( token.pIdent ))
300 return &token;
303 return nullptr;
306 namespace {
308 bool IsDelimiter( const OUString &rTxt, sal_Int32 nPos )
309 // returns 'true' iff cChar is '\0' or a delimiter
311 assert(nPos <= rTxt.getLength()); //index out of range
313 if (nPos == rTxt.getLength())
314 return true;
316 sal_Unicode cChar = rTxt[nPos];
318 // check if 'cChar' is in the delimiter table
319 static const sal_Unicode aDelimiterTable[] =
321 ' ', '\t', '\n', '\r', '+', '-', '*', '/', '=', '#',
322 '%', '\\', '"', '~', '`', '>', '<', '&', '|', '(',
323 ')', '{', '}', '[', ']', '^', '_'
325 for (auto const &cDelimiter : aDelimiterTable)
327 if (cDelimiter == cChar)
328 return true;
331 sal_Int16 nTypJp = SM_MOD()->GetSysLocale().GetCharClass().getType( rTxt, nPos );
332 return ( nTypJp == css::i18n::UnicodeType::SPACE_SEPARATOR ||
333 nTypJp == css::i18n::UnicodeType::CONTROL);
338 void SmParser::Replace( sal_Int32 nPos, sal_Int32 nLen, const OUString &rText )
340 assert( nPos + nLen <= m_aBufferString.getLength() );
342 m_aBufferString = m_aBufferString.replaceAt( nPos, nLen, rText );
343 sal_Int32 nChg = rText.getLength() - nLen;
344 m_nBufferIndex = m_nBufferIndex + nChg;
345 m_nTokenIndex = m_nTokenIndex + nChg;
// Scans the next token of m_aBufferString, starting at m_nBufferIndex, and
// stores it in m_aCurToken.
//
// Line breaks and "%%" comments (which run up to the next '\n') are consumed
// without producing a token; m_nRow and m_nColOff are updated at every line
// break so that the token's nRow/nCol can be filled in. Afterwards
// m_nTokenIndex holds the buffer index at which the token starts, m_aCurToken
// describes the token (eType, cMathChar, nGroup, nLevel, aText, nRow, nCol)
// and, unless the token is TEND, m_nBufferIndex is moved to aRes.EndPos, i.e.
// the position at which the next scan will start.
348 void SmParser::NextToken()
350 // First character may be any alphabetic
351 static const sal_Int32 coStartFlags =
352 KParseTokens::ANY_LETTER |
353 KParseTokens::IGNORE_LEADING_WS;
355 // Continuing characters may be any alphabetic
356 static const sal_Int32 coContFlags =
357 (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
358 | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
360 // user-defined char continuing characters may be any alphanumeric or dot.
361 static const sal_Int32 coUserDefinedCharContFlags =
362 KParseTokens::ANY_LETTER_OR_NUMBER |
363 KParseTokens::ASC_DOT |
364 KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
366 // First character for numbers, may be any numeric or dot
367 static const sal_Int32 coNumStartFlags =
368 KParseTokens::ASC_DIGIT |
369 KParseTokens::ASC_DOT |
370 KParseTokens::IGNORE_LEADING_WS;
372 // Continuing characters for numbers, may be any numeric or dot.
373 static const sal_Int32 coNumContFlags =
374 coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS;
376 sal_Int32 nBufLen = m_aBufferString.getLength();
377 ParseResult aRes;
378 sal_Int32 nRealStart;
379 bool bCont;
382 // skip white spaces
383 while (UnicodeType::SPACE_SEPARATOR ==
384 m_pSysCC->getType( m_aBufferString, m_nBufferIndex ))
385 ++m_nBufferIndex;
387 // Try to parse a number in a locale-independent manner using
388 // '.' as decimal separator.
389 // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
390 aRes = m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER,
391 m_aBufferString, m_nBufferIndex,
392 coNumStartFlags, "",
393 coNumContFlags, "");
395 if (aRes.TokenType == 0)
397 // Try again with the default token parsing.
398 aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
399 coStartFlags, "",
400 coContFlags, "");
403 nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
404 m_nBufferIndex = nRealStart;
406 bCont = false;
407 if ( aRes.TokenType == 0 &&
408 nRealStart < nBufLen &&
409 '\n' == m_aBufferString[ nRealStart ] )
411 // keep data needed for tokens row and col entry up to date
412 ++m_nRow;
413 m_nBufferIndex = m_nColOff = nRealStart + 1;
414 bCont = true;
416 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
418 if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
420 //SkipComment
421 m_nBufferIndex = nRealStart + 2;
422 while (m_nBufferIndex < nBufLen &&
423 '\n' != m_aBufferString[ m_nBufferIndex ])
424 ++m_nBufferIndex;
425 bCont = true;
// bCont is set when only a line break or a "%%" comment was consumed above,
// in which case the scan is repeated from the new m_nBufferIndex.
429 } while (bCont);
431 // set index of current token
432 m_nTokenIndex = m_nBufferIndex;
434 m_aCurToken.nRow = m_nRow;
435 m_aCurToken.nCol = nRealStart - m_nColOff + 1;
// Classify what was found. The branches are tried in this order: end of
// buffer, number, quoted text, identifier/keyword, '_', relational operators
// starting with '<' or '>', and finally single characters.
437 bool bHandled = true;
438 if (nRealStart >= nBufLen)
440 m_aCurToken.eType = TEND;
441 m_aCurToken.cMathChar = '\0';
442 m_aCurToken.nGroup = TG::NONE;
443 m_aCurToken.nLevel = 0;
444 m_aCurToken.aText.clear();
446 else if (aRes.TokenType & KParseType::ANY_NUMBER)
448 assert(aRes.EndPos > 0);
449 if ( m_aBufferString[aRes.EndPos-1] == ',' &&
450 aRes.EndPos < nBufLen &&
451 m_pSysCC->getType( m_aBufferString, aRes.EndPos ) != UnicodeType::SPACE_SEPARATOR )
453 // Comma followed by a non-space char is unlikely for decimal/thousands separator.
454 --aRes.EndPos;
456 sal_Int32 n = aRes.EndPos - nRealStart;
457 assert(n >= 0);
458 m_aCurToken.eType = TNUMBER;
459 m_aCurToken.cMathChar = '\0';
460 m_aCurToken.nGroup = TG::NONE;
461 m_aCurToken.nLevel = 5;
462 m_aCurToken.aText = m_aBufferString.copy( nRealStart, n );
464 SAL_WARN_IF( !IsDelimiter( m_aBufferString, aRes.EndPos ), "starmath", "identifier really finished? (compatibility!)" );
466 else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING)
468 m_aCurToken.eType = TTEXT;
469 m_aCurToken.cMathChar = '\0';
470 m_aCurToken.nGroup = TG::NONE;
471 m_aCurToken.nLevel = 5;
472 m_aCurToken.aText = aRes.DequotedNameOrString;
// The column is one further right than the default computed above
// (presumably to point behind the opening quote -- confirm).
473 m_aCurToken.nRow = m_nRow;
474 m_aCurToken.nCol = nRealStart - m_nColOff + 2;
476 else if (aRes.TokenType & KParseType::IDENTNAME)
478 sal_Int32 n = aRes.EndPos - nRealStart;
479 assert(n >= 0);
480 OUString aName( m_aBufferString.copy( nRealStart, n ) );
481 const SmTokenTableEntry *pEntry = GetTokenTableEntry( aName );
// Known keyword: take every property, including the canonical spelling of
// the identifier, from the table entry. Anything else is a plain identifier.
483 if (pEntry)
485 m_aCurToken.eType = pEntry->eType;
486 m_aCurToken.cMathChar = pEntry->cMathChar;
487 m_aCurToken.nGroup = pEntry->nGroup;
488 m_aCurToken.nLevel = pEntry->nLevel;
489 m_aCurToken.aText = OUString::createFromAscii( pEntry->pIdent );
491 else
493 m_aCurToken.eType = TIDENT;
494 m_aCurToken.cMathChar = '\0';
495 m_aCurToken.nGroup = TG::NONE;
496 m_aCurToken.nLevel = 5;
497 m_aCurToken.aText = aName;
499 SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos),"starmath", "identifier really finished? (compatibility!)");
// A '_' that the character classification did not recognize (TokenType 0)
// yields the same token type as the "sub"/"rsub" keywords.
502 else if (aRes.TokenType == 0 && '_' == m_aBufferString[ nRealStart ])
504 m_aCurToken.eType = TRSUB;
505 m_aCurToken.cMathChar = '\0';
506 m_aCurToken.nGroup = TG::Power;
507 m_aCurToken.nLevel = 0;
508 m_aCurToken.aText = "_";
510 aRes.EndPos = nRealStart + 1;
512 else if (aRes.TokenType & KParseType::BOOLEAN)
514 sal_Int32 &rnEndPos = aRes.EndPos;
515 if (rnEndPos - nRealStart <= 2)
517 sal_Unicode ch = m_aBufferString[ nRealStart ];
518 switch (ch)
520 case '<':
522 if (m_aBufferString.match("<<", nRealStart))
524 m_aCurToken.eType = TLL;
525 m_aCurToken.cMathChar = MS_LL;
526 m_aCurToken.nGroup = TG::Relation;
527 m_aCurToken.nLevel = 0;
528 m_aCurToken.aText = "<<";
530 rnEndPos = nRealStart + 2;
532 else if (m_aBufferString.match("<=", nRealStart))
534 m_aCurToken.eType = TLE;
535 m_aCurToken.cMathChar = MS_LE;
536 m_aCurToken.nGroup = TG::Relation;
537 m_aCurToken.nLevel = 0;
538 m_aCurToken.aText = "<=";
540 rnEndPos = nRealStart + 2;
542 else if (m_aBufferString.match("<-", nRealStart))
544 m_aCurToken.eType = TLEFTARROW;
545 m_aCurToken.cMathChar = MS_LEFTARROW;
546 m_aCurToken.nGroup = TG::Standalone;
547 m_aCurToken.nLevel = 5;
548 m_aCurToken.aText = "<-";
550 rnEndPos = nRealStart + 2;
552 else if (m_aBufferString.match("<>", nRealStart))
554 m_aCurToken.eType = TNEQ;
555 m_aCurToken.cMathChar = MS_NEQ;
556 m_aCurToken.nGroup = TG::Relation;
557 m_aCurToken.nLevel = 0;
558 m_aCurToken.aText = "<>";
560 rnEndPos = nRealStart + 2;
562 else if (m_aBufferString.match("<?>", nRealStart))
564 m_aCurToken.eType = TPLACE;
565 m_aCurToken.cMathChar = MS_PLACE;
566 m_aCurToken.nGroup = TG::NONE;
567 m_aCurToken.nLevel = 5;
568 m_aCurToken.aText = "<?>";
570 rnEndPos = nRealStart + 3;
572 else
574 m_aCurToken.eType = TLT;
575 m_aCurToken.cMathChar = MS_LT;
576 m_aCurToken.nGroup = TG::Relation;
577 m_aCurToken.nLevel = 0;
578 m_aCurToken.aText = "<";
581 break;
582 case '>':
584 if (m_aBufferString.match(">=", nRealStart))
586 m_aCurToken.eType = TGE;
587 m_aCurToken.cMathChar = MS_GE;
588 m_aCurToken.nGroup = TG::Relation;
589 m_aCurToken.nLevel = 0;
590 m_aCurToken.aText = ">=";
592 rnEndPos = nRealStart + 2;
594 else if (m_aBufferString.match(">>", nRealStart))
596 m_aCurToken.eType = TGG;
597 m_aCurToken.cMathChar = MS_GG;
598 m_aCurToken.nGroup = TG::Relation;
599 m_aCurToken.nLevel = 0;
600 m_aCurToken.aText = ">>";
602 rnEndPos = nRealStart + 2;
604 else
606 m_aCurToken.eType = TGT;
607 m_aCurToken.cMathChar = MS_GT;
608 m_aCurToken.nGroup = TG::Relation;
609 m_aCurToken.nLevel = 0;
610 m_aCurToken.aText = ">";
613 break;
614 default:
615 bHandled = false;
619 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
621 sal_Int32 &rnEndPos = aRes.EndPos;
622 if (rnEndPos - nRealStart == 1)
624 sal_Unicode ch = m_aBufferString[ nRealStart ];
625 switch (ch)
627 case '%':
629 //! modifies aRes.EndPos
631 OSL_ENSURE( rnEndPos >= nBufLen ||
632 '%' != m_aBufferString[ rnEndPos ],
633 "unexpected comment start" );
635 // get identifier of user-defined character
636 ParseResult aTmpRes = m_pSysCC->parseAnyToken(
637 m_aBufferString, rnEndPos,
638 KParseTokens::ANY_LETTER,
640 coUserDefinedCharContFlags,
641 "" );
643 sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace;
645 // default setting for the case that no identifier
646 // i.e. a valid symbol-name is following the '%'
647 // character
648 m_aCurToken.eType = TTEXT;
649 m_aCurToken.cMathChar = '\0';
650 m_aCurToken.nGroup = TG::NONE;
651 m_aCurToken.nLevel = 5;
652 m_aCurToken.aText ="%";
653 m_aCurToken.nRow = m_nRow;
654 m_aCurToken.nCol = nTmpStart - m_nColOff;
656 if (aTmpRes.TokenType & KParseType::IDENTNAME)
659 sal_Int32 n = aTmpRes.EndPos - nTmpStart;
660 m_aCurToken.eType = TSPECIAL;
661 m_aCurToken.aText = m_aBufferString.copy( nTmpStart-1, n+1 );
663 OSL_ENSURE( aTmpRes.EndPos > rnEndPos,
664 "empty identifier" );
665 if (aTmpRes.EndPos > rnEndPos)
666 rnEndPos = aTmpRes.EndPos;
667 else
668 ++rnEndPos;
671 // if no symbol-name was found we start-over with
672 // finding the next token right after the '%' sign.
673 // I.e. we leave rnEndPos unmodified.
675 break;
676 case '[':
678 m_aCurToken.eType = TLBRACKET;
679 m_aCurToken.cMathChar = MS_LBRACKET;
680 m_aCurToken.nGroup = TG::LBrace;
681 m_aCurToken.nLevel = 5;
682 m_aCurToken.aText = "[";
684 break;
685 case '\\':
687 m_aCurToken.eType = TESCAPE;
688 m_aCurToken.cMathChar = '\0';
689 m_aCurToken.nGroup = TG::NONE;
690 m_aCurToken.nLevel = 5;
691 m_aCurToken.aText = "\\";
693 break;
694 case ']':
696 m_aCurToken.eType = TRBRACKET;
697 m_aCurToken.cMathChar = MS_RBRACKET;
698 m_aCurToken.nGroup = TG::RBrace;
699 m_aCurToken.nLevel = 0;
700 m_aCurToken.aText = "]";
702 break;
703 case '^':
705 m_aCurToken.eType = TRSUP;
706 m_aCurToken.cMathChar = '\0';
707 m_aCurToken.nGroup = TG::Power;
708 m_aCurToken.nLevel = 0;
709 m_aCurToken.aText = "^";
711 break;
712 case '`':
714 m_aCurToken.eType = TSBLANK;
715 m_aCurToken.cMathChar = '\0';
716 m_aCurToken.nGroup = TG::Blank;
717 m_aCurToken.nLevel = 5;
718 m_aCurToken.aText = "`";
720 break;
721 case '{':
723 m_aCurToken.eType = TLGROUP;
724 m_aCurToken.cMathChar = MS_LBRACE;
725 m_aCurToken.nGroup = TG::NONE;
726 m_aCurToken.nLevel = 5;
727 m_aCurToken.aText = "{";
729 break;
730 case '|':
732 m_aCurToken.eType = TOR;
733 m_aCurToken.cMathChar = MS_OR;
734 m_aCurToken.nGroup = TG::Sum;
735 m_aCurToken.nLevel = 0;
736 m_aCurToken.aText = "|";
738 break;
739 case '}':
741 m_aCurToken.eType = TRGROUP;
742 m_aCurToken.cMathChar = MS_RBRACE;
743 m_aCurToken.nGroup = TG::NONE;
744 m_aCurToken.nLevel = 0;
745 m_aCurToken.aText = "}";
747 break;
748 case '~':
750 m_aCurToken.eType = TBLANK;
751 m_aCurToken.cMathChar = '\0';
752 m_aCurToken.nGroup = TG::Blank;
753 m_aCurToken.nLevel = 5;
754 m_aCurToken.aText = "~";
756 break;
757 case '#':
759 if (m_aBufferString.match("##", nRealStart))
761 m_aCurToken.eType = TDPOUND;
762 m_aCurToken.cMathChar = '\0';
763 m_aCurToken.nGroup = TG::NONE;
764 m_aCurToken.nLevel = 0;
765 m_aCurToken.aText = "##";
767 rnEndPos = nRealStart + 2;
769 else
771 m_aCurToken.eType = TPOUND;
772 m_aCurToken.cMathChar = '\0';
773 m_aCurToken.nGroup = TG::NONE;
774 m_aCurToken.nLevel = 0;
775 m_aCurToken.aText = "#";
778 break;
779 case '&':
781 m_aCurToken.eType = TAND;
782 m_aCurToken.cMathChar = MS_AND;
783 m_aCurToken.nGroup = TG::Product;
784 m_aCurToken.nLevel = 0;
785 m_aCurToken.aText = "&";
787 break;
788 case '(':
790 m_aCurToken.eType = TLPARENT;
791 m_aCurToken.cMathChar = MS_LPARENT;
792 m_aCurToken.nGroup = TG::LBrace;
793 m_aCurToken.nLevel = 5; //! 0 to continue expression
794 m_aCurToken.aText = "(";
796 break;
797 case ')':
799 m_aCurToken.eType = TRPARENT;
800 m_aCurToken.cMathChar = MS_RPARENT;
801 m_aCurToken.nGroup = TG::RBrace;
802 m_aCurToken.nLevel = 0; //! 0 to terminate expression
803 m_aCurToken.aText = ")";
805 break;
806 case '*':
808 m_aCurToken.eType = TMULTIPLY;
809 m_aCurToken.cMathChar = MS_MULTIPLY;
810 m_aCurToken.nGroup = TG::Product;
811 m_aCurToken.nLevel = 0;
812 m_aCurToken.aText = "*";
814 break;
815 case '+':
817 if (m_aBufferString.match("+-", nRealStart))
819 m_aCurToken.eType = TPLUSMINUS;
820 m_aCurToken.cMathChar = MS_PLUSMINUS;
821 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
822 m_aCurToken.nLevel = 5;
823 m_aCurToken.aText = "+-";
825 rnEndPos = nRealStart + 2;
827 else
829 m_aCurToken.eType = TPLUS;
830 m_aCurToken.cMathChar = MS_PLUS;
831 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
832 m_aCurToken.nLevel = 5;
833 m_aCurToken.aText = "+";
836 break;
837 case '-':
839 if (m_aBufferString.match("-+", nRealStart))
841 m_aCurToken.eType = TMINUSPLUS;
842 m_aCurToken.cMathChar = MS_MINUSPLUS;
843 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
844 m_aCurToken.nLevel = 5;
845 m_aCurToken.aText = "-+";
847 rnEndPos = nRealStart + 2;
849 else if (m_aBufferString.match("->", nRealStart))
851 m_aCurToken.eType = TRIGHTARROW;
852 m_aCurToken.cMathChar = MS_RIGHTARROW;
853 m_aCurToken.nGroup = TG::Standalone;
854 m_aCurToken.nLevel = 5;
855 m_aCurToken.aText = "->";
857 rnEndPos = nRealStart + 2;
859 else
861 m_aCurToken.eType = TMINUS;
862 m_aCurToken.cMathChar = MS_MINUS;
863 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
864 m_aCurToken.nLevel = 5;
865 m_aCurToken.aText = "-";
868 break;
869 case '.':
871 // Only one character? Then it can't be a number.
872 if (m_nBufferIndex < m_aBufferString.getLength() - 1)
874 // for compatibility with SO5.2
875 // texts like .34 ...56 ... h ...78..90
876 // will be treated as numbers
877 m_aCurToken.eType = TNUMBER;
878 m_aCurToken.cMathChar = '\0';
879 m_aCurToken.nGroup = TG::NONE;
880 m_aCurToken.nLevel = 5;
882 sal_Int32 nTxtStart = m_nBufferIndex;
883 sal_Unicode cChar;
884 // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only
887 cChar = m_aBufferString[ ++m_nBufferIndex ];
889 while ( (cChar == '.' || rtl::isAsciiDigit( cChar )) &&
890 ( m_nBufferIndex < m_aBufferString.getLength() - 1 ) );
892 m_aCurToken.aText = m_aBufferString.copy( nTxtStart, m_nBufferIndex - nTxtStart );
893 aRes.EndPos = m_nBufferIndex;
895 else
896 bHandled = false;
898 break;
899 case '/':
901 m_aCurToken.eType = TDIVIDEBY;
902 m_aCurToken.cMathChar = MS_SLASH;
903 m_aCurToken.nGroup = TG::Product;
904 m_aCurToken.nLevel = 0;
905 m_aCurToken.aText = "/";
907 break;
908 case '=':
910 m_aCurToken.eType = TASSIGN;
911 m_aCurToken.cMathChar = MS_ASSIGN;
912 m_aCurToken.nGroup = TG::Relation;
913 m_aCurToken.nLevel = 0;
914 m_aCurToken.aText = "=";
916 break;
917 default:
918 bHandled = false;
922 else
923 bHandled = false;
// Anything not handled above becomes a TCHARACTER token holding the single
// character found at nRealStart.
925 if (!bHandled)
927 m_aCurToken.eType = TCHARACTER;
928 m_aCurToken.cMathChar = '\0';
929 m_aCurToken.nGroup = TG::NONE;
930 m_aCurToken.nLevel = 5;
931 m_aCurToken.aText = m_aBufferString.copy( nRealStart, 1 );
933 aRes.EndPos = nRealStart + 1;
// For TEND the buffer index is left where it is.
936 if (TEND != m_aCurToken.eType)
937 m_nBufferIndex = aRes.EndPos;
940 namespace
942 SmNodeArray buildNodeArray(std::vector<std::unique_ptr<SmNode>>& rSubNodes)
944 SmNodeArray aSubArray(rSubNodes.size());
945 for (size_t i = 0; i < rSubNodes.size(); ++i)
946 aSubArray[i] = rSubNodes[i].release();
947 return aSubArray;
951 // grammar
953 std::unique_ptr<SmTableNode> SmParser::DoTable()
955 DepthProtect aDepthGuard(m_nParseDepth);
956 if (aDepthGuard.TooDeep())
957 throw std::range_error("parser depth limit");
959 std::vector<std::unique_ptr<SmNode>> aLineArray;
960 aLineArray.push_back(DoLine());
961 while (m_aCurToken.eType == TNEWLINE)
963 NextToken();
964 aLineArray.push_back(DoLine());
966 assert(m_aCurToken.eType == TEND);
967 std::unique_ptr<SmTableNode> xSNode(new SmTableNode(m_aCurToken));
968 xSNode->SetSubNodes(buildNodeArray(aLineArray));
969 return xSNode;
972 std::unique_ptr<SmNode> SmParser::DoAlign(bool bUseExtraSpaces)
973 // parse alignment info (if any), then go on with rest of expression
975 DepthProtect aDepthGuard(m_nParseDepth);
976 if (aDepthGuard.TooDeep())
977 throw std::range_error("parser depth limit");
979 std::unique_ptr<SmStructureNode> xSNode;
981 if (TokenInGroup(TG::Align))
983 xSNode.reset(new SmAlignNode(m_aCurToken));
985 NextToken();
987 // allow for just one align statement in 5.0
988 if (TokenInGroup(TG::Align))
989 return std::unique_ptr<SmNode>(DoError(SmParseError::DoubleAlign));
992 auto pNode = DoExpression(bUseExtraSpaces);
994 if (xSNode)
996 xSNode->SetSubNode(0, pNode.release());
997 return std::move(xSNode); // this explicit move can be omitted since C++14
999 return pNode;
1002 // Postcondition: m_aCurToken.eType == TEND || m_aCurToken.eType == TNEWLINE
1003 std::unique_ptr<SmNode> SmParser::DoLine()
1005 DepthProtect aDepthGuard(m_nParseDepth);
1006 if (aDepthGuard.TooDeep())
1007 throw std::range_error("parser depth limit");
1009 std::vector<std::unique_ptr<SmNode>> ExpressionArray;
1011 // start with single expression that may have an alignment statement
1012 // (and go on with expressions that must not have alignment
1013 // statements in 'while' loop below. See also 'Expression()'.)
1014 if (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
1015 ExpressionArray.push_back(DoAlign());
1017 while (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
1018 ExpressionArray.push_back(DoExpression());
1020 //If there's no expression, add an empty one.
1021 //this is to avoid a formula tree without any caret
1022 //positions, in visual formula editor.
1023 if(ExpressionArray.empty())
1025 SmToken aTok = SmToken();
1026 aTok.eType = TNEWLINE;
1027 ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok)));
1030 auto xSNode = o3tl::make_unique<SmLineNode>(m_aCurToken);
1031 xSNode->SetSubNodes(buildNodeArray(ExpressionArray));
1032 return std::move(xSNode); // this explicit move can be omitted since C++14
1035 std::unique_ptr<SmNode> SmParser::DoExpression(bool bUseExtraSpaces)
1037 DepthProtect aDepthGuard(m_nParseDepth);
1038 if (aDepthGuard.TooDeep())
1039 throw std::range_error("parser depth limit");
1041 std::vector<std::unique_ptr<SmNode>> RelationArray;
1042 RelationArray.push_back(DoRelation());
1043 while (m_aCurToken.nLevel >= 4)
1044 RelationArray.push_back(DoRelation());
1046 if (RelationArray.size() > 1)
1048 std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken));
1049 xSNode->SetSubNodes(buildNodeArray(RelationArray));
1050 xSNode->SetUseExtraSpaces(bUseExtraSpaces);
1051 // the following explicit move can be omitted since C++14:
1052 // https://stackoverflow.com/questions/22018115/converting-stdunique-ptrderived-to-stdunique-ptrbase
1053 return std::move(xSNode);
1055 else
1057 // This expression has only one node so just push this node.
1058 return std::move(RelationArray[0]);
1062 std::unique_ptr<SmNode> SmParser::DoRelation()
1064 DepthProtect aDepthGuard(m_nParseDepth);
1065 if (aDepthGuard.TooDeep())
1066 throw std::range_error("parser depth limit");
1068 auto xFirst = DoSum();
1069 while (TokenInGroup(TG::Relation))
1071 std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1072 auto xSecond = DoOpSubSup();
1073 auto xThird = DoSum();
1074 xSNode->SetSubNodes(xFirst.release(), xSecond.release(), xThird.release());
1075 xFirst = std::move(xSNode);
1077 return xFirst;
1080 std::unique_ptr<SmNode> SmParser::DoSum()
1082 DepthProtect aDepthGuard(m_nParseDepth);
1083 if (aDepthGuard.TooDeep())
1084 throw std::range_error("parser depth limit");
1086 auto xFirst = DoProduct();
1087 while (TokenInGroup(TG::Sum))
1089 std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1090 auto xSecond = DoOpSubSup();
1091 auto xThird = DoProduct();
1092 xSNode->SetSubNodes(xFirst.release(), xSecond.release(), xThird.release());
1093 xFirst = std::move(xSNode);
1095 return xFirst;
1098 std::unique_ptr<SmNode> SmParser::DoProduct()
1100 DepthProtect aDepthGuard(m_nParseDepth);
1101 if (aDepthGuard.TooDeep())
1102 throw std::range_error("parser depth limit");
1104 auto xFirst = DoPower();
1106 int nDepthLimit = 0;
1108 while (TokenInGroup(TG::Product))
1110 //this linear loop builds a recursive structure, if it gets
1111 //too deep then later processing, e.g. releasing the tree,
1112 //can exhaust stack
1113 if (nDepthLimit > DEPTH_LIMIT)
1114 throw std::range_error("parser depth limit");
1116 std::unique_ptr<SmStructureNode> xSNode;
1117 std::unique_ptr<SmNode> xOper;
1118 bool bSwitchArgs = false;
1120 SmTokenType eType = m_aCurToken.eType;
1121 switch (eType)
1123 case TOVER:
1124 xSNode.reset(new SmBinVerNode(m_aCurToken));
1125 xOper.reset(new SmRectangleNode(m_aCurToken));
1126 NextToken();
1127 break;
1129 case TBOPER:
1130 xSNode.reset(new SmBinHorNode(m_aCurToken));
1132 NextToken();
1134 //Let the glyph node know it's a binary operation
1135 m_aCurToken.eType = TBOPER;
1136 m_aCurToken.nGroup = TG::Product;
1137 xOper.reset(DoGlyphSpecial());
1138 break;
1140 case TOVERBRACE :
1141 case TUNDERBRACE :
1142 xSNode.reset(new SmVerticalBraceNode(m_aCurToken));
1143 xOper.reset(new SmMathSymbolNode(m_aCurToken));
1145 NextToken();
1146 break;
1148 case TWIDEBACKSLASH:
1149 case TWIDESLASH:
1151 SmBinDiagonalNode *pSTmp = new SmBinDiagonalNode(m_aCurToken);
1152 pSTmp->SetAscending(eType == TWIDESLASH);
1153 xSNode.reset(pSTmp);
1155 xOper.reset(new SmPolyLineNode(m_aCurToken));
1156 NextToken();
1158 bSwitchArgs = true;
1159 break;
1162 default:
1163 xSNode.reset(new SmBinHorNode(m_aCurToken));
1165 xOper = DoOpSubSup();
1168 auto xArg = DoPower();
1170 if (bSwitchArgs)
1172 //! vgl siehe SmBinDiagonalNode::Arrange
1173 xSNode->SetSubNodes(xFirst.release(), xArg.release(), xOper.release());
1175 else
1177 xSNode->SetSubNodes(xFirst.release(), xOper.release(), xArg.release());
1179 xFirst = std::move(xSNode);
1180 ++nDepthLimit;
1182 return xFirst;
1185 std::unique_ptr<SmNode> SmParser::DoSubSup(TG nActiveGroup, SmNode *pGivenNode)
1187 std::unique_ptr<SmNode> xGivenNode(pGivenNode);
1188 DepthProtect aDepthGuard(m_nParseDepth);
1189 if (aDepthGuard.TooDeep())
1190 throw std::range_error("parser depth limit");
1192 assert(nActiveGroup == TG::Power || nActiveGroup == TG::Limit);
1193 assert(m_aCurToken.nGroup == nActiveGroup);
1195 std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
1196 //! Of course 'm_aCurToken' is just the first sub-/supscript token.
1197 //! It should be of no further interest. The positions of the
1198 //! sub-/supscripts will be identified by the corresponding subnodes
1199 //! index in the 'aSubNodes' array (enum value from 'SmSubSup').
1201 pNode->SetUseLimits(nActiveGroup == TG::Limit);
1203 // initialize subnodes array
1204 std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
1205 aSubNodes[0] = std::move(xGivenNode);
1207 // process all sub-/supscripts
1208 int nIndex = 0;
1209 while (TokenInGroup(nActiveGroup))
1211 SmTokenType eType (m_aCurToken.eType);
1213 switch (eType)
1215 case TRSUB : nIndex = static_cast<int>(RSUB); break;
1216 case TRSUP : nIndex = static_cast<int>(RSUP); break;
1217 case TFROM :
1218 case TCSUB : nIndex = static_cast<int>(CSUB); break;
1219 case TTO :
1220 case TCSUP : nIndex = static_cast<int>(CSUP); break;
1221 case TLSUB : nIndex = static_cast<int>(LSUB); break;
1222 case TLSUP : nIndex = static_cast<int>(LSUP); break;
1223 default :
1224 SAL_WARN( "starmath", "unknown case");
1226 nIndex++;
1227 assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);
1229 std::unique_ptr<SmNode> xENode;
1230 if (aSubNodes[nIndex]) // if already occupied at earlier iteration
1232 // forget the earlier one, remember an error instead
1233 aSubNodes[nIndex].reset();
1234 xENode.reset(DoError(SmParseError::DoubleSubsupscript)); // this also skips current token.
1236 else
1238 // skip sub-/supscript token
1239 NextToken();
1242 // get sub-/supscript node
1243 // (even when we saw a double-sub/supscript error in the above
1244 // in order to minimize mess and continue parsing.)
1245 std::unique_ptr<SmNode> xSNode;
1246 if (eType == TFROM || eType == TTO)
1248 // parse limits in old 4.0 and 5.0 style
1249 xSNode = DoRelation();
1251 else
1252 xSNode.reset(DoTerm(true));
1254 aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
1257 pNode->SetSubNodes(buildNodeArray(aSubNodes));
1258 return std::move(pNode); // this explicit move can be omitted since C++14
1261 std::unique_ptr<SmNode> SmParser::DoOpSubSup()
1263 DepthProtect aDepthGuard(m_nParseDepth);
1264 if (aDepthGuard.TooDeep())
1265 throw std::range_error("parser depth limit");
1267 // get operator symbol
1268 auto pNode = o3tl::make_unique<SmMathSymbolNode>(m_aCurToken);
1269 // skip operator token
1270 NextToken();
1271 // get sub- supscripts if any
1272 if (m_aCurToken.nGroup == TG::Power)
1273 return DoSubSup(TG::Power, pNode.release());
1274 return std::move(pNode); // this explicit move can be omitted since C++14
1277 std::unique_ptr<SmNode> SmParser::DoPower()
1279 DepthProtect aDepthGuard(m_nParseDepth);
1280 if (aDepthGuard.TooDeep())
1281 throw std::range_error("parser depth limit");
1283 // get body for sub- supscripts on top of stack
1284 std::unique_ptr<SmNode> xNode(DoTerm(false));
1286 if (m_aCurToken.nGroup == TG::Power)
1287 return DoSubSup(TG::Power, xNode.release());
1288 return xNode;
1291 SmBlankNode *SmParser::DoBlank()
1293 DepthProtect aDepthGuard(m_nParseDepth);
1294 if (aDepthGuard.TooDeep())
1295 throw std::range_error("parser depth limit");
1297 assert(TokenInGroup(TG::Blank));
1298 std::unique_ptr<SmBlankNode> pBlankNode(new SmBlankNode(m_aCurToken));
1302 pBlankNode->IncreaseBy(m_aCurToken);
1303 NextToken();
1305 while (TokenInGroup(TG::Blank));
1307 // Ignore trailing spaces, if corresponding option is set
1308 if ( m_aCurToken.eType == TNEWLINE ||
1309 (m_aCurToken.eType == TEND && !utl::ConfigManager::IsFuzzing() && SM_MOD()->GetConfig()->IsIgnoreSpacesRight()) )
1311 pBlankNode->Clear();
1313 return pBlankNode.release();
1316 SmNode *SmParser::DoTerm(bool bGroupNumberIdent)
1318 DepthProtect aDepthGuard(m_nParseDepth);
1319 if (aDepthGuard.TooDeep())
1320 throw std::range_error("parser depth limit");
1322 switch (m_aCurToken.eType)
1324 case TESCAPE :
1325 return DoEscape();
1327 case TNOSPACE :
1328 case TLGROUP :
1330 bool bNoSpace = m_aCurToken.eType == TNOSPACE;
1331 if (bNoSpace)
1332 NextToken();
1333 if (m_aCurToken.eType != TLGROUP)
1334 return DoTerm(false); // nospace is no longer concerned
1336 NextToken();
1338 // allow for empty group
1339 if (m_aCurToken.eType == TRGROUP)
1341 std::unique_ptr<SmStructureNode> xSNode(new SmExpressionNode(m_aCurToken));
1342 xSNode->SetSubNodes(nullptr, nullptr);
1344 NextToken();
1345 return xSNode.release();
1348 auto pNode = DoAlign(!bNoSpace);
1349 if (m_aCurToken.eType == TRGROUP) {
1350 NextToken();
1351 return pNode.release();
1353 auto xSNode = o3tl::make_unique<SmExpressionNode>(m_aCurToken);
1354 std::unique_ptr<SmNode> xError(DoError(SmParseError::RgroupExpected));
1355 xSNode->SetSubNodes(pNode.release(), xError.release());
1356 return xSNode.release();
1359 case TLEFT :
1360 return DoBrace();
1362 case TBLANK :
1363 case TSBLANK :
1364 return DoBlank();
1366 case TTEXT :
1368 auto pNode = o3tl::make_unique<SmTextNode>(m_aCurToken, FNT_TEXT);
1369 NextToken();
1370 return pNode.release();
1372 case TCHARACTER :
1374 auto pNode = o3tl::make_unique<SmTextNode>(m_aCurToken, FNT_VARIABLE);
1375 NextToken();
1376 return pNode.release();
1378 case TIDENT :
1379 case TNUMBER :
1381 auto pTextNode = o3tl::make_unique<SmTextNode>(m_aCurToken,
1382 m_aCurToken.eType == TNUMBER ?
1383 FNT_NUMBER :
1384 FNT_VARIABLE);
1385 if (!bGroupNumberIdent)
1387 NextToken();
1388 return pTextNode.release();
1390 std::vector<std::unique_ptr<SmNode>> aNodes;
1391 // Some people want to be able to write "x_2n" for "x_{2n}"
1392 // although e.g. LaTeX or AsciiMath interpret that as "x_2 n".
1393 // The tokenizer skips whitespaces so we need some additional
1394 // work to distinguish from "x_2 n".
1395 // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and
1396 // https://bugs.libreoffice.org/show_bug.cgi?id=55853
1397 sal_Int32 nBufLen = m_aBufferString.getLength();
1399 // We need to be careful to call NextToken() only after having
1400 // tested for a whitespace separator (otherwise it will be
1401 // skipped!)
1402 bool moveToNextToken = true;
1403 while (m_nBufferIndex < nBufLen &&
1404 m_pSysCC->getType(m_aBufferString, m_nBufferIndex) !=
1405 UnicodeType::SPACE_SEPARATOR)
1407 NextToken();
1408 if (m_aCurToken.eType != TNUMBER &&
1409 m_aCurToken.eType != TIDENT)
1411 // Neither a number nor an identifier. We just moved to
1412 // the next token, so no need to do that again.
1413 moveToNextToken = false;
1414 break;
1416 aNodes.emplace_back(std::unique_ptr<SmNode>(new SmTextNode(m_aCurToken,
1417 m_aCurToken.eType ==
1418 TNUMBER ?
1419 FNT_NUMBER :
1420 FNT_VARIABLE)));
1422 if (moveToNextToken)
1423 NextToken();
1424 if (aNodes.empty())
1425 return pTextNode.release();
1426 // We have several concatenated identifiers and numbers.
1427 // Let's group them into one SmExpressionNode.
1428 aNodes.insert(aNodes.begin(), std::move(pTextNode));
1429 std::unique_ptr<SmExpressionNode> xNode(new SmExpressionNode(SmToken()));
1430 xNode->SetSubNodes(buildNodeArray(aNodes));
1431 return xNode.release();
1433 case TLEFTARROW :
1434 case TRIGHTARROW :
1435 case TUPARROW :
1436 case TDOWNARROW :
1437 case TCIRC :
1438 case TDRARROW :
1439 case TDLARROW :
1440 case TDLRARROW :
1441 case TEXISTS :
1442 case TNOTEXISTS :
1443 case TFORALL :
1444 case TPARTIAL :
1445 case TNABLA :
1446 case TTOWARD :
1447 case TDOTSAXIS :
1448 case TDOTSDIAG :
1449 case TDOTSDOWN :
1450 case TDOTSLOW :
1451 case TDOTSUP :
1452 case TDOTSVERT :
1454 auto pNode = o3tl::make_unique<SmMathSymbolNode>(m_aCurToken);
1455 NextToken();
1456 return pNode.release();
1459 case TSETN :
1460 case TSETZ :
1461 case TSETQ :
1462 case TSETR :
1463 case TSETC :
1464 case THBAR :
1465 case TLAMBDABAR :
1466 case TBACKEPSILON :
1467 case TALEPH :
1468 case TIM :
1469 case TRE :
1470 case TWP :
1471 case TEMPTYSET :
1472 case TINFINITY :
1474 auto pNode = o3tl::make_unique<SmMathIdentifierNode>(m_aCurToken);
1475 NextToken();
1476 return pNode.release();
1479 case TPLACE:
1481 auto pNode = o3tl::make_unique<SmPlaceNode>(m_aCurToken);
1482 NextToken();
1483 return pNode.release();
1486 case TSPECIAL:
1487 return DoSpecial();
1489 case TBINOM:
1490 return DoBinom();
1492 case TSTACK:
1493 return DoStack();
1495 case TMATRIX:
1496 return DoMatrix();
1498 default:
1499 if (TokenInGroup(TG::LBrace))
1500 return DoBrace();
1501 if (TokenInGroup(TG::Oper))
1502 return DoOperator();
1503 if (TokenInGroup(TG::UnOper))
1504 return DoUnOper();
1505 if ( TokenInGroup(TG::Attribute) ||
1506 TokenInGroup(TG::FontAttr) )
1508 std::stack<std::unique_ptr<SmStructureNode>> aStack;
1509 bool bIsAttr;
1510 while ( (bIsAttr = TokenInGroup(TG::Attribute))
1511 || TokenInGroup(TG::FontAttr))
1512 aStack.push(bIsAttr ? DoAttribut() : DoFontAttribut());
1514 auto xFirstNode = DoPower();
1515 while (!aStack.empty())
1517 std::unique_ptr<SmStructureNode> xNode = std::move(aStack.top());
1518 aStack.pop();
1519 xNode->SetSubNodes(nullptr, xFirstNode.release());
1520 xFirstNode = std::move(xNode);
1522 return xFirstNode.release();
1524 if (TokenInGroup(TG::Function))
1525 return DoFunction();
1526 return DoError(SmParseError::UnexpectedChar);
1530 SmNode *SmParser::DoEscape()
1532 DepthProtect aDepthGuard(m_nParseDepth);
1533 if (aDepthGuard.TooDeep())
1534 throw std::range_error("parser depth limit");
1536 NextToken();
1538 switch (m_aCurToken.eType)
1540 case TLPARENT :
1541 case TRPARENT :
1542 case TLBRACKET :
1543 case TRBRACKET :
1544 case TLDBRACKET :
1545 case TRDBRACKET :
1546 case TLBRACE :
1547 case TLGROUP :
1548 case TRBRACE :
1549 case TRGROUP :
1550 case TLANGLE :
1551 case TRANGLE :
1552 case TLCEIL :
1553 case TRCEIL :
1554 case TLFLOOR :
1555 case TRFLOOR :
1556 case TLLINE :
1557 case TRLINE :
1558 case TLDLINE :
1559 case TRDLINE :
1561 auto pNode = o3tl::make_unique<SmMathSymbolNode>(m_aCurToken);
1562 NextToken();
1563 return pNode.release();
1565 default:
1566 return DoError(SmParseError::UnexpectedToken);
1570 SmOperNode *SmParser::DoOperator()
1572 DepthProtect aDepthGuard(m_nParseDepth);
1573 if (aDepthGuard.TooDeep())
1574 throw std::range_error("parser depth limit");
1576 assert(TokenInGroup(TG::Oper));
1578 auto xSNode = o3tl::make_unique<SmOperNode>(m_aCurToken);
1580 // get operator
1581 auto xOperator = DoOper();
1583 if (m_aCurToken.nGroup == TG::Limit || m_aCurToken.nGroup == TG::Power)
1584 xOperator = DoSubSup(m_aCurToken.nGroup, xOperator.release());
1586 // get argument
1587 auto xArg = DoPower();
1589 xSNode->SetSubNodes(xOperator.release(), xArg.release());
1590 return xSNode.release();
1593 std::unique_ptr<SmNode> SmParser::DoOper()
1595 DepthProtect aDepthGuard(m_nParseDepth);
1596 if (aDepthGuard.TooDeep())
1597 throw std::range_error("parser depth limit");
1599 SmTokenType eType (m_aCurToken.eType);
1600 std::unique_ptr<SmNode> pNode;
1602 switch (eType)
1604 case TSUM :
1605 case TPROD :
1606 case TCOPROD :
1607 case TINT :
1608 case TINTD :
1609 case TIINT :
1610 case TIIINT :
1611 case TLINT :
1612 case TLLINT :
1613 case TLLLINT :
1614 pNode.reset(new SmMathSymbolNode(m_aCurToken));
1615 break;
1617 case TLIM :
1618 case TLIMSUP :
1619 case TLIMINF :
1621 const sal_Char* pLim = nullptr;
1622 switch (eType)
1624 case TLIM : pLim = "lim"; break;
1625 case TLIMSUP : pLim = "lim sup"; break;
1626 case TLIMINF : pLim = "lim inf"; break;
1627 default:
1628 break;
1630 if( pLim )
1631 m_aCurToken.aText = OUString::createFromAscii(pLim);
1632 pNode.reset(new SmTextNode(m_aCurToken, FNT_TEXT));
1634 break;
1636 case TOPER :
1637 NextToken();
1639 OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token");
1640 pNode.reset(new SmGlyphSpecialNode(m_aCurToken));
1641 break;
1643 default :
1644 assert(false && "unknown case");
1647 NextToken();
1648 return pNode;
1651 SmStructureNode *SmParser::DoUnOper()
1653 DepthProtect aDepthGuard(m_nParseDepth);
1654 if (aDepthGuard.TooDeep())
1655 throw std::range_error("parser depth limit");
1657 assert(TokenInGroup(TG::UnOper));
1659 SmToken aNodeToken = m_aCurToken;
1660 SmTokenType eType = m_aCurToken.eType;
1661 bool bIsPostfix = eType == TFACT;
1663 std::unique_ptr<SmStructureNode> xSNode;
1664 std::unique_ptr<SmNode> xOper;
1665 std::unique_ptr<SmNode> xExtra;
1666 std::unique_ptr<SmNode> xArg;
1668 switch (eType)
1670 case TABS :
1671 case TSQRT :
1672 NextToken();
1673 break;
1675 case TNROOT :
1676 NextToken();
1677 xExtra = DoPower();
1678 break;
1680 case TUOPER :
1681 NextToken();
1682 //Let the glyph know what it is...
1683 m_aCurToken.eType = TUOPER;
1684 m_aCurToken.nGroup = TG::UnOper;
1685 xOper.reset(DoGlyphSpecial());
1686 break;
1688 case TPLUS :
1689 case TMINUS :
1690 case TPLUSMINUS :
1691 case TMINUSPLUS :
1692 case TNEG :
1693 case TFACT :
1694 xOper = DoOpSubSup();
1695 break;
1697 default :
1698 assert(false);
1701 // get argument
1702 xArg = DoPower();
1704 if (eType == TABS)
1706 xSNode.reset(new SmBraceNode(aNodeToken));
1707 xSNode->SetScaleMode(SmScaleMode::Height);
1709 // build nodes for left & right lines
1710 // (text, group, level of the used token are of no interest here)
1711 // we'll use row & column of the keyword for abs
1712 aNodeToken.eType = TABS;
1714 aNodeToken.cMathChar = MS_VERTLINE;
1715 std::unique_ptr<SmNode> xLeft(new SmMathSymbolNode(aNodeToken));
1717 aNodeToken.cMathChar = MS_VERTLINE;
1718 std::unique_ptr<SmNode> xRight(new SmMathSymbolNode(aNodeToken));
1720 xSNode->SetSubNodes(xLeft.release(), xArg.release(), xRight.release());
1722 else if (eType == TSQRT || eType == TNROOT)
1724 xSNode.reset(new SmRootNode(aNodeToken));
1725 xOper.reset(new SmRootSymbolNode(aNodeToken));
1726 xSNode->SetSubNodes(xExtra.release(), xOper.release(), xArg.release());
1728 else
1730 xSNode.reset(new SmUnHorNode(aNodeToken));
1731 if (bIsPostfix)
1732 xSNode->SetSubNodes(xArg.release(), xOper.release());
1733 else
1735 // prefix operator
1736 xSNode->SetSubNodes(xOper.release(), xArg.release());
1739 return xSNode.release();
1742 std::unique_ptr<SmStructureNode> SmParser::DoAttribut()
1744 DepthProtect aDepthGuard(m_nParseDepth);
1745 if (aDepthGuard.TooDeep())
1746 throw std::range_error("parser depth limit");
1748 assert(TokenInGroup(TG::Attribute));
1750 auto xSNode = o3tl::make_unique<SmAttributNode>(m_aCurToken);
1751 std::unique_ptr<SmNode> xAttr;
1752 SmScaleMode eScaleMode = SmScaleMode::None;
1754 // get appropriate node for the attribute itself
1755 switch (m_aCurToken.eType)
1756 { case TUNDERLINE :
1757 case TOVERLINE :
1758 case TOVERSTRIKE :
1759 xAttr.reset(new SmRectangleNode(m_aCurToken));
1760 eScaleMode = SmScaleMode::Width;
1761 break;
1763 case TWIDEVEC :
1764 case TWIDEHAT :
1765 case TWIDETILDE :
1766 xAttr.reset(new SmMathSymbolNode(m_aCurToken));
1767 eScaleMode = SmScaleMode::Width;
1768 break;
1770 default :
1771 xAttr.reset(new SmMathSymbolNode(m_aCurToken));
1774 NextToken();
1776 xSNode->SetSubNodes(xAttr.release(), nullptr); // the body will be filled later
1777 xSNode->SetScaleMode(eScaleMode);
1778 return std::move(xSNode); // this explicit move can be omitted since C++14
1781 std::unique_ptr<SmStructureNode> SmParser::DoFontAttribut()
1783 DepthProtect aDepthGuard(m_nParseDepth);
1784 if (aDepthGuard.TooDeep())
1785 throw std::range_error("parser depth limit");
1787 assert(TokenInGroup(TG::FontAttr));
1789 switch (m_aCurToken.eType)
1791 case TITALIC :
1792 case TNITALIC :
1793 case TBOLD :
1794 case TNBOLD :
1795 case TPHANTOM :
1797 auto pNode = o3tl::make_unique<SmFontNode>(m_aCurToken);
1798 NextToken();
1799 return std::move(pNode); // this explicit move can be omitted since C++14
1802 case TSIZE :
1803 return DoFontSize();
1805 case TFONT :
1806 return DoFont();
1808 case TCOLOR :
1809 return DoColor();
1811 default :
1812 assert(false);
1813 return {};
1817 std::unique_ptr<SmStructureNode> SmParser::DoColor()
1819 DepthProtect aDepthGuard(m_nParseDepth);
1820 if (aDepthGuard.TooDeep())
1821 throw std::range_error("parser depth limit");
1823 assert(m_aCurToken.eType == TCOLOR);
1825 std::unique_ptr<SmStructureNode> xNode;
1826 // last color rules, get that one
1827 SmToken aToken;
1829 { NextToken();
1831 if (TokenInGroup(TG::Color))
1832 { aToken = m_aCurToken;
1833 NextToken();
1835 else
1837 xNode.reset(DoError(SmParseError::ColorExpected));
1838 return xNode;
1840 } while (m_aCurToken.eType == TCOLOR);
1842 xNode.reset(new SmFontNode(aToken));
1843 return xNode;
1846 std::unique_ptr<SmStructureNode> SmParser::DoFont()
1848 DepthProtect aDepthGuard(m_nParseDepth);
1849 if (aDepthGuard.TooDeep())
1850 throw std::range_error("parser depth limit");
1852 assert(m_aCurToken.eType == TFONT);
1854 std::unique_ptr<SmStructureNode> xNode;
1855 // last font rules, get that one
1856 SmToken aToken;
1858 { NextToken();
1860 if (TokenInGroup(TG::Font))
1861 { aToken = m_aCurToken;
1862 NextToken();
1864 else
1866 xNode.reset(DoError(SmParseError::FontExpected));
1867 return xNode;
1869 } while (m_aCurToken.eType == TFONT);
1871 xNode.reset(new SmFontNode(aToken));
1872 return xNode;
1876 // gets number used as arguments in Math formulas (e.g. 'size' command)
1877 // Format: no negative numbers, must start with a digit, no exponent notation, ...
1878 static bool lcl_IsNumber(const OUString& rText)
1880 bool bPoint = false;
1881 const sal_Unicode* pBuffer = rText.getStr();
1882 for(sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
1884 const sal_Unicode cChar = *pBuffer;
1885 if(cChar == '.')
1887 if(bPoint)
1888 return false;
1889 else
1890 bPoint = true;
1892 else if ( !rtl::isAsciiDigit( cChar ) )
1893 return false;
1895 return true;
1898 std::unique_ptr<SmStructureNode> SmParser::DoFontSize()
1900 DepthProtect aDepthGuard(m_nParseDepth);
1901 if (aDepthGuard.TooDeep())
1902 throw std::range_error("parser depth limit");
1904 assert(m_aCurToken.eType == TSIZE);
1906 FontSizeType Type;
1907 std::unique_ptr<SmFontNode> pFontNode(new SmFontNode(m_aCurToken));
1909 NextToken();
1911 switch (m_aCurToken.eType)
1913 case TNUMBER: Type = FontSizeType::ABSOLUT; break;
1914 case TPLUS: Type = FontSizeType::PLUS; break;
1915 case TMINUS: Type = FontSizeType::MINUS; break;
1916 case TMULTIPLY: Type = FontSizeType::MULTIPLY; break;
1917 case TDIVIDEBY: Type = FontSizeType::DIVIDE; break;
1919 default:
1920 return std::unique_ptr<SmStructureNode>(DoError(SmParseError::SizeExpected));
1923 if (Type != FontSizeType::ABSOLUT)
1925 NextToken();
1926 if (m_aCurToken.eType != TNUMBER)
1927 return std::unique_ptr<SmStructureNode>(DoError(SmParseError::SizeExpected));
1930 // get number argument
1931 Fraction aValue( 1 );
1932 if (lcl_IsNumber( m_aCurToken.aText ))
1934 double fTmp = m_aCurToken.aText.toDouble();
1935 if (fTmp != 0.0)
1937 aValue = fTmp;
1939 //!! keep the numerator and denominator from being to large
1940 //!! otherwise ongoing multiplications may result in overflows
1941 //!! (for example in SmNode::SetFontSize the font size calculated
1942 //!! may become 0 because of this!!! Happens e.g. for ftmp = 2.9 with Linux
1943 //!! or ftmp = 1.11111111111111111... (11/9) on every platform.)
1944 if (aValue.GetDenominator() > 1000)
1946 long nNum = aValue.GetNumerator();
1947 long nDenom = aValue.GetDenominator();
1948 while (nDenom > 1000)
1950 nNum /= 10;
1951 nDenom /= 10;
1953 aValue = Fraction( nNum, nDenom );
1958 NextToken();
1960 pFontNode->SetSizeParameter(aValue, Type);
1961 return std::move(pFontNode); // this explicit move can be omitted since C++14
1964 SmStructureNode *SmParser::DoBrace()
1966 DepthProtect aDepthGuard(m_nParseDepth);
1967 if (aDepthGuard.TooDeep())
1968 throw std::range_error("parser depth limit");
1970 assert(m_aCurToken.eType == TLEFT || TokenInGroup(TG::LBrace));
1972 std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
1973 std::unique_ptr<SmNode> pBody, pLeft, pRight;
1974 SmScaleMode eScaleMode = SmScaleMode::None;
1975 SmParseError eError = SmParseError::None;
1977 if (m_aCurToken.eType == TLEFT)
1978 { NextToken();
1980 eScaleMode = SmScaleMode::Height;
1982 // check for left bracket
1983 if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
1985 pLeft.reset(new SmMathSymbolNode(m_aCurToken));
1987 NextToken();
1988 pBody.reset(DoBracebody(true));
1990 if (m_aCurToken.eType == TRIGHT)
1991 { NextToken();
1993 // check for right bracket
1994 if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
1996 pRight.reset(new SmMathSymbolNode(m_aCurToken));
1997 NextToken();
1999 else
2000 eError = SmParseError::RbraceExpected;
2002 else
2003 eError = SmParseError::RightExpected;
2005 else
2006 eError = SmParseError::LbraceExpected;
2008 else
2010 assert(TokenInGroup(TG::LBrace));
2012 pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2014 NextToken();
2015 pBody.reset(DoBracebody(false));
2017 SmTokenType eExpectedType = TUNKNOWN;
2018 switch (pLeft->GetToken().eType)
2019 { case TLPARENT : eExpectedType = TRPARENT; break;
2020 case TLBRACKET : eExpectedType = TRBRACKET; break;
2021 case TLBRACE : eExpectedType = TRBRACE; break;
2022 case TLDBRACKET : eExpectedType = TRDBRACKET; break;
2023 case TLLINE : eExpectedType = TRLINE; break;
2024 case TLDLINE : eExpectedType = TRDLINE; break;
2025 case TLANGLE : eExpectedType = TRANGLE; break;
2026 case TLFLOOR : eExpectedType = TRFLOOR; break;
2027 case TLCEIL : eExpectedType = TRCEIL; break;
2028 default :
2029 SAL_WARN("starmath", "unknown case");
2032 if (m_aCurToken.eType == eExpectedType)
2034 pRight.reset(new SmMathSymbolNode(m_aCurToken));
2035 NextToken();
2037 else
2038 eError = SmParseError::ParentMismatch;
2041 if (eError == SmParseError::None)
2043 assert(pLeft);
2044 assert(pRight);
2045 xSNode->SetSubNodes(pLeft.release(), pBody.release(), pRight.release());
2046 xSNode->SetScaleMode(eScaleMode);
2047 return xSNode.release();
2049 return DoError(eError);
2052 SmBracebodyNode *SmParser::DoBracebody(bool bIsLeftRight)
2054 DepthProtect aDepthGuard(m_nParseDepth);
2055 if (aDepthGuard.TooDeep())
2056 throw std::range_error("parser depth limit");
2058 auto pBody = o3tl::make_unique<SmBracebodyNode>(m_aCurToken);
2060 std::vector<std::unique_ptr<SmNode>> aNodes;
2061 // get body if any
2062 if (bIsLeftRight)
2066 if (m_aCurToken.eType == TMLINE)
2068 aNodes.emplace_back(o3tl::make_unique<SmMathSymbolNode>(m_aCurToken));
2069 NextToken();
2071 else if (m_aCurToken.eType != TRIGHT)
2073 aNodes.push_back(DoAlign());
2074 if (m_aCurToken.eType != TMLINE && m_aCurToken.eType != TRIGHT)
2075 aNodes.emplace_back(std::unique_ptr<SmNode>(DoError(SmParseError::RightExpected)));
2077 } while (m_aCurToken.eType != TEND && m_aCurToken.eType != TRIGHT);
2079 else
2083 if (m_aCurToken.eType == TMLINE)
2085 aNodes.emplace_back(o3tl::make_unique<SmMathSymbolNode>(m_aCurToken));
2086 NextToken();
2088 else if (!TokenInGroup(TG::RBrace))
2090 aNodes.push_back(DoAlign());
2091 if (m_aCurToken.eType != TMLINE && !TokenInGroup(TG::RBrace))
2092 aNodes.emplace_back(std::unique_ptr<SmNode>(DoError(SmParseError::RbraceExpected)));
2094 } while (m_aCurToken.eType != TEND && !TokenInGroup(TG::RBrace));
2097 pBody->SetSubNodes(buildNodeArray(aNodes));
2098 pBody->SetScaleMode(bIsLeftRight ? SmScaleMode::Height : SmScaleMode::None);
2099 return pBody.release();
2102 SmTextNode *SmParser::DoFunction()
2104 DepthProtect aDepthGuard(m_nParseDepth);
2105 if (aDepthGuard.TooDeep())
2106 throw std::range_error("parser depth limit");
2108 switch (m_aCurToken.eType)
2110 case TFUNC:
2111 NextToken(); // skip "FUNC"-statement
2112 SAL_FALLTHROUGH;
2114 case TSIN :
2115 case TCOS :
2116 case TTAN :
2117 case TCOT :
2118 case TASIN :
2119 case TACOS :
2120 case TATAN :
2121 case TACOT :
2122 case TSINH :
2123 case TCOSH :
2124 case TTANH :
2125 case TCOTH :
2126 case TASINH :
2127 case TACOSH :
2128 case TATANH :
2129 case TACOTH :
2130 case TLN :
2131 case TLOG :
2132 case TEXP :
2134 auto pNode = o3tl::make_unique<SmTextNode>(m_aCurToken, FNT_FUNCTION);
2135 NextToken();
2136 return pNode.release();
2139 default:
2140 assert(false);
2141 return nullptr;
2145 SmTableNode *SmParser::DoBinom()
2147 DepthProtect aDepthGuard(m_nParseDepth);
2148 if (aDepthGuard.TooDeep())
2149 throw std::range_error("parser depth limit");
2151 auto xSNode = o3tl::make_unique<SmTableNode>(m_aCurToken);
2153 NextToken();
2155 auto xFirst = DoSum();
2156 auto xSecond = DoSum();
2157 xSNode->SetSubNodes(xFirst.release(), xSecond.release());
2158 return xSNode.release();
2161 SmStructureNode *SmParser::DoStack()
2163 DepthProtect aDepthGuard(m_nParseDepth);
2164 if (aDepthGuard.TooDeep())
2165 throw std::range_error("parser depth limit");
2167 std::unique_ptr<SmStructureNode> xSNode(new SmTableNode(m_aCurToken));
2168 NextToken();
2169 if (m_aCurToken.eType != TLGROUP)
2170 return DoError(SmParseError::LgroupExpected);
2171 std::vector<std::unique_ptr<SmNode>> aExprArr;
2174 NextToken();
2175 aExprArr.push_back(DoAlign());
2177 while (m_aCurToken.eType == TPOUND);
2179 if (m_aCurToken.eType == TRGROUP)
2180 NextToken();
2181 else
2182 aExprArr.emplace_back(std::unique_ptr<SmNode>(DoError(SmParseError::RgroupExpected)));
2184 xSNode->SetSubNodes(buildNodeArray(aExprArr));
2185 return xSNode.release();
2188 SmStructureNode *SmParser::DoMatrix()
2190 DepthProtect aDepthGuard(m_nParseDepth);
2191 if (aDepthGuard.TooDeep())
2192 throw std::range_error("parser depth limit");
2194 std::unique_ptr<SmMatrixNode> xMNode(new SmMatrixNode(m_aCurToken));
2195 NextToken();
2196 if (m_aCurToken.eType != TLGROUP)
2197 return DoError(SmParseError::LgroupExpected);
2199 std::vector<std::unique_ptr<SmNode>> aExprArr;
2202 NextToken();
2203 aExprArr.push_back(DoAlign());
2205 while (m_aCurToken.eType == TPOUND);
2207 size_t nCol = aExprArr.size();
2208 size_t nRow = 1;
2209 while (m_aCurToken.eType == TDPOUND)
2211 NextToken();
2212 for (size_t i = 0; i < nCol; i++)
2214 auto xNode = DoAlign();
2215 if (i < (nCol - 1))
2217 if (m_aCurToken.eType == TPOUND)
2218 NextToken();
2219 else
2220 xNode.reset(DoError(SmParseError::PoundExpected));
2222 aExprArr.emplace_back(std::move(xNode));
2224 ++nRow;
2227 if (m_aCurToken.eType == TRGROUP)
2228 NextToken();
2229 else
2231 std::unique_ptr<SmNode> xENode(DoError(SmParseError::RgroupExpected));
2232 if (aExprArr.empty())
2233 nRow = nCol = 1;
2234 else
2235 aExprArr.pop_back();
2236 aExprArr.emplace_back(std::move(xENode));
2239 xMNode->SetSubNodes(buildNodeArray(aExprArr));
2240 xMNode->SetRowCol(static_cast<sal_uInt16>(nRow),
2241 static_cast<sal_uInt16>(nCol));
2242 return xMNode.release();
2245 SmSpecialNode *SmParser::DoSpecial()
2247 DepthProtect aDepthGuard(m_nParseDepth);
2248 if (aDepthGuard.TooDeep())
2249 throw std::range_error("parser depth limit");
2251 bool bReplace = false;
2252 OUString &rName = m_aCurToken.aText;
2253 OUString aNewName;
2255 // conversion of symbol names for 6.0 (XML) file format
2256 // (name change on import / export.
2257 // UI uses localized names XML file format does not.)
2258 if( rName.startsWith("%") )
2260 if (IsImportSymbolNames())
2262 aNewName = SmLocalizedSymbolData::GetUiSymbolName(rName.copy(1));
2263 bReplace = true;
2265 else if (IsExportSymbolNames())
2267 aNewName = SmLocalizedSymbolData::GetExportSymbolName(rName.copy(1));
2268 bReplace = true;
2271 if (!aNewName.isEmpty())
2272 aNewName = "%" + aNewName;
2275 if (bReplace && !aNewName.isEmpty() && rName != aNewName)
2277 Replace(GetTokenIndex(), rName.getLength(), aNewName);
2278 rName = aNewName;
2281 // add symbol name to list of used symbols
2282 const OUString aSymbolName(m_aCurToken.aText.copy(1));
2283 if (!aSymbolName.isEmpty())
2284 m_aUsedSymbols.insert( aSymbolName );
2286 auto pNode = o3tl::make_unique<SmSpecialNode>(m_aCurToken);
2287 NextToken();
2288 return pNode.release();
2291 SmGlyphSpecialNode *SmParser::DoGlyphSpecial()
2293 DepthProtect aDepthGuard(m_nParseDepth);
2294 if (aDepthGuard.TooDeep())
2295 throw std::range_error("parser depth limit");
2297 auto pNode = o3tl::make_unique<SmGlyphSpecialNode>(m_aCurToken);
2298 NextToken();
2299 return pNode.release();
2302 SmExpressionNode *SmParser::DoError(SmParseError eError)
2304 DepthProtect aDepthGuard(m_nParseDepth);
2305 if (aDepthGuard.TooDeep())
2306 throw std::range_error("parser depth limit");
2308 auto xSNode = o3tl::make_unique<SmExpressionNode>(m_aCurToken);
2309 SmErrorNode *pErr = new SmErrorNode(m_aCurToken);
2310 xSNode->SetSubNodes(pErr, nullptr);
2312 AddError(eError, xSNode.get());
2314 NextToken();
2316 return xSNode.release();
2319 // end grammar
2322 SmParser::SmParser()
2323 : m_nCurError( 0 )
2324 , m_nBufferIndex( 0 )
2325 , m_nTokenIndex( 0 )
2326 , m_nRow( 0 )
2327 , m_nColOff( 0 )
2328 , m_bImportSymNames( false )
2329 , m_bExportSymNames( false )
2330 , m_nParseDepth(0)
2331 , m_aNumCC( LanguageTag( LANGUAGE_ENGLISH_US ) )
2332 , m_pSysCC( SM_MOD()->GetSysLocale().GetCharClassPtr() )
2336 std::unique_ptr<SmTableNode> SmParser::Parse(const OUString &rBuffer)
2338 m_aUsedSymbols.clear();
2340 m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2341 m_nBufferIndex = 0;
2342 m_nTokenIndex = 0;
2343 m_nRow = 1;
2344 m_nColOff = 0;
2345 m_nCurError = -1;
2347 m_aErrDescList.clear();
2349 NextToken();
2350 return DoTable();
2353 std::unique_ptr<SmNode> SmParser::ParseExpression(const OUString &rBuffer)
2355 m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2356 m_nBufferIndex = 0;
2357 m_nTokenIndex = 0;
2358 m_nRow = 1;
2359 m_nColOff = 0;
2360 m_nCurError = -1;
2362 m_aErrDescList.clear();
2364 NextToken();
2365 return DoExpression();
2369 void SmParser::AddError(SmParseError Type, SmNode *pNode)
2371 std::unique_ptr<SmErrorDesc> pErrDesc(new SmErrorDesc);
2373 pErrDesc->m_eType = Type;
2374 pErrDesc->m_pNode = pNode;
2375 pErrDesc->m_aText = SmResId(RID_ERR_IDENT);
2377 const char* pRID;
2378 switch (Type)
2380 case SmParseError::UnexpectedChar: pRID = RID_ERR_UNEXPECTEDCHARACTER; break;
2381 case SmParseError::UnexpectedToken: pRID = RID_ERR_UNEXPECTEDTOKEN; break;
2382 case SmParseError::PoundExpected: pRID = RID_ERR_POUNDEXPECTED; break;
2383 case SmParseError::ColorExpected: pRID = RID_ERR_COLOREXPECTED; break;
2384 case SmParseError::LgroupExpected: pRID = RID_ERR_LGROUPEXPECTED; break;
2385 case SmParseError::RgroupExpected: pRID = RID_ERR_RGROUPEXPECTED; break;
2386 case SmParseError::LbraceExpected: pRID = RID_ERR_LBRACEEXPECTED; break;
2387 case SmParseError::RbraceExpected: pRID = RID_ERR_RBRACEEXPECTED; break;
2388 case SmParseError::ParentMismatch: pRID = RID_ERR_PARENTMISMATCH; break;
2389 case SmParseError::RightExpected: pRID = RID_ERR_RIGHTEXPECTED; break;
2390 case SmParseError::FontExpected: pRID = RID_ERR_FONTEXPECTED; break;
2391 case SmParseError::SizeExpected: pRID = RID_ERR_SIZEEXPECTED; break;
2392 case SmParseError::DoubleAlign: pRID = RID_ERR_DOUBLEALIGN; break;
2393 case SmParseError::DoubleSubsupscript: pRID = RID_ERR_DOUBLESUBSUPSCRIPT; break;
2394 default:
2395 assert(false);
2396 return;
2398 pErrDesc->m_aText += SmResId(pRID);
2400 m_aErrDescList.push_back(std::move(pErrDesc));
2404 const SmErrorDesc *SmParser::NextError()
2406 if ( !m_aErrDescList.empty() )
2407 if (m_nCurError > 0) return m_aErrDescList[ --m_nCurError ].get();
2408 else
2410 m_nCurError = 0;
2411 return m_aErrDescList[ m_nCurError ].get();
2413 else return nullptr;
2417 const SmErrorDesc *SmParser::PrevError()
2419 if ( !m_aErrDescList.empty() )
2420 if (m_nCurError < static_cast<int>(m_aErrDescList.size() - 1)) return m_aErrDescList[ ++m_nCurError ].get();
2421 else
2423 m_nCurError = static_cast<int>(m_aErrDescList.size() - 1);
2424 return m_aErrDescList[ m_nCurError ].get();
2426 else return nullptr;
2430 const SmErrorDesc *SmParser::GetError()
2432 if ( !m_aErrDescList.empty() )
2433 return m_aErrDescList.front().get();
2434 return nullptr;
2437 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */