Bump version to 21.06.18.1
[LibreOffice.git] / starmath / source / parse.cxx
bloba2c4e338251729d3e95b20e9e742a54594695864
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <memory>
21 #include <com/sun/star/i18n/UnicodeType.hpp>
22 #include <com/sun/star/i18n/KParseTokens.hpp>
23 #include <com/sun/star/i18n/KParseType.hpp>
24 #include <i18nlangtag/lang.h>
25 #include <tools/lineend.hxx>
26 #include <unotools/configmgr.hxx>
27 #include <unotools/syslocale.hxx>
28 #include <sal/log.hxx>
29 #include <osl/diagnose.h>
30 #include <rtl/character.hxx>
31 #include <node.hxx>
32 #include <parse.hxx>
33 #include <strings.hrc>
34 #include <smmod.hxx>
35 #include "cfgitem.hxx"
36 #include <cassert>
37 #include <stack>
38 #include <starmathdatabase.hxx>
40 using namespace ::com::sun::star::i18n;
42 //Definition of math keywords
43 const SmTokenTableEntry aTokenTable[] =
45 { "abs", TABS, '\0', TG::UnOper, 13 },
46 { "acute", TACUTE, MS_ACUTE, TG::Attribute, 5 },
47 { "aleph" , TALEPH, MS_ALEPH, TG::Standalone, 5 },
48 { "alignb", TALIGNC, '\0', TG::Align, 0},
49 { "alignc", TALIGNC, '\0', TG::Align, 0},
50 { "alignl", TALIGNL, '\0', TG::Align, 0},
51 { "alignm", TALIGNC, '\0', TG::Align, 0},
52 { "alignr", TALIGNR, '\0', TG::Align, 0},
53 { "alignt", TALIGNC, '\0', TG::Align, 0},
54 { "and", TAND, MS_AND, TG::Product, 0},
55 { "approx", TAPPROX, MS_APPROX, TG::Relation, 0},
56 { "arccos", TACOS, '\0', TG::Function, 5},
57 { "arccot", TACOT, '\0', TG::Function, 5},
58 { "arcosh", TACOSH, '\0', TG::Function, 5 },
59 { "arcoth", TACOTH, '\0', TG::Function, 5 },
60 { "arcsin", TASIN, '\0', TG::Function, 5},
61 { "arctan", TATAN, '\0', TG::Function, 5},
62 { "arsinh", TASINH, '\0', TG::Function, 5},
63 { "artanh", TATANH, '\0', TG::Function, 5},
64 { "backepsilon" , TBACKEPSILON, MS_BACKEPSILON, TG::Standalone, 5},
65 { "bar", TBAR, MS_BAR, TG::Attribute, 5},
66 { "binom", TBINOM, '\0', TG::NONE, 5 },
67 { "bold", TBOLD, '\0', TG::FontAttr, 5},
68 { "boper", TBOPER, '\0', TG::Product, 0},
69 { "breve", TBREVE, MS_BREVE, TG::Attribute, 5},
70 { "bslash", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
71 { "cdot", TCDOT, MS_CDOT, TG::Product, 0},
72 { "check", TCHECK, MS_CHECK, TG::Attribute, 5},
73 { "circ" , TCIRC, MS_CIRC, TG::Standalone, 5},
74 { "circle", TCIRCLE, MS_CIRCLE, TG::Attribute, 5},
75 { "color", TCOLOR, '\0', TG::FontAttr, 5},
76 { "coprod", TCOPROD, MS_COPROD, TG::Oper, 5},
77 { "cos", TCOS, '\0', TG::Function, 5},
78 { "cosh", TCOSH, '\0', TG::Function, 5},
79 { "cot", TCOT, '\0', TG::Function, 5},
80 { "coth", TCOTH, '\0', TG::Function, 5},
81 { "csub", TCSUB, '\0', TG::Power, 0},
82 { "csup", TCSUP, '\0', TG::Power, 0},
83 { "dddot", TDDDOT, MS_DDDOT, TG::Attribute, 5},
84 { "ddot", TDDOT, MS_DDOT, TG::Attribute, 5},
85 { "def", TDEF, MS_DEF, TG::Relation, 0},
86 { "div", TDIV, MS_DIV, TG::Product, 0},
87 { "divides", TDIVIDES, MS_LINE, TG::Relation, 0},
88 { "dlarrow" , TDLARROW, MS_DLARROW, TG::Standalone, 5},
89 { "dlrarrow" , TDLRARROW, MS_DLRARROW, TG::Standalone, 5},
90 { "dot", TDOT, MS_DOT, TG::Attribute, 5},
91 { "dotsaxis", TDOTSAXIS, MS_DOTSAXIS, TG::Standalone, 5}, // 5 to continue expression
92 { "dotsdiag", TDOTSDIAG, MS_DOTSUP, TG::Standalone, 5},
93 { "dotsdown", TDOTSDOWN, MS_DOTSDOWN, TG::Standalone, 5},
94 { "dotslow", TDOTSLOW, MS_DOTSLOW, TG::Standalone, 5},
95 { "dotsup", TDOTSUP, MS_DOTSUP, TG::Standalone, 5},
96 { "dotsvert", TDOTSVERT, MS_DOTSVERT, TG::Standalone, 5},
97 { "downarrow" , TDOWNARROW, MS_DOWNARROW, TG::Standalone, 5},
98 { "drarrow" , TDRARROW, MS_DRARROW, TG::Standalone, 5},
99 { "emptyset" , TEMPTYSET, MS_EMPTYSET, TG::Standalone, 5},
100 { "equiv", TEQUIV, MS_EQUIV, TG::Relation, 0},
101 { "evaluate", TEVALUATE, '\0', TG::NONE, 0},
102 { "exists", TEXISTS, MS_EXISTS, TG::Standalone, 5},
103 { "exp", TEXP, '\0', TG::Function, 5},
104 { "fact", TFACT, MS_FACT, TG::UnOper, 5},
105 { "fixed", TFIXED, '\0', TG::Font, 0},
106 { "font", TFONT, '\0', TG::FontAttr, 5},
107 { "forall", TFORALL, MS_FORALL, TG::Standalone, 5},
108 { "fourier", TFOURIER, MS_FOURIER, TG::Standalone, 5},
109 { "frac", TFRAC, '\0', TG::NONE, 5},
110 { "from", TFROM, '\0', TG::Limit, 0},
111 { "func", TFUNC, '\0', TG::Function, 5},
112 { "ge", TGE, MS_GE, TG::Relation, 0},
113 { "geslant", TGESLANT, MS_GESLANT, TG::Relation, 0 },
114 { "gg", TGG, MS_GG, TG::Relation, 0},
115 { "grave", TGRAVE, MS_GRAVE, TG::Attribute, 5},
116 { "gt", TGT, MS_GT, TG::Relation, 0},
117 { "harpoon", THARPOON, MS_HARPOON, TG::Attribute, 5},
118 { "hat", THAT, MS_HAT, TG::Attribute, 5},
119 { "hbar" , THBAR, MS_HBAR, TG::Standalone, 5},
120 { "iiint", TIIINT, MS_IIINT, TG::Oper, 5},
121 { "iint", TIINT, MS_IINT, TG::Oper, 5},
122 { "im" , TIM, MS_IM, TG::Standalone, 5 },
123 { "in", TIN, MS_IN, TG::Relation, 0},
124 { "infinity" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
125 { "infty" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
126 { "int", TINT, MS_INT, TG::Oper, 5},
127 { "intd", TINTD, MS_INT, TG::Oper, 5},
128 { "intersection", TINTERSECT, MS_INTERSECT, TG::Product, 0},
129 { "it", TIT, '\0', TG::Product, 0},
130 { "ital", TITALIC, '\0', TG::FontAttr, 5},
131 { "italic", TITALIC, '\0', TG::FontAttr, 5},
132 { "lambdabar" , TLAMBDABAR, MS_LAMBDABAR, TG::Standalone, 5},
133 { "langle", TLANGLE, MS_LMATHANGLE, TG::LBrace, 5},
134 { "laplace", TLAPLACE, MS_LAPLACE, TG::Standalone, 5},
135 { "lbrace", TLBRACE, MS_LBRACE, TG::LBrace, 5},
136 { "lceil", TLCEIL, MS_LCEIL, TG::LBrace, 5},
137 { "ldbracket", TLDBRACKET, MS_LDBRACKET, TG::LBrace, 5},
138 { "ldline", TLDLINE, MS_DVERTLINE, TG::LBrace, 5},
139 { "le", TLE, MS_LE, TG::Relation, 0},
140 { "left", TLEFT, '\0', TG::NONE, 5},
141 { "leftarrow" , TLEFTARROW, MS_LEFTARROW, TG::Standalone, 5},
142 { "leslant", TLESLANT, MS_LESLANT, TG::Relation, 0 },
143 { "lfloor", TLFLOOR, MS_LFLOOR, TG::LBrace, 5},
144 { "lim", TLIM, '\0', TG::Oper, 5},
145 { "liminf", TLIMINF, '\0', TG::Oper, 5},
146 { "limsup", TLIMSUP, '\0', TG::Oper, 5},
147 { "lint", TLINT, MS_LINT, TG::Oper, 5},
148 { "ll", TLL, MS_LL, TG::Relation, 0},
149 { "lline", TLLINE, MS_VERTLINE, TG::LBrace, 5},
150 { "llint", TLLINT, MS_LLINT, TG::Oper, 5},
151 { "lllint", TLLLINT, MS_LLLINT, TG::Oper, 5},
152 { "ln", TLN, '\0', TG::Function, 5},
153 { "log", TLOG, '\0', TG::Function, 5},
154 { "lrline", TLRLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5},
155 { "lrdline", TLRDLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5},
156 { "lsub", TLSUB, '\0', TG::Power, 0},
157 { "lsup", TLSUP, '\0', TG::Power, 0},
158 { "lt", TLT, MS_LT, TG::Relation, 0},
159 { "matrix", TMATRIX, '\0', TG::NONE, 5},
160 { "minusplus", TMINUSPLUS, MS_MINUSPLUS, TG::UnOper | TG::Sum, 5},
161 { "mline", TMLINE, MS_VERTLINE, TG::NONE, 0}, //! not in TG::RBrace, Level 0
162 { "nabla", TNABLA, MS_NABLA, TG::Standalone, 5},
163 { "nbold", TNBOLD, '\0', TG::FontAttr, 5},
164 { "ndivides", TNDIVIDES, MS_NDIVIDES, TG::Relation, 0},
165 { "neg", TNEG, MS_NEG, TG::UnOper, 5 },
166 { "neq", TNEQ, MS_NEQ, TG::Relation, 0},
167 { "newline", TNEWLINE, '\0', TG::NONE, 0},
168 { "ni", TNI, MS_NI, TG::Relation, 0},
169 { "nitalic", TNITALIC, '\0', TG::FontAttr, 5},
170 { "none", TNONE, '\0', TG::LBrace | TG::RBrace, 0},
171 { "nospace", TNOSPACE, '\0', TG::Standalone, 5},
172 { "notexists", TNOTEXISTS, MS_NOTEXISTS, TG::Standalone, 5},
173 { "notin", TNOTIN, MS_NOTIN, TG::Relation, 0},
174 { "nprec", TNOTPRECEDES, MS_NOTPRECEDES, TG::Relation, 0 },
175 { "nroot", TNROOT, MS_SQRT, TG::UnOper, 5},
176 { "nsubset", TNSUBSET, MS_NSUBSET, TG::Relation, 0 },
177 { "nsubseteq", TNSUBSETEQ, MS_NSUBSETEQ, TG::Relation, 0 },
178 { "nsucc", TNOTSUCCEEDS, MS_NOTSUCCEEDS, TG::Relation, 0 },
179 { "nsupset", TNSUPSET, MS_NSUPSET, TG::Relation, 0 },
180 { "nsupseteq", TNSUPSETEQ, MS_NSUPSETEQ, TG::Relation, 0 },
181 { "odivide", TODIVIDE, MS_ODIVIDE, TG::Product, 0},
182 { "odot", TODOT, MS_ODOT, TG::Product, 0},
183 { "ominus", TOMINUS, MS_OMINUS, TG::Sum, 0},
184 { "oper", TOPER, '\0', TG::Oper, 5},
185 { "oplus", TOPLUS, MS_OPLUS, TG::Sum, 0},
186 { "or", TOR, MS_OR, TG::Sum, 0},
187 { "ortho", TORTHO, MS_ORTHO, TG::Relation, 0},
188 { "otimes", TOTIMES, MS_OTIMES, TG::Product, 0},
189 { "over", TOVER, '\0', TG::Product, 0},
190 { "overbrace", TOVERBRACE, MS_OVERBRACE, TG::Product, 5},
191 { "overline", TOVERLINE, '\0', TG::Attribute, 5},
192 { "overstrike", TOVERSTRIKE, '\0', TG::Attribute, 5},
193 { "owns", TNI, MS_NI, TG::Relation, 0},
194 { "parallel", TPARALLEL, MS_DLINE, TG::Relation, 0},
195 { "partial", TPARTIAL, MS_PARTIAL, TG::Standalone, 5 },
196 { "phantom", TPHANTOM, '\0', TG::FontAttr, 5},
197 { "plusminus", TPLUSMINUS, MS_PLUSMINUS, TG::UnOper | TG::Sum, 5},
198 { "prec", TPRECEDES, MS_PRECEDES, TG::Relation, 0 },
199 { "preccurlyeq", TPRECEDESEQUAL, MS_PRECEDESEQUAL, TG::Relation, 0 },
200 { "precsim", TPRECEDESEQUIV, MS_PRECEDESEQUIV, TG::Relation, 0 },
201 { "prod", TPROD, MS_PROD, TG::Oper, 5},
202 { "prop", TPROP, MS_PROP, TG::Relation, 0},
203 { "rangle", TRANGLE, MS_RMATHANGLE, TG::RBrace, 0}, //! 0 to terminate expression
204 { "rbrace", TRBRACE, MS_RBRACE, TG::RBrace, 0},
205 { "rceil", TRCEIL, MS_RCEIL, TG::RBrace, 0},
206 { "rdbracket", TRDBRACKET, MS_RDBRACKET, TG::RBrace, 0},
207 { "rdline", TRDLINE, MS_DVERTLINE, TG::RBrace, 0},
208 { "re" , TRE, MS_RE, TG::Standalone, 5 },
209 { "rfloor", TRFLOOR, MS_RFLOOR, TG::RBrace, 0}, //! 0 to terminate expression
210 { "right", TRIGHT, '\0', TG::NONE, 0},
211 { "rightarrow" , TRIGHTARROW, MS_RIGHTARROW, TG::Standalone, 5},
212 { "rline", TRLINE, MS_VERTLINE, TG::RBrace, 0}, //! 0 to terminate expression
213 { "rsub", TRSUB, '\0', TG::Power, 0},
214 { "rsup", TRSUP, '\0', TG::Power, 0},
215 { "sans", TSANS, '\0', TG::Font, 0},
216 { "serif", TSERIF, '\0', TG::Font, 0},
217 { "setC" , TSETC, MS_SETC, TG::Standalone, 5},
218 { "setminus", TSETMINUS, MS_BACKSLASH, TG::Product, 0 },
219 { "setN" , TSETN, MS_SETN, TG::Standalone, 5},
220 { "setQ" , TSETQ, MS_SETQ, TG::Standalone, 5},
221 { "setquotient", TSETQUOTIENT, MS_SLASH, TG::Product, 0 },
222 { "setR" , TSETR, MS_SETR, TG::Standalone, 5},
223 { "setZ" , TSETZ, MS_SETZ, TG::Standalone, 5},
224 { "sim", TSIM, MS_SIM, TG::Relation, 0},
225 { "simeq", TSIMEQ, MS_SIMEQ, TG::Relation, 0},
226 { "sin", TSIN, '\0', TG::Function, 5},
227 { "sinh", TSINH, '\0', TG::Function, 5},
228 { "size", TSIZE, '\0', TG::FontAttr, 5},
229 { "slash", TSLASH, MS_SLASH, TG::Product, 0 },
230 { "sqrt", TSQRT, MS_SQRT, TG::UnOper, 5},
231 { "stack", TSTACK, '\0', TG::NONE, 5},
232 { "sub", TRSUB, '\0', TG::Power, 0},
233 { "subset", TSUBSET, MS_SUBSET, TG::Relation, 0},
234 { "subseteq", TSUBSETEQ, MS_SUBSETEQ, TG::Relation, 0},
235 { "succ", TSUCCEEDS, MS_SUCCEEDS, TG::Relation, 0 },
236 { "succcurlyeq", TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TG::Relation, 0 },
237 { "succsim", TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TG::Relation, 0 },
238 { "sum", TSUM, MS_SUM, TG::Oper, 5},
239 { "sup", TRSUP, '\0', TG::Power, 0},
240 { "supset", TSUPSET, MS_SUPSET, TG::Relation, 0},
241 { "supseteq", TSUPSETEQ, MS_SUPSETEQ, TG::Relation, 0},
242 { "tan", TTAN, '\0', TG::Function, 5},
243 { "tanh", TTANH, '\0', TG::Function, 5},
244 { "tilde", TTILDE, MS_TILDE, TG::Attribute, 5},
245 { "times", TTIMES, MS_TIMES, TG::Product, 0},
246 { "to", TTO, '\0', TG::Limit, 0},
247 { "toward", TTOWARD, MS_RIGHTARROW, TG::Relation, 0},
248 { "transl", TTRANSL, MS_TRANSL, TG::Relation, 0},
249 { "transr", TTRANSR, MS_TRANSR, TG::Relation, 0},
250 { "underbrace", TUNDERBRACE, MS_UNDERBRACE, TG::Product, 5},
251 { "underline", TUNDERLINE, '\0', TG::Attribute, 5},
252 { "union", TUNION, MS_UNION, TG::Sum, 0},
253 { "uoper", TUOPER, '\0', TG::UnOper, 5},
254 { "uparrow" , TUPARROW, MS_UPARROW, TG::Standalone, 5},
255 { "vec", TVEC, MS_VEC, TG::Attribute, 5},
256 { "widebslash", TWIDEBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
257 { "wideharpoon", TWIDEHARPOON, MS_HARPOON, TG::Attribute, 5},
258 { "widehat", TWIDEHAT, MS_HAT, TG::Attribute, 5},
259 { "wideslash", TWIDESLASH, MS_SLASH, TG::Product, 0 },
260 { "widetilde", TWIDETILDE, MS_TILDE, TG::Attribute, 5},
261 { "widevec", TWIDEVEC, MS_VEC, TG::Attribute, 5},
262 { "wp" , TWP, MS_WP, TG::Standalone, 5}
265 // First character may be any alphabetic
266 const sal_Int32 coStartFlags = KParseTokens::ANY_LETTER | KParseTokens::IGNORE_LEADING_WS;
268 // Continuing characters may be any alphabetic
269 const sal_Int32 coContFlags = (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
270 | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
271 // First character for numbers, may be any numeric or dot
272 const sal_Int32 coNumStartFlags = KParseTokens::ASC_DIGIT | KParseTokens::ASC_DOT
273 | KParseTokens::IGNORE_LEADING_WS;
274 // Continuing characters for numbers, may be any numeric or dot or comma.
275 // tdf#127873: additionally accept ',' comma group separator as too many
276 // existing documents unwittingly may have used that as decimal separator
277 // in such locales (though it never was as this is always the en-US locale
278 // and the group separator is only parsed away).
279 const sal_Int32 coNumContFlags = (coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
280 | KParseTokens::GROUP_SEPARATOR_IN_NUMBER;
281 // First character for numbers hexadecimal
282 const sal_Int32 coNum16StartFlags = KParseTokens::ASC_DIGIT | KParseTokens::ASC_UPALPHA
283 | KParseTokens::IGNORE_LEADING_WS;
285 // Continuing characters for numbers hexadecimal
286 const sal_Int32 coNum16ContFlags = (coNum16StartFlags & ~KParseTokens::IGNORE_LEADING_WS);
287 // user-defined char continuing characters may be any alphanumeric or dot.
288 const sal_Int32 coUserDefinedCharContFlags = KParseTokens::ANY_LETTER_OR_NUMBER
289 | KParseTokens::ASC_DOT
290 | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
292 //Checks if keyword is in the list.
293 static inline bool findCompare(const SmTokenTableEntry & lhs, const OUString & s)
295 return s.compareToIgnoreAsciiCaseAscii(lhs.pIdent) > 0;
298 //Returns the SmTokenTableEntry for a keyword
299 static const SmTokenTableEntry * GetTokenTableEntry( const OUString &rName )
301 if (rName.isEmpty())return nullptr; //avoid null pointer exceptions
302 //Looks for the first keyword after or equal to rName in alphabetical order.
303 auto findIter = std::lower_bound( std::begin(aTokenTable),
304 std::end(aTokenTable), rName, findCompare );
305 if ( findIter != std::end(aTokenTable) && rName.equalsIgnoreAsciiCaseAscii( findIter->pIdent ))
306 return &*findIter; //check is equal
307 return nullptr; //not found
310 static bool IsDelimiter( const OUString &rTxt, sal_Int32 nPos )
311 { // returns 'true' iff cChar is '\0' or a delimiter
313 assert(nPos <= rTxt.getLength()); //index out of range
314 if (nPos == rTxt.getLength())return true; //This is EOF
315 sal_Unicode cChar = rTxt[nPos];
317 // check if 'cChar' is in the delimiter table
318 static const sal_Unicode aDelimiterTable[] =
320 ' ', '{', '}', '(', ')', '\t', '\n', '\r', '+', '-',
321 '*', '/', '=', '[', ']', '^', '_', '#',
322 '%', '>', '<', '&', '|', '\\', '"', '~', '`'
323 };//reordered by usage (by eye) for nanoseconds saving.
325 //checks the array
326 for (auto const &cDelimiter : aDelimiterTable)
328 if (cDelimiter == cChar)return true;
331 //special chars support
332 sal_Int16 nTypJp = SM_MOD()->GetSysLocale().GetCharClass().getType( rTxt, nPos );
333 return ( nTypJp == css::i18n::UnicodeType::SPACE_SEPARATOR ||
334 nTypJp == css::i18n::UnicodeType::CONTROL);
337 // checks number used as arguments in Math formulas (e.g. 'size' command)
338 // Format: no negative numbers, must start with a digit, no exponent notation, ...
339 static bool lcl_IsNumber(const OUString& rText)
341 bool bPoint = false;
342 const sal_Unicode* pBuffer = rText.getStr();
343 for(sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
345 const sal_Unicode cChar = *pBuffer;
346 if(cChar == '.')
348 if(bPoint) return false;
349 else bPoint = true;
351 else if ( !rtl::isAsciiDigit( cChar ) ) return false;
353 return true;
355 // checks number used as arguments in Math formulas (e.g. 'size' command)
356 // Format: no negative numbers, must start with a digit, no exponent notation, ...
357 static bool lcl_IsNotWholeNumber(const OUString& rText)
359 const sal_Unicode* pBuffer = rText.getStr();
360 for(sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
361 if ( !rtl::isAsciiDigit( *pBuffer ) ) return true;
362 return false;
364 // checks hex number used as arguments in Math formulas (e.g. 'hex' command)
365 // Format: no negative numbers, must start with a digit, no exponent notation, ...
366 static bool lcl_IsNotWholeNumber16(const OUString& rText)
368 const sal_Unicode* pBuffer = rText.getStr();
369 for(sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
370 if ( !rtl::isAsciiCanonicHexDigit( *pBuffer ) ) return true;
371 return false;
374 //Text replace onto m_aBufferString
375 void SmParser::Replace( sal_Int32 nPos, sal_Int32 nLen, const OUString &rText )
377 assert( nPos + nLen <= m_aBufferString.getLength() ); //checks if length allows text replace
379 m_aBufferString = m_aBufferString.replaceAt( nPos, nLen, rText ); //replace and reindex
380 sal_Int32 nChg = rText.getLength() - nLen;
381 m_nBufferIndex = m_nBufferIndex + nChg;
382 m_nTokenIndex = m_nTokenIndex + nChg;
// Scans the next token out of m_aBufferString, starting at m_nBufferIndex,
// and stores its description in m_aCurToken (eType, cMathChar, nGroup,
// nLevel, aText, nRow, nCol).
//
// Outline:
//  1. Repeatedly skip spaces, newlines (updating m_nRow / m_nColOff) and
//     "%%" line comments until a real token start is found.
//  2. Record the token start in m_nTokenIndex and its row/column.
//  3. Classify the token: end of input, number, quoted text, identifier or
//     keyword (looked up in aTokenTable), '_', relational operators starting
//     with '<' or '>', and the remaining single-character operators.
//  4. Anything not recognised becomes a TCHARACTER token holding one
//     character (or a surrogate pair).
//  5. Unless the token is TEND, advance m_nBufferIndex to aRes.EndPos.
385 void SmParser::NextToken() //Central part of the parser
388 sal_Int32 nBufLen = m_aBufferString.getLength();
389 ParseResult aRes;
390 sal_Int32 nRealStart;
391 bool bCont;
394 // skip white spaces
395 while (UnicodeType::SPACE_SEPARATOR ==
396 m_pSysCC->getType( m_aBufferString, m_nBufferIndex ))
397 ++m_nBufferIndex;
399 // Try to parse a number in a locale-independent manner using
400 // '.' as decimal separator.
401 // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
402 aRes = m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER,
403 m_aBufferString, m_nBufferIndex,
404 coNumStartFlags, "",
405 coNumContFlags, "");
407 if (aRes.TokenType == 0)
409 // Try again with the default token parsing.
410 aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
411 coStartFlags, "",
412 coContFlags, "");
// The parser may have skipped leading whitespace itself; nRealStart is the
// position of the first character of the candidate token.
415 nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
416 m_nBufferIndex = nRealStart;
418 bCont = false;
// A newline that was not recognised as a token is not a token of its own:
// count the row, restart the column offset after it and scan again.
419 if ( aRes.TokenType == 0 &&
420 nRealStart < nBufLen &&
421 '\n' == m_aBufferString[ nRealStart ] )
423 // keep data needed for tokens row and col entry up to date
424 ++m_nRow;
425 m_nBufferIndex = m_nColOff = nRealStart + 1;
426 bCont = true;
428 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
// "%%" starts a comment running up to (not including) the next newline.
430 if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
432 //SkipComment
433 m_nBufferIndex = nRealStart + 2;
434 while (m_nBufferIndex < nBufLen &&
435 '\n' != m_aBufferString[ m_nBufferIndex ])
436 ++m_nBufferIndex;
437 bCont = true;
441 } while (bCont);
443 // set index of current token
444 m_nTokenIndex = m_nBufferIndex;
446 m_aCurToken.nRow = m_nRow;
447 m_aCurToken.nCol = nRealStart - m_nColOff + 1;
// bHandled is cleared by any branch below that could not classify the
// token; such tokens fall back to TCHARACTER at the end.
449 bool bHandled = true;
// Nothing left in the buffer: emit the end token.
450 if (nRealStart >= nBufLen)
452 m_aCurToken.eType = TEND;
453 m_aCurToken.cMathChar = '\0';
454 m_aCurToken.nGroup = TG::NONE;
455 m_aCurToken.nLevel = 0;
456 m_aCurToken.aText.clear();
458 else if (aRes.TokenType & KParseType::ANY_NUMBER)
460 assert(aRes.EndPos > 0);
// A trailing ',' directly followed by a non-space character is given back
// so that it is not treated as part of the number.
461 if ( m_aBufferString[aRes.EndPos-1] == ',' &&
462 aRes.EndPos < nBufLen &&
463 m_pSysCC->getType( m_aBufferString, aRes.EndPos ) != UnicodeType::SPACE_SEPARATOR )
465 // Comma followed by a non-space char is unlikely for decimal/thousands separator.
466 --aRes.EndPos;
468 sal_Int32 n = aRes.EndPos - nRealStart;
469 assert(n >= 0);
470 m_aCurToken.eType = TNUMBER;
471 m_aCurToken.cMathChar = '\0';
472 m_aCurToken.nGroup = TG::NONE;
473 m_aCurToken.nLevel = 5;
474 m_aCurToken.aText = m_aBufferString.copy( nRealStart, n );
476 SAL_WARN_IF( !IsDelimiter( m_aBufferString, aRes.EndPos ), "starmath", "identifier really finished? (compatibility!)" );
478 else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING)
// Quoted text: the token text is the dequoted string delivered by the parser.
480 m_aCurToken.eType = TTEXT;
481 m_aCurToken.cMathChar = '\0';
482 m_aCurToken.nGroup = TG::NONE;
483 m_aCurToken.nLevel = 5;
484 m_aCurToken.aText = aRes.DequotedNameOrString;
485 m_aCurToken.nRow = m_nRow;
// one column further right than the default computed above - presumably to
// point past the opening quote; TODO confirm
486 m_aCurToken.nCol = nRealStart - m_nColOff + 2;
488 else if (aRes.TokenType & KParseType::IDENTNAME)
490 sal_Int32 n = aRes.EndPos - nRealStart;
491 assert(n >= 0);
492 OUString aName( m_aBufferString.copy( nRealStart, n ) );
// Keywords are taken from aTokenTable (token text is normalised to the
// table's spelling); everything else is a plain identifier.
493 const SmTokenTableEntry *pEntry = GetTokenTableEntry( aName );
495 if (pEntry)
497 m_aCurToken.eType = pEntry->eType;
498 m_aCurToken.cMathChar = pEntry->cMathChar;
499 m_aCurToken.nGroup = pEntry->nGroup;
500 m_aCurToken.nLevel = pEntry->nLevel;
501 m_aCurToken.aText = OUString::createFromAscii( pEntry->pIdent );
503 else
505 m_aCurToken.eType = TIDENT;
506 m_aCurToken.cMathChar = '\0';
507 m_aCurToken.nGroup = TG::NONE;
508 m_aCurToken.nLevel = 5;
509 m_aCurToken.aText = aName;
511 SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos),"starmath", "identifier really finished? (compatibility!)");
// '_' was not classified by either parse call above; it is turned into the
// same token type (TRSUB) that the "sub"/"rsub" keywords map to.
514 else if (aRes.TokenType == 0 && '_' == m_aBufferString[ nRealStart ])
516 m_aCurToken.eType = TRSUB;
517 m_aCurToken.cMathChar = '\0';
518 m_aCurToken.nGroup = TG::Power;
519 m_aCurToken.nLevel = 0;
520 m_aCurToken.aText = "_";
522 aRes.EndPos = nRealStart + 1;
524 else if (aRes.TokenType & KParseType::BOOLEAN)
// Operators starting with '<' or '>'. Multi-character forms are matched
// first and move rnEndPos (an alias for aRes.EndPos) past the operator.
// NOTE(review): when the parsed span is longer than two characters nothing
// in this branch assigns m_aCurToken and bHandled stays true - confirm that
// this cannot happen in practice.
526 sal_Int32 &rnEndPos = aRes.EndPos;
527 if (rnEndPos - nRealStart <= 2)
529 sal_Unicode ch = m_aBufferString[ nRealStart ];
530 switch (ch)
532 case '<':
534 if (m_aBufferString.match("<<", nRealStart))
536 m_aCurToken.eType = TLL;
537 m_aCurToken.cMathChar = MS_LL;
538 m_aCurToken.nGroup = TG::Relation;
539 m_aCurToken.nLevel = 0;
540 m_aCurToken.aText = "<<";
542 rnEndPos = nRealStart + 2;
544 else if (m_aBufferString.match("<=", nRealStart))
546 m_aCurToken.eType = TLE;
547 m_aCurToken.cMathChar = MS_LE;
548 m_aCurToken.nGroup = TG::Relation;
549 m_aCurToken.nLevel = 0;
550 m_aCurToken.aText = "<=";
552 rnEndPos = nRealStart + 2;
554 else if (m_aBufferString.match("<-", nRealStart))
556 m_aCurToken.eType = TLEFTARROW;
557 m_aCurToken.cMathChar = MS_LEFTARROW;
558 m_aCurToken.nGroup = TG::Standalone;
559 m_aCurToken.nLevel = 5;
560 m_aCurToken.aText = "<-";
562 rnEndPos = nRealStart + 2;
564 else if (m_aBufferString.match("<>", nRealStart))
566 m_aCurToken.eType = TNEQ;
567 m_aCurToken.cMathChar = MS_NEQ;
568 m_aCurToken.nGroup = TG::Relation;
569 m_aCurToken.nLevel = 0;
570 m_aCurToken.aText = "<>";
572 rnEndPos = nRealStart + 2;
// "<?>" is the placeholder token.
574 else if (m_aBufferString.match("<?>", nRealStart))
576 m_aCurToken.eType = TPLACE;
577 m_aCurToken.cMathChar = MS_PLACE;
578 m_aCurToken.nGroup = TG::NONE;
579 m_aCurToken.nLevel = 5;
580 m_aCurToken.aText = "<?>";
582 rnEndPos = nRealStart + 3;
584 else
586 m_aCurToken.eType = TLT;
587 m_aCurToken.cMathChar = MS_LT;
588 m_aCurToken.nGroup = TG::Relation;
589 m_aCurToken.nLevel = 0;
590 m_aCurToken.aText = "<";
593 break;
594 case '>':
596 if (m_aBufferString.match(">=", nRealStart))
598 m_aCurToken.eType = TGE;
599 m_aCurToken.cMathChar = MS_GE;
600 m_aCurToken.nGroup = TG::Relation;
601 m_aCurToken.nLevel = 0;
602 m_aCurToken.aText = ">=";
604 rnEndPos = nRealStart + 2;
606 else if (m_aBufferString.match(">>", nRealStart))
608 m_aCurToken.eType = TGG;
609 m_aCurToken.cMathChar = MS_GG;
610 m_aCurToken.nGroup = TG::Relation;
611 m_aCurToken.nLevel = 0;
612 m_aCurToken.aText = ">>";
614 rnEndPos = nRealStart + 2;
616 else
618 m_aCurToken.eType = TGT;
619 m_aCurToken.cMathChar = MS_GT;
620 m_aCurToken.nGroup = TG::Relation;
621 m_aCurToken.nLevel = 0;
622 m_aCurToken.aText = ">";
625 break;
626 default:
627 bHandled = false;
631 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
// Single-character tokens; a few of them ("##", "+-", "-+", "->") are
// extended to two characters by moving rnEndPos (an alias for aRes.EndPos).
633 sal_Int32 &rnEndPos = aRes.EndPos;
634 if (rnEndPos - nRealStart == 1)
636 sal_Unicode ch = m_aBufferString[ nRealStart ];
637 switch (ch)
639 case '%':
641 //! modifies aRes.EndPos
643 OSL_ENSURE( rnEndPos >= nBufLen ||
644 '%' != m_aBufferString[ rnEndPos ],
645 "unexpected comment start" );
647 // get identifier of user-defined character
648 ParseResult aTmpRes = m_pSysCC->parseAnyToken(
649 m_aBufferString, rnEndPos,
650 KParseTokens::ANY_LETTER,
652 coUserDefinedCharContFlags,
653 "" );
655 sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace;
657 // default setting for the case that no identifier
658 // i.e. a valid symbol-name is following the '%'
659 // character
660 m_aCurToken.eType = TTEXT;
661 m_aCurToken.cMathChar = '\0';
662 m_aCurToken.nGroup = TG::NONE;
663 m_aCurToken.nLevel = 5;
664 m_aCurToken.aText ="%";
665 m_aCurToken.nRow = m_nRow;
666 m_aCurToken.nCol = nTmpStart - m_nColOff;
668 if (aTmpRes.TokenType & KParseType::IDENTNAME)
// The token text keeps the leading '%' (copy starts one character before
// the identifier).
671 sal_Int32 n = aTmpRes.EndPos - nTmpStart;
672 m_aCurToken.eType = TSPECIAL;
673 m_aCurToken.aText = m_aBufferString.copy( nTmpStart-1, n+1 );
675 OSL_ENSURE( aTmpRes.EndPos > rnEndPos,
676 "empty identifier" );
677 if (aTmpRes.EndPos > rnEndPos)
678 rnEndPos = aTmpRes.EndPos;
679 else
680 ++rnEndPos;
683 // if no symbol-name was found we start-over with
684 // finding the next token right after the '%' sign.
685 // I.e. we leave rnEndPos unmodified.
687 break;
688 case '[':
690 m_aCurToken.eType = TLBRACKET;
691 m_aCurToken.cMathChar = MS_LBRACKET;
692 m_aCurToken.nGroup = TG::LBrace;
693 m_aCurToken.nLevel = 5;
694 m_aCurToken.aText = "[";
696 break;
697 case '\\':
699 m_aCurToken.eType = TESCAPE;
700 m_aCurToken.cMathChar = '\0';
701 m_aCurToken.nGroup = TG::NONE;
702 m_aCurToken.nLevel = 5;
703 m_aCurToken.aText = "\\";
705 break;
706 case ']':
708 m_aCurToken.eType = TRBRACKET;
709 m_aCurToken.cMathChar = MS_RBRACKET;
710 m_aCurToken.nGroup = TG::RBrace;
711 m_aCurToken.nLevel = 0;
712 m_aCurToken.aText = "]";
714 break;
715 case '^':
717 m_aCurToken.eType = TRSUP;
718 m_aCurToken.cMathChar = '\0';
719 m_aCurToken.nGroup = TG::Power;
720 m_aCurToken.nLevel = 0;
721 m_aCurToken.aText = "^";
723 break;
724 case '`':
726 m_aCurToken.eType = TSBLANK;
727 m_aCurToken.cMathChar = '\0';
728 m_aCurToken.nGroup = TG::Blank;
729 m_aCurToken.nLevel = 5;
730 m_aCurToken.aText = "`";
732 break;
733 case '{':
735 m_aCurToken.eType = TLGROUP;
736 m_aCurToken.cMathChar = MS_LBRACE;
737 m_aCurToken.nGroup = TG::NONE;
738 m_aCurToken.nLevel = 5;
739 m_aCurToken.aText = "{";
741 break;
742 case '|':
744 m_aCurToken.eType = TOR;
745 m_aCurToken.cMathChar = MS_OR;
746 m_aCurToken.nGroup = TG::Sum;
747 m_aCurToken.nLevel = 0;
748 m_aCurToken.aText = "|";
750 break;
751 case '}':
753 m_aCurToken.eType = TRGROUP;
754 m_aCurToken.cMathChar = MS_RBRACE;
755 m_aCurToken.nGroup = TG::NONE;
756 m_aCurToken.nLevel = 0;
757 m_aCurToken.aText = "}";
759 break;
760 case '~':
762 m_aCurToken.eType = TBLANK;
763 m_aCurToken.cMathChar = '\0';
764 m_aCurToken.nGroup = TG::Blank;
765 m_aCurToken.nLevel = 5;
766 m_aCurToken.aText = "~";
768 break;
769 case '#':
771 if (m_aBufferString.match("##", nRealStart))
773 m_aCurToken.eType = TDPOUND;
774 m_aCurToken.cMathChar = '\0';
775 m_aCurToken.nGroup = TG::NONE;
776 m_aCurToken.nLevel = 0;
777 m_aCurToken.aText = "##";
779 rnEndPos = nRealStart + 2;
781 else
783 m_aCurToken.eType = TPOUND;
784 m_aCurToken.cMathChar = '\0';
785 m_aCurToken.nGroup = TG::NONE;
786 m_aCurToken.nLevel = 0;
787 m_aCurToken.aText = "#";
790 break;
791 case '&':
793 m_aCurToken.eType = TAND;
794 m_aCurToken.cMathChar = MS_AND;
795 m_aCurToken.nGroup = TG::Product;
796 m_aCurToken.nLevel = 0;
797 m_aCurToken.aText = "&";
799 break;
800 case '(':
802 m_aCurToken.eType = TLPARENT;
803 m_aCurToken.cMathChar = MS_LPARENT;
804 m_aCurToken.nGroup = TG::LBrace;
805 m_aCurToken.nLevel = 5; //! 0 to continue expression
806 m_aCurToken.aText = "(";
808 break;
809 case ')':
811 m_aCurToken.eType = TRPARENT;
812 m_aCurToken.cMathChar = MS_RPARENT;
813 m_aCurToken.nGroup = TG::RBrace;
814 m_aCurToken.nLevel = 0; //! 0 to terminate expression
815 m_aCurToken.aText = ")";
817 break;
818 case '*':
820 m_aCurToken.eType = TMULTIPLY;
821 m_aCurToken.cMathChar = MS_MULTIPLY;
822 m_aCurToken.nGroup = TG::Product;
823 m_aCurToken.nLevel = 0;
824 m_aCurToken.aText = "*";
826 break;
827 case '+':
829 if (m_aBufferString.match("+-", nRealStart))
831 m_aCurToken.eType = TPLUSMINUS;
832 m_aCurToken.cMathChar = MS_PLUSMINUS;
833 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
834 m_aCurToken.nLevel = 5;
835 m_aCurToken.aText = "+-";
837 rnEndPos = nRealStart + 2;
839 else
841 m_aCurToken.eType = TPLUS;
842 m_aCurToken.cMathChar = MS_PLUS;
843 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
844 m_aCurToken.nLevel = 5;
845 m_aCurToken.aText = "+";
848 break;
849 case '-':
851 if (m_aBufferString.match("-+", nRealStart))
853 m_aCurToken.eType = TMINUSPLUS;
854 m_aCurToken.cMathChar = MS_MINUSPLUS;
855 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
856 m_aCurToken.nLevel = 5;
857 m_aCurToken.aText = "-+";
859 rnEndPos = nRealStart + 2;
861 else if (m_aBufferString.match("->", nRealStart))
863 m_aCurToken.eType = TRIGHTARROW;
864 m_aCurToken.cMathChar = MS_RIGHTARROW;
865 m_aCurToken.nGroup = TG::Standalone;
866 m_aCurToken.nLevel = 5;
867 m_aCurToken.aText = "->";
869 rnEndPos = nRealStart + 2;
871 else
873 m_aCurToken.eType = TMINUS;
874 m_aCurToken.cMathChar = MS_MINUS;
875 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
876 m_aCurToken.nLevel = 5;
877 m_aCurToken.aText = "-";
880 break;
881 case '.':
883 // Only one character? Then it can't be a number.
884 if (m_nBufferIndex < m_aBufferString.getLength() - 1)
886 // for compatibility with SO5.2
887 // texts like .34 ...56 ... h ...78..90
888 // will be treated as numbers
889 m_aCurToken.eType = TNUMBER;
890 m_aCurToken.cMathChar = '\0';
891 m_aCurToken.nGroup = TG::NONE;
892 m_aCurToken.nLevel = 5;
894 sal_Int32 nTxtStart = m_nBufferIndex;
895 sal_Unicode cChar;
896 // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only
// NOTE(review): the scan stops either on the first character that is neither
// '.' nor a digit, or on the last character of the buffer; in both cases the
// character at m_nBufferIndex is left out of aText and is scanned again as
// part of the next token - confirm this is intended when the buffer ends in
// a digit or dot.
899 cChar = m_aBufferString[ ++m_nBufferIndex ];
901 while ( (cChar == '.' || rtl::isAsciiDigit( cChar )) &&
902 ( m_nBufferIndex < m_aBufferString.getLength() - 1 ) );
904 m_aCurToken.aText = m_aBufferString.copy( nTxtStart, m_nBufferIndex - nTxtStart );
905 aRes.EndPos = m_nBufferIndex;
907 else
908 bHandled = false;
910 break;
911 case '/':
913 m_aCurToken.eType = TDIVIDEBY;
914 m_aCurToken.cMathChar = MS_SLASH;
915 m_aCurToken.nGroup = TG::Product;
916 m_aCurToken.nLevel = 0;
917 m_aCurToken.aText = "/";
919 break;
920 case '=':
922 m_aCurToken.eType = TASSIGN;
923 m_aCurToken.cMathChar = MS_ASSIGN;
924 m_aCurToken.nGroup = TG::Relation;
925 m_aCurToken.nLevel = 0;
926 m_aCurToken.aText = "=";
928 break;
929 default:
930 bHandled = false;
934 else
935 bHandled = false;
// Fallback: whatever could not be classified becomes a one-character
// TCHARACTER token.
937 if (!bHandled)
939 m_aCurToken.eType = TCHARACTER;
940 m_aCurToken.cMathChar = '\0';
941 m_aCurToken.nGroup = TG::NONE;
942 m_aCurToken.nLevel = 5;
944 // tdf#129372: we may have to deal with surrogate pairs
945 // (see https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates)
946 // in this case, we must read 2 sal_Unicode instead of 1
947 int nOffset(rtl::isSurrogate(m_aBufferString[nRealStart])? 2 : 1);
948 m_aCurToken.aText = m_aBufferString.copy( nRealStart, nOffset );
950 aRes.EndPos = nRealStart + nOffset;
// Consume the token; at end of input the index is left where it is.
953 if (TEND != m_aCurToken.eType)
954 m_nBufferIndex = aRes.EndPos;
957 void SmParser::NextTokenColor(bool dvipload)
960 sal_Int32 nBufLen = m_aBufferString.getLength();
961 ParseResult aRes;
962 sal_Int32 nRealStart;
963 bool bCont;
967 // skip white spaces
968 while (UnicodeType::SPACE_SEPARATOR ==
969 m_pSysCC->getType( m_aBufferString, m_nBufferIndex ))
970 ++m_nBufferIndex;
971 //parse, there are few options, so less strict.
972 aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
973 coStartFlags, "", coContFlags, "");
974 nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
975 m_nBufferIndex = nRealStart;
976 bCont = false;
977 if ( aRes.TokenType == 0 &&
978 nRealStart < nBufLen &&
979 '\n' == m_aBufferString[ nRealStart ] )
981 // keep data needed for tokens row and col entry up to date
982 ++m_nRow;
983 m_nBufferIndex = m_nColOff = nRealStart + 1;
984 bCont = true;
986 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
988 if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
990 //SkipComment
991 m_nBufferIndex = nRealStart + 2;
992 while (m_nBufferIndex < nBufLen &&
993 '\n' != m_aBufferString[ m_nBufferIndex ])
994 ++m_nBufferIndex;
995 bCont = true;
998 } while (bCont);
1000 // set index of current token
1001 m_nTokenIndex = m_nBufferIndex;
1002 m_aCurToken.nRow = m_nRow;
1003 m_aCurToken.nCol = nRealStart - m_nColOff + 1;
1004 if (nRealStart >= nBufLen) m_aCurToken.eType = TEND;
1005 else if (aRes.TokenType & KParseType::IDENTNAME)
1007 sal_Int32 n = aRes.EndPos - nRealStart;
1008 assert(n >= 0);
1009 OUString aName( m_aBufferString.copy( nRealStart, n ) );
1010 std::unique_ptr<SmColorTokenTableEntry> aSmColorTokenTableEntry;
1011 if(dvipload) aSmColorTokenTableEntry = starmathdatabase::Identify_ColorName_DVIPSNAMES( aName );
1012 else aSmColorTokenTableEntry = starmathdatabase::Identify_ColorName_Parser( aName );
1013 m_aCurToken = aSmColorTokenTableEntry;
1015 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1017 if( m_aBufferString[ nRealStart ] == '#' && !m_aBufferString.match("##", nRealStart) )
1019 m_aCurToken.eType = THEX;
1020 m_aCurToken.cMathChar = '\0';
1021 m_aCurToken.nGroup = TG::Color;
1022 m_aCurToken.nLevel = 0;
1023 m_aCurToken.aText = "hex";
1026 else m_aCurToken.eType = TNONE;
1027 if (TEND != m_aCurToken.eType) m_nBufferIndex = aRes.EndPos;
1030 void SmParser::NextTokenFontSize()
1033 sal_Int32 nBufLen = m_aBufferString.getLength();
1034 ParseResult aRes;
1035 sal_Int32 nRealStart;
1036 bool bCont;
1037 bool hex = false;
1041 // skip white spaces
1042 while (UnicodeType::SPACE_SEPARATOR ==
1043 m_pSysCC->getType( m_aBufferString, m_nBufferIndex ))
1044 ++m_nBufferIndex;
1045 //hexadecimal parser
1046 aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
1047 coNum16StartFlags, ".", coNum16ContFlags, ".,");
1048 if (aRes.TokenType == 0)
1050 // Try again with the default token parsing.
1051 aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
1052 coStartFlags, "", coContFlags, "");
1054 else hex = true;
1055 nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
1056 m_nBufferIndex = nRealStart;
1057 bCont = false;
1058 if ( aRes.TokenType == 0 &&
1059 nRealStart < nBufLen &&
1060 '\n' == m_aBufferString[ nRealStart ] )
1062 // keep data needed for tokens row and col entry up to date
1063 ++m_nRow;
1064 m_nBufferIndex = m_nColOff = nRealStart + 1;
1065 bCont = true;
1067 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1069 if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
1071 //SkipComment
1072 m_nBufferIndex = nRealStart + 2;
1073 while (m_nBufferIndex < nBufLen &&
1074 '\n' != m_aBufferString[ m_nBufferIndex ])
1075 ++m_nBufferIndex;
1076 bCont = true;
1079 } while (bCont);
1081 // set index of current token
1082 m_nTokenIndex = m_nBufferIndex;
1083 m_aCurToken.nRow = m_nRow;
1084 m_aCurToken.nCol = nRealStart - m_nColOff + 1;
1085 if (nRealStart >= nBufLen) m_aCurToken.eType = TEND;
1086 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1088 if ( aRes.EndPos - nRealStart == 1 )
1090 switch ( m_aBufferString[ nRealStart ] )
1092 case '*':
1093 m_aCurToken.eType = TMULTIPLY;
1094 m_aCurToken.cMathChar = MS_MULTIPLY;
1095 m_aCurToken.nGroup = TG::Product;
1096 m_aCurToken.nLevel = 0;
1097 m_aCurToken.aText = "*";
1098 break;
1099 case '+':
1100 m_aCurToken.eType = TPLUS;
1101 m_aCurToken.cMathChar = MS_PLUS;
1102 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
1103 m_aCurToken.nLevel = 5;
1104 m_aCurToken.aText = "+";
1105 break;
1106 case '-':
1107 m_aCurToken.eType = TMINUS;
1108 m_aCurToken.cMathChar = MS_MINUS;
1109 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
1110 m_aCurToken.nLevel = 5;
1111 m_aCurToken.aText = "-";
1112 break;
1113 case '/':
1114 m_aCurToken.eType = TDIVIDEBY;
1115 m_aCurToken.cMathChar = MS_SLASH;
1116 m_aCurToken.nGroup = TG::Product;
1117 m_aCurToken.nLevel = 0;
1118 m_aCurToken.aText = "/";
1119 break;
1120 default:
1121 m_aCurToken.eType = TNONE;
1122 break;
1125 else m_aCurToken.eType = TNONE;
1127 else if(hex)
1129 assert(aRes.EndPos > 0);
1130 sal_Int32 n = aRes.EndPos - nRealStart;
1131 assert(n >= 0);
1132 m_aCurToken.eType = THEX;
1133 m_aCurToken.cMathChar = '\0';
1134 m_aCurToken.nGroup = TG::NONE;
1135 m_aCurToken.nLevel = 5;
1136 m_aCurToken.aText = m_aBufferString.copy( nRealStart, n );
1138 else m_aCurToken.eType = TNONE;
1139 if (TEND != m_aCurToken.eType) m_nBufferIndex = aRes.EndPos;
1142 namespace
1144 SmNodeArray buildNodeArray(std::vector<std::unique_ptr<SmNode>>& rSubNodes)
1146 SmNodeArray aSubArray(rSubNodes.size());
1147 for (size_t i = 0; i < rSubNodes.size(); ++i)
1148 aSubArray[i] = rSubNodes[i].release();
1149 return aSubArray;
1151 } //end namespace
1153 // grammar
1154 /*************************************************************************************************/
1156 std::unique_ptr<SmTableNode> SmParser::DoTable()
1158 DepthProtect aDepthGuard(m_nParseDepth);
1159 if (aDepthGuard.TooDeep())
1160 throw std::range_error("parser depth limit");
1162 std::vector<std::unique_ptr<SmNode>> aLineArray;
1163 aLineArray.push_back(DoLine());
1164 while (m_aCurToken.eType == TNEWLINE)
1166 NextToken();
1167 aLineArray.push_back(DoLine());
1169 assert(m_aCurToken.eType == TEND);
1170 std::unique_ptr<SmTableNode> xSNode(new SmTableNode(m_aCurToken));
1171 xSNode->SetSubNodes(buildNodeArray(aLineArray));
1172 return xSNode;
1175 std::unique_ptr<SmNode> SmParser::DoAlign(bool bUseExtraSpaces)
1176 // parse alignment info (if any), then go on with rest of expression
1178 DepthProtect aDepthGuard(m_nParseDepth);
1179 if (aDepthGuard.TooDeep())
1180 throw std::range_error("parser depth limit");
1182 std::unique_ptr<SmStructureNode> xSNode;
1184 if (TokenInGroup(TG::Align))
1186 xSNode.reset(new SmAlignNode(m_aCurToken));
1188 NextToken();
1190 // allow for just one align statement in 5.0
1191 if (TokenInGroup(TG::Align))
1192 return DoError(SmParseError::DoubleAlign);
1195 auto pNode = DoExpression(bUseExtraSpaces);
1197 if (xSNode)
1199 xSNode->SetSubNode(0, pNode.release());
1200 return xSNode;
1202 return pNode;
1205 // Postcondition: m_aCurToken.eType == TEND || m_aCurToken.eType == TNEWLINE
1206 std::unique_ptr<SmNode> SmParser::DoLine()
1208 DepthProtect aDepthGuard(m_nParseDepth);
1209 if (aDepthGuard.TooDeep())
1210 throw std::range_error("parser depth limit");
1212 std::vector<std::unique_ptr<SmNode>> ExpressionArray;
1214 // start with single expression that may have an alignment statement
1215 // (and go on with expressions that must not have alignment
1216 // statements in 'while' loop below. See also 'Expression()'.)
1217 if (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
1218 ExpressionArray.push_back(DoAlign());
1220 while (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
1221 ExpressionArray.push_back(DoExpression());
1223 //If there's no expression, add an empty one.
1224 //this is to avoid a formula tree without any caret
1225 //positions, in visual formula editor.
1226 if(ExpressionArray.empty())
1228 SmToken aTok;
1229 aTok.eType = TNEWLINE;
1230 ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok)));
1233 auto xSNode = std::make_unique<SmLineNode>(m_aCurToken);
1234 xSNode->SetSubNodes(buildNodeArray(ExpressionArray));
1235 return xSNode;
1238 std::unique_ptr<SmNode> SmParser::DoExpression(bool bUseExtraSpaces)
1240 DepthProtect aDepthGuard(m_nParseDepth);
1241 if (aDepthGuard.TooDeep())
1242 throw std::range_error("parser depth limit");
1244 std::vector<std::unique_ptr<SmNode>> RelationArray;
1245 RelationArray.push_back(DoRelation());
1246 while (m_aCurToken.nLevel >= 4)
1247 RelationArray.push_back(DoRelation());
1249 if (RelationArray.size() > 1)
1251 std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken));
1252 xSNode->SetSubNodes(buildNodeArray(RelationArray));
1253 xSNode->SetUseExtraSpaces(bUseExtraSpaces);
1254 return xSNode;
1256 else
1258 // This expression has only one node so just push this node.
1259 return std::move(RelationArray[0]);
1263 std::unique_ptr<SmNode> SmParser::DoRelation()
1265 DepthProtect aDepthGuard(m_nParseDepth);
1266 if (aDepthGuard.TooDeep())
1267 throw std::range_error("parser depth limit");
1269 int nDepthLimit = m_nParseDepth;
1271 auto xFirst = DoSum();
1272 while (TokenInGroup(TG::Relation))
1274 std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1275 auto xSecond = DoOpSubSup();
1276 auto xThird = DoSum();
1277 xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
1278 xFirst = std::move(xSNode);
1280 ++m_nParseDepth;
1281 if (aDepthGuard.TooDeep())
1282 throw std::range_error("parser depth limit");
1285 m_nParseDepth = nDepthLimit;
1287 return xFirst;
1290 std::unique_ptr<SmNode> SmParser::DoSum()
1292 DepthProtect aDepthGuard(m_nParseDepth);
1293 if (aDepthGuard.TooDeep())
1294 throw std::range_error("parser depth limit");
1296 auto xFirst = DoProduct();
1297 while (TokenInGroup(TG::Sum))
1299 std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1300 auto xSecond = DoOpSubSup();
1301 auto xThird = DoProduct();
1302 xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
1303 xFirst = std::move(xSNode);
1305 return xFirst;
1308 std::unique_ptr<SmNode> SmParser::DoProduct()
1310 DepthProtect aDepthGuard(m_nParseDepth);
1311 if (aDepthGuard.TooDeep())
1312 throw std::range_error("parser depth limit");
1314 auto xFirst = DoPower();
1316 int nDepthLimit = 0;
1318 while (TokenInGroup(TG::Product))
1320 //this linear loop builds a recursive structure, if it gets
1321 //too deep then later processing, e.g. releasing the tree,
1322 //can exhaust stack
1323 if (nDepthLimit > DEPTH_LIMIT)
1324 throw std::range_error("parser depth limit");
1326 std::unique_ptr<SmStructureNode> xSNode;
1327 std::unique_ptr<SmNode> xOper;
1328 bool bSwitchArgs = false;
1330 SmTokenType eType = m_aCurToken.eType;
1331 switch (eType)
1333 case TOVER:
1334 xSNode.reset(new SmBinVerNode(m_aCurToken));
1335 xOper.reset(new SmRectangleNode(m_aCurToken));
1336 NextToken();
1337 break;
1339 case TBOPER:
1340 xSNode.reset(new SmBinHorNode(m_aCurToken));
1342 NextToken();
1344 //Let the glyph node know it's a binary operation
1345 m_aCurToken.eType = TBOPER;
1346 m_aCurToken.nGroup = TG::Product;
1347 xOper = DoGlyphSpecial();
1348 break;
1350 case TOVERBRACE :
1351 case TUNDERBRACE :
1352 xSNode.reset(new SmVerticalBraceNode(m_aCurToken));
1353 xOper.reset(new SmMathSymbolNode(m_aCurToken));
1355 NextToken();
1356 break;
1358 case TWIDEBACKSLASH:
1359 case TWIDESLASH:
1361 SmBinDiagonalNode *pSTmp = new SmBinDiagonalNode(m_aCurToken);
1362 pSTmp->SetAscending(eType == TWIDESLASH);
1363 xSNode.reset(pSTmp);
1365 xOper.reset(new SmPolyLineNode(m_aCurToken));
1366 NextToken();
1368 bSwitchArgs = true;
1369 break;
1372 default:
1373 xSNode.reset(new SmBinHorNode(m_aCurToken));
1375 xOper = DoOpSubSup();
1378 auto xArg = DoPower();
1380 if (bSwitchArgs)
1382 //! vgl siehe SmBinDiagonalNode::Arrange
1383 xSNode->SetSubNodes(std::move(xFirst), std::move(xArg), std::move(xOper));
1385 else
1387 xSNode->SetSubNodes(std::move(xFirst), std::move(xOper), std::move(xArg));
1389 xFirst = std::move(xSNode);
1390 ++nDepthLimit;
1392 return xFirst;
1395 std::unique_ptr<SmNode> SmParser::DoSubSup(TG nActiveGroup, SmNode *pGivenNode)
1397 std::unique_ptr<SmNode> xGivenNode(pGivenNode);
1398 DepthProtect aDepthGuard(m_nParseDepth);
1399 if (aDepthGuard.TooDeep())
1400 throw std::range_error("parser depth limit");
1402 assert(nActiveGroup == TG::Power || nActiveGroup == TG::Limit);
1403 assert(m_aCurToken.nGroup == nActiveGroup);
1405 std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
1406 //! Of course 'm_aCurToken' is just the first sub-/supscript token.
1407 //! It should be of no further interest. The positions of the
1408 //! sub-/supscripts will be identified by the corresponding subnodes
1409 //! index in the 'aSubNodes' array (enum value from 'SmSubSup').
1411 pNode->SetUseLimits(nActiveGroup == TG::Limit);
1413 // initialize subnodes array
1414 std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
1415 aSubNodes[0] = std::move(xGivenNode);
1417 // process all sub-/supscripts
1418 int nIndex = 0;
1419 while (TokenInGroup(nActiveGroup))
1421 SmTokenType eType (m_aCurToken.eType);
1423 switch (eType)
1425 case TRSUB : nIndex = static_cast<int>(RSUB); break;
1426 case TRSUP : nIndex = static_cast<int>(RSUP); break;
1427 case TFROM :
1428 case TCSUB : nIndex = static_cast<int>(CSUB); break;
1429 case TTO :
1430 case TCSUP : nIndex = static_cast<int>(CSUP); break;
1431 case TLSUB : nIndex = static_cast<int>(LSUB); break;
1432 case TLSUP : nIndex = static_cast<int>(LSUP); break;
1433 default :
1434 SAL_WARN( "starmath", "unknown case");
1436 nIndex++;
1437 assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);
1439 std::unique_ptr<SmNode> xENode;
1440 if (aSubNodes[nIndex]) // if already occupied at earlier iteration
1442 // forget the earlier one, remember an error instead
1443 aSubNodes[nIndex].reset();
1444 xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
1446 else
1448 // skip sub-/supscript token
1449 NextToken();
1452 // get sub-/supscript node
1453 // (even when we saw a double-sub/supscript error in the above
1454 // in order to minimize mess and continue parsing.)
1455 std::unique_ptr<SmNode> xSNode;
1456 if (eType == TFROM || eType == TTO)
1458 // parse limits in old 4.0 and 5.0 style
1459 xSNode = DoRelation();
1461 else
1462 xSNode = DoTerm(true);
1464 aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
1467 pNode->SetSubNodes(buildNodeArray(aSubNodes));
1468 return pNode;
1471 std::unique_ptr<SmNode> SmParser::DoSubSupEvaluate(SmNode *pGivenNode)
1473 std::unique_ptr<SmNode> xGivenNode(pGivenNode);
1474 DepthProtect aDepthGuard(m_nParseDepth);
1475 if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
1477 std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
1478 pNode->SetUseLimits(true);
1480 // initialize subnodes array
1481 std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
1482 aSubNodes[0] = std::move(xGivenNode);
1484 // process all sub-/supscripts
1485 int nIndex = 0;
1486 while (TokenInGroup(TG::Limit))
1488 SmTokenType eType (m_aCurToken.eType);
1490 switch (eType)
1492 case TFROM : nIndex = static_cast<int>(RSUB); break;
1493 case TTO : nIndex = static_cast<int>(RSUP); break;
1494 default :
1495 SAL_WARN( "starmath", "unknown case");
1497 nIndex++;
1498 assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);
1500 std::unique_ptr<SmNode> xENode;
1501 if (aSubNodes[nIndex]) // if already occupied at earlier iteration
1503 // forget the earlier one, remember an error instead
1504 aSubNodes[nIndex].reset();
1505 xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
1507 else NextToken(); // skip sub-/supscript token
1509 // get sub-/supscript node
1510 std::unique_ptr<SmNode> xSNode;
1511 xSNode = DoTerm(true);
1513 aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
1516 pNode->SetSubNodes(buildNodeArray(aSubNodes));
1517 return pNode;
1520 std::unique_ptr<SmNode> SmParser::DoOpSubSup()
1522 DepthProtect aDepthGuard(m_nParseDepth);
1523 if (aDepthGuard.TooDeep())
1524 throw std::range_error("parser depth limit");
1526 // get operator symbol
1527 auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1528 // skip operator token
1529 NextToken();
1530 // get sub- supscripts if any
1531 if (m_aCurToken.nGroup == TG::Power)
1532 return DoSubSup(TG::Power, pNode.release());
1533 return pNode;
1536 std::unique_ptr<SmNode> SmParser::DoPower()
1538 DepthProtect aDepthGuard(m_nParseDepth);
1539 if (aDepthGuard.TooDeep())
1540 throw std::range_error("parser depth limit");
1542 // get body for sub- supscripts on top of stack
1543 std::unique_ptr<SmNode> xNode(DoTerm(false));
1545 if (m_aCurToken.nGroup == TG::Power)
1546 return DoSubSup(TG::Power, xNode.release());
1547 return xNode;
1550 std::unique_ptr<SmBlankNode> SmParser::DoBlank()
1552 DepthProtect aDepthGuard(m_nParseDepth);
1553 if (aDepthGuard.TooDeep())
1554 throw std::range_error("parser depth limit");
1556 assert(TokenInGroup(TG::Blank));
1557 std::unique_ptr<SmBlankNode> pBlankNode(new SmBlankNode(m_aCurToken));
1561 pBlankNode->IncreaseBy(m_aCurToken);
1562 NextToken();
1564 while (TokenInGroup(TG::Blank));
1566 // Ignore trailing spaces, if corresponding option is set
1567 if ( m_aCurToken.eType == TNEWLINE ||
1568 (m_aCurToken.eType == TEND && !utl::ConfigManager::IsFuzzing() && SM_MOD()->GetConfig()->IsIgnoreSpacesRight()) )
1570 pBlankNode->Clear();
1572 return pBlankNode;
1575 std::unique_ptr<SmNode> SmParser::DoTerm(bool bGroupNumberIdent)
1577 DepthProtect aDepthGuard(m_nParseDepth);
1578 if (aDepthGuard.TooDeep())
1579 throw std::range_error("parser depth limit");
1581 switch (m_aCurToken.eType)
1583 case TESCAPE :
1584 return DoEscape();
1586 case TNOSPACE :
1587 case TLGROUP :
1589 bool bNoSpace = m_aCurToken.eType == TNOSPACE;
1590 if (bNoSpace)
1591 NextToken();
1592 if (m_aCurToken.eType != TLGROUP)
1593 return DoTerm(false); // nospace is no longer concerned
1595 NextToken();
1597 // allow for empty group
1598 if (m_aCurToken.eType == TRGROUP)
1600 std::unique_ptr<SmStructureNode> xSNode(new SmExpressionNode(m_aCurToken));
1601 xSNode->SetSubNodes(nullptr, nullptr);
1603 NextToken();
1604 return std::unique_ptr<SmNode>(xSNode.release());
1607 auto pNode = DoAlign(!bNoSpace);
1608 if (m_aCurToken.eType == TRGROUP) {
1609 NextToken();
1610 return pNode;
1612 auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
1613 std::unique_ptr<SmNode> xError(DoError(SmParseError::RgroupExpected));
1614 xSNode->SetSubNodes(std::move(pNode), std::move(xError));
1615 return std::unique_ptr<SmNode>(xSNode.release());
1618 case TLEFT :
1619 return DoBrace();
1620 case TEVALUATE:
1621 return DoEvaluate();
1623 case TBLANK :
1624 case TSBLANK :
1625 return DoBlank();
1627 case TTEXT :
1629 auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_TEXT);
1630 NextToken();
1631 return std::unique_ptr<SmNode>(pNode.release());
1633 case TCHARACTER :
1635 auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_VARIABLE);
1636 NextToken();
1637 return std::unique_ptr<SmNode>(pNode.release());
1639 case TIDENT :
1640 case TNUMBER :
1642 auto pTextNode = std::make_unique<SmTextNode>(m_aCurToken,
1643 m_aCurToken.eType == TNUMBER ?
1644 FNT_NUMBER :
1645 FNT_VARIABLE);
1646 if (!bGroupNumberIdent)
1648 NextToken();
1649 return std::unique_ptr<SmNode>(pTextNode.release());
1651 std::vector<std::unique_ptr<SmNode>> aNodes;
1652 // Some people want to be able to write "x_2n" for "x_{2n}"
1653 // although e.g. LaTeX or AsciiMath interpret that as "x_2 n".
1654 // The tokenizer skips whitespaces so we need some additional
1655 // work to distinguish from "x_2 n".
1656 // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and
1657 // https://bugs.libreoffice.org/show_bug.cgi?id=55853
1658 sal_Int32 nBufLen = m_aBufferString.getLength();
1660 // We need to be careful to call NextToken() only after having
1661 // tested for a whitespace separator (otherwise it will be
1662 // skipped!)
1663 bool moveToNextToken = true;
1664 while (m_nBufferIndex < nBufLen &&
1665 m_pSysCC->getType(m_aBufferString, m_nBufferIndex) !=
1666 UnicodeType::SPACE_SEPARATOR)
1668 NextToken();
1669 if (m_aCurToken.eType != TNUMBER &&
1670 m_aCurToken.eType != TIDENT)
1672 // Neither a number nor an identifier. We just moved to
1673 // the next token, so no need to do that again.
1674 moveToNextToken = false;
1675 break;
1677 aNodes.emplace_back(std::unique_ptr<SmNode>(new SmTextNode(m_aCurToken,
1678 m_aCurToken.eType ==
1679 TNUMBER ?
1680 FNT_NUMBER :
1681 FNT_VARIABLE)));
1683 if (moveToNextToken)
1684 NextToken();
1685 if (aNodes.empty())
1686 return std::unique_ptr<SmNode>(pTextNode.release());
1687 // We have several concatenated identifiers and numbers.
1688 // Let's group them into one SmExpressionNode.
1689 aNodes.insert(aNodes.begin(), std::move(pTextNode));
1690 std::unique_ptr<SmExpressionNode> xNode(new SmExpressionNode(SmToken()));
1691 xNode->SetSubNodes(buildNodeArray(aNodes));
1692 return std::unique_ptr<SmNode>(xNode.release());
1694 case TLEFTARROW :
1695 case TRIGHTARROW :
1696 case TUPARROW :
1697 case TDOWNARROW :
1698 case TCIRC :
1699 case TDRARROW :
1700 case TDLARROW :
1701 case TDLRARROW :
1702 case TEXISTS :
1703 case TNOTEXISTS :
1704 case TFORALL :
1705 case TPARTIAL :
1706 case TNABLA :
1707 case TLAPLACE :
1708 case TFOURIER :
1709 case TTOWARD :
1710 case TDOTSAXIS :
1711 case TDOTSDIAG :
1712 case TDOTSDOWN :
1713 case TDOTSLOW :
1714 case TDOTSUP :
1715 case TDOTSVERT :
1717 auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1718 NextToken();
1719 return std::unique_ptr<SmNode>(pNode.release());
1722 case TSETN :
1723 case TSETZ :
1724 case TSETQ :
1725 case TSETR :
1726 case TSETC :
1727 case THBAR :
1728 case TLAMBDABAR :
1729 case TBACKEPSILON :
1730 case TALEPH :
1731 case TIM :
1732 case TRE :
1733 case TWP :
1734 case TEMPTYSET :
1735 case TINFINITY :
1737 auto pNode = std::make_unique<SmMathIdentifierNode>(m_aCurToken);
1738 NextToken();
1739 return std::unique_ptr<SmNode>(pNode.release());
1742 case TPLACE:
1744 auto pNode = std::make_unique<SmPlaceNode>(m_aCurToken);
1745 NextToken();
1746 return std::unique_ptr<SmNode>(pNode.release());
1749 case TSPECIAL:
1750 return DoSpecial();
1752 case TBINOM:
1753 return DoBinom();
1755 case TFRAC:
1756 return DoFrac();
1758 case TSTACK:
1759 return DoStack();
1761 case TMATRIX:
1762 return DoMatrix();
1764 case THEX:
1765 NextTokenFontSize();
1766 if( m_aCurToken.eType == THEX )
1768 auto pTextNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_NUMBER );
1769 NextToken();
1770 return pTextNode;
1772 else return DoError(SmParseError::NumberExpected);
1773 default:
1774 if (TokenInGroup(TG::LBrace)) return DoBrace();
1775 if (TokenInGroup(TG::Oper)) return DoOperator();
1776 if (TokenInGroup(TG::UnOper)) return DoUnOper();
1777 if ( TokenInGroup(TG::Attribute) || TokenInGroup(TG::FontAttr) )
1779 std::stack<std::unique_ptr<SmStructureNode>> aStack;
1780 bool bIsAttr;
1781 for (;;)
1783 bIsAttr = TokenInGroup(TG::Attribute);
1784 if (!bIsAttr && !TokenInGroup(TG::FontAttr))
1785 break;
1786 aStack.push(bIsAttr ? DoAttribut() : DoFontAttribut());
1789 auto xFirstNode = DoPower();
1790 while (!aStack.empty())
1792 std::unique_ptr<SmStructureNode> xNode = std::move(aStack.top());
1793 aStack.pop();
1794 xNode->SetSubNodes(nullptr, std::move(xFirstNode));
1795 xFirstNode = std::move(xNode);
1797 return xFirstNode;
1799 if (TokenInGroup(TG::Function))
1800 return DoFunction();
1801 return DoError(SmParseError::UnexpectedChar);
1805 std::unique_ptr<SmNode> SmParser::DoEscape()
1807 DepthProtect aDepthGuard(m_nParseDepth);
1808 if (aDepthGuard.TooDeep())
1809 throw std::range_error("parser depth limit");
1811 NextToken();
1813 switch (m_aCurToken.eType)
1815 case TLPARENT :
1816 case TRPARENT :
1817 case TLBRACKET :
1818 case TRBRACKET :
1819 case TLDBRACKET :
1820 case TRDBRACKET :
1821 case TLBRACE :
1822 case TLGROUP :
1823 case TRBRACE :
1824 case TRGROUP :
1825 case TLANGLE :
1826 case TRANGLE :
1827 case TLCEIL :
1828 case TRCEIL :
1829 case TLFLOOR :
1830 case TRFLOOR :
1831 case TLLINE :
1832 case TRLINE :
1833 case TLDLINE :
1834 case TRDLINE :
1836 auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1837 NextToken();
1838 return std::unique_ptr<SmNode>(pNode.release());
1840 default:
1841 return DoError(SmParseError::UnexpectedToken);
1845 std::unique_ptr<SmOperNode> SmParser::DoOperator()
1847 DepthProtect aDepthGuard(m_nParseDepth);
1848 if (aDepthGuard.TooDeep())
1849 throw std::range_error("parser depth limit");
1851 assert(TokenInGroup(TG::Oper));
1853 auto xSNode = std::make_unique<SmOperNode>(m_aCurToken);
1855 // get operator
1856 auto xOperator = DoOper();
1858 if (m_aCurToken.nGroup == TG::Limit || m_aCurToken.nGroup == TG::Power)
1859 xOperator = DoSubSup(m_aCurToken.nGroup, xOperator.release());
1861 // get argument
1862 auto xArg = DoPower();
1864 xSNode->SetSubNodes(std::move(xOperator), std::move(xArg));
1865 return xSNode;
1868 std::unique_ptr<SmNode> SmParser::DoOper()
1870 DepthProtect aDepthGuard(m_nParseDepth);
1871 if (aDepthGuard.TooDeep())
1872 throw std::range_error("parser depth limit");
1874 SmTokenType eType (m_aCurToken.eType);
1875 std::unique_ptr<SmNode> pNode;
1877 switch (eType)
1879 case TSUM :
1880 case TPROD :
1881 case TCOPROD :
1882 case TINT :
1883 case TINTD :
1884 case TIINT :
1885 case TIIINT :
1886 case TLINT :
1887 case TLLINT :
1888 case TLLLINT :
1889 pNode.reset(new SmMathSymbolNode(m_aCurToken));
1890 break;
1892 case TLIM :
1893 case TLIMSUP :
1894 case TLIMINF :
1896 const char* pLim = nullptr;
1897 switch (eType)
1899 case TLIM : pLim = "lim"; break;
1900 case TLIMSUP : pLim = "lim sup"; break;
1901 case TLIMINF : pLim = "lim inf"; break;
1902 default:
1903 break;
1905 if( pLim )
1906 m_aCurToken.aText = OUString::createFromAscii(pLim);
1907 pNode.reset(new SmTextNode(m_aCurToken, FNT_TEXT));
1909 break;
1911 case TOPER :
1912 NextToken();
1913 OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token");
1914 m_aCurToken.eType = TOPER;
1915 pNode.reset(new SmGlyphSpecialNode(m_aCurToken));
1916 break;
1918 default :
1919 assert(false && "unknown case");
1922 NextToken();
1923 return pNode;
1926 std::unique_ptr<SmStructureNode> SmParser::DoUnOper()
1928 DepthProtect aDepthGuard(m_nParseDepth);
1929 if (aDepthGuard.TooDeep())
1930 throw std::range_error("parser depth limit");
1932 assert(TokenInGroup(TG::UnOper));
1934 SmToken aNodeToken = m_aCurToken;
1935 SmTokenType eType = m_aCurToken.eType;
1936 bool bIsPostfix = eType == TFACT;
1938 std::unique_ptr<SmStructureNode> xSNode;
1939 std::unique_ptr<SmNode> xOper;
1940 std::unique_ptr<SmNode> xExtra;
1941 std::unique_ptr<SmNode> xArg;
1943 switch (eType)
1945 case TABS :
1946 case TSQRT :
1947 NextToken();
1948 break;
1950 case TNROOT :
1951 NextToken();
1952 xExtra = DoPower();
1953 break;
1955 case TUOPER :
1956 NextToken();
1957 //Let the glyph know what it is...
1958 m_aCurToken.eType = TUOPER;
1959 m_aCurToken.nGroup = TG::UnOper;
1960 xOper = DoGlyphSpecial();
1961 break;
1963 case TPLUS :
1964 case TMINUS :
1965 case TPLUSMINUS :
1966 case TMINUSPLUS :
1967 case TNEG :
1968 case TFACT :
1969 xOper = DoOpSubSup();
1970 break;
1972 default :
1973 assert(false);
1976 // get argument
1977 xArg = DoPower();
1979 if (eType == TABS)
1981 xSNode.reset(new SmBraceNode(aNodeToken));
1982 xSNode->SetScaleMode(SmScaleMode::Height);
1984 // build nodes for left & right lines
1985 // (text, group, level of the used token are of no interest here)
1986 // we'll use row & column of the keyword for abs
1987 aNodeToken.eType = TABS;
1989 aNodeToken.cMathChar = MS_VERTLINE;
1990 std::unique_ptr<SmNode> xLeft(new SmMathSymbolNode(aNodeToken));
1991 std::unique_ptr<SmNode> xRight(new SmMathSymbolNode(aNodeToken));
1993 xSNode->SetSubNodes(std::move(xLeft), std::move(xArg), std::move(xRight));
1995 else if (eType == TSQRT || eType == TNROOT)
1997 xSNode.reset(new SmRootNode(aNodeToken));
1998 xOper.reset(new SmRootSymbolNode(aNodeToken));
1999 xSNode->SetSubNodes(std::move(xExtra), std::move(xOper), std::move(xArg));
2001 else
2003 xSNode.reset(new SmUnHorNode(aNodeToken));
2004 if (bIsPostfix)
2005 xSNode->SetSubNodes(std::move(xArg), std::move(xOper));
2006 else
2008 // prefix operator
2009 xSNode->SetSubNodes(std::move(xOper), std::move(xArg));
2012 return xSNode;
2015 std::unique_ptr<SmStructureNode> SmParser::DoAttribut()
2017 DepthProtect aDepthGuard(m_nParseDepth);
2018 if (aDepthGuard.TooDeep())
2019 throw std::range_error("parser depth limit");
2021 assert(TokenInGroup(TG::Attribute));
2023 auto xSNode = std::make_unique<SmAttributNode>(m_aCurToken);
2024 std::unique_ptr<SmNode> xAttr;
2025 SmScaleMode eScaleMode = SmScaleMode::None;
2027 // get appropriate node for the attribute itself
2028 switch (m_aCurToken.eType)
2029 { case TUNDERLINE :
2030 case TOVERLINE :
2031 case TOVERSTRIKE :
2032 xAttr.reset(new SmRectangleNode(m_aCurToken));
2033 eScaleMode = SmScaleMode::Width;
2034 break;
2036 case TWIDEVEC :
2037 case TWIDEHARPOON :
2038 case TWIDEHAT :
2039 case TWIDETILDE :
2040 xAttr.reset(new SmMathSymbolNode(m_aCurToken));
2041 eScaleMode = SmScaleMode::Width;
2042 break;
2044 default :
2045 xAttr.reset(new SmMathSymbolNode(m_aCurToken));
2048 NextToken();
2050 xSNode->SetSubNodes(std::move(xAttr), nullptr); // the body will be filled later
2051 xSNode->SetScaleMode(eScaleMode);
2052 return xSNode;
2055 std::unique_ptr<SmStructureNode> SmParser::DoFontAttribut()
2057 DepthProtect aDepthGuard(m_nParseDepth);
2058 if (aDepthGuard.TooDeep())
2059 throw std::range_error("parser depth limit");
2061 assert(TokenInGroup(TG::FontAttr));
2063 switch (m_aCurToken.eType)
2065 case TITALIC :
2066 case TNITALIC :
2067 case TBOLD :
2068 case TNBOLD :
2069 case TPHANTOM :
2071 auto pNode = std::make_unique<SmFontNode>(m_aCurToken);
2072 NextToken();
2073 return pNode;
2076 case TSIZE :
2077 return DoFontSize();
2079 case TFONT :
2080 return DoFont();
2082 case TCOLOR :
2083 return DoColor();
2085 default :
2086 assert(false);
2087 return {};
2091 std::unique_ptr<SmStructureNode> SmParser::DoColor()
2093 DepthProtect aDepthGuard(m_nParseDepth);
2094 if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
2096 assert(m_aCurToken.eType == TCOLOR);
2097 NextTokenColor(false);
2098 SmToken aToken;
2100 if( m_aCurToken.eType == TDVIPSNAMESCOL ) NextTokenColor(true);
2101 if( m_aCurToken.eType == TERROR ) return DoError(SmParseError::ColorExpected);
2102 if (TokenInGroup(TG::Color))
2104 aToken = m_aCurToken;
2105 if( m_aCurToken.eType == TRGB ) //loads r, g and b
2107 sal_uInt32 nr, ng, nb, nc;
2108 NextTokenFontSize();
2109 if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2110 return DoError(SmParseError::ColorExpected);
2111 nr = m_aCurToken.aText.toUInt32();
2112 if( nr > 255 )return DoError(SmParseError::ColorExpected);
2113 NextTokenFontSize();
2114 if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2115 return DoError(SmParseError::ColorExpected);
2116 ng = m_aCurToken.aText.toUInt32();
2117 if( ng > 255 )return DoError(SmParseError::ColorExpected);
2118 NextTokenFontSize();
2119 if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2120 return DoError(SmParseError::ColorExpected);
2121 nb = m_aCurToken.aText.toUInt32();
2122 if( nb > 255 )return DoError(SmParseError::ColorExpected);
2123 nc = nb | ng << 8 | nr << 16 | sal_uInt32(0) << 24;
2124 aToken.aText = OUString::number(nc, 16);
2126 else if( m_aCurToken.eType == TRGBA ) //loads r, g and b
2128 sal_uInt32 nr, na, ng, nb, nc;
2129 NextTokenFontSize();
2130 if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2131 return DoError(SmParseError::ColorExpected);
2132 nr = m_aCurToken.aText.toUInt32();
2133 if( nr > 255 )return DoError(SmParseError::ColorExpected);
2134 NextTokenFontSize();
2135 if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2136 return DoError(SmParseError::ColorExpected);
2137 ng = m_aCurToken.aText.toUInt32();
2138 if( ng > 255 )return DoError(SmParseError::ColorExpected);
2139 NextTokenFontSize();
2140 if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2141 return DoError(SmParseError::ColorExpected);
2142 nb = m_aCurToken.aText.toUInt32();
2143 if( nb > 255 )return DoError(SmParseError::ColorExpected);
2144 NextTokenFontSize();
2145 if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2146 return DoError(SmParseError::ColorExpected);
2147 na = m_aCurToken.aText.toUInt32();
2148 if( na > 255 )return DoError(SmParseError::ColorExpected);
2149 nc = nb | ng << 8 | nr << 16 | na << 24;
2150 aToken.aText = OUString::number(nc, 16);
2152 else if( m_aCurToken.eType == THEX ) //loads hex code
2154 sal_uInt32 nc;
2155 NextTokenFontSize();
2156 if( lcl_IsNotWholeNumber16(m_aCurToken.aText) )
2157 return DoError(SmParseError::ColorExpected);
2158 nc = m_aCurToken.aText.toUInt32(16);
2159 aToken.aText = OUString::number(nc, 16);
2161 NextToken();
2163 else return DoError(SmParseError::ColorExpected);
2165 std::unique_ptr<SmStructureNode> xNode;
2166 xNode.reset(new SmFontNode(aToken));
2167 return xNode;
2170 std::unique_ptr<SmStructureNode> SmParser::DoFont()
2172 DepthProtect aDepthGuard(m_nParseDepth);
2173 if (aDepthGuard.TooDeep())
2174 throw std::range_error("parser depth limit");
2176 assert(m_aCurToken.eType == TFONT);
2178 std::unique_ptr<SmStructureNode> xNode;
2179 // last font rules, get that one
2180 SmToken aToken;
2182 { NextToken();
2184 if (TokenInGroup(TG::Font))
2185 { aToken = m_aCurToken;
2186 NextToken();
2188 else
2190 return DoError(SmParseError::FontExpected);
2192 } while (m_aCurToken.eType == TFONT);
2194 xNode.reset(new SmFontNode(aToken));
2195 return xNode;
2198 std::unique_ptr<SmStructureNode> SmParser::DoFontSize()
2200 DepthProtect aDepthGuard(m_nParseDepth);
2201 if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
2202 std::unique_ptr<SmFontNode> pFontNode(new SmFontNode(m_aCurToken));
2203 NextTokenFontSize();
2204 FontSizeType Type;
2206 switch (m_aCurToken.eType)
2208 case THEX: Type = FontSizeType::ABSOLUT; break;
2209 case TPLUS: Type = FontSizeType::PLUS; break;
2210 case TMINUS: Type = FontSizeType::MINUS; break;
2211 case TMULTIPLY: Type = FontSizeType::MULTIPLY; break;
2212 case TDIVIDEBY: Type = FontSizeType::DIVIDE; break;
2214 default:
2215 return DoError(SmParseError::SizeExpected);
2218 if (Type != FontSizeType::ABSOLUT)
2220 NextTokenFontSize();
2221 if (m_aCurToken.eType != THEX) return DoError(SmParseError::SizeExpected);
2224 // get number argument
2225 Fraction aValue( 1 );
2226 if (lcl_IsNumber( m_aCurToken.aText ))
2228 aValue = m_aCurToken.aText.toDouble();
2229 //!! Reduce values in order to avoid numerical errors
2230 if (aValue.GetDenominator() > 1000)
2232 tools::Long nNum = aValue.GetNumerator();
2233 tools::Long nDenom = aValue.GetDenominator();
2234 while ( nDenom > 1000 ) //remove big denominator
2236 nNum /= 10;
2237 nDenom /= 10;
2239 aValue = Fraction( nNum, nDenom );
2242 else return DoError(SmParseError::SizeExpected);
2244 pFontNode->SetSizeParameter(aValue, Type);
2245 NextToken();
2246 return pFontNode;
2249 std::unique_ptr<SmStructureNode> SmParser::DoBrace()
2251 DepthProtect aDepthGuard(m_nParseDepth);
2252 if (aDepthGuard.TooDeep())
2253 throw std::range_error("parser depth limit");
2255 assert(m_aCurToken.eType == TLEFT || TokenInGroup(TG::LBrace));
2257 std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
2258 std::unique_ptr<SmNode> pBody, pLeft, pRight;
2259 SmScaleMode eScaleMode = SmScaleMode::None;
2260 SmParseError eError = SmParseError::None;
2262 if (m_aCurToken.eType == TLEFT)
2263 { NextToken();
2265 eScaleMode = SmScaleMode::Height;
2267 // check for left bracket
2268 if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
2270 pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2272 NextToken();
2273 pBody = DoBracebody(true);
2275 if (m_aCurToken.eType == TRIGHT)
2276 { NextToken();
2278 // check for right bracket
2279 if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
2281 pRight.reset(new SmMathSymbolNode(m_aCurToken));
2282 NextToken();
2284 else
2285 eError = SmParseError::RbraceExpected;
2287 else
2288 eError = SmParseError::RightExpected;
2290 else
2291 eError = SmParseError::LbraceExpected;
2293 else
2295 assert(TokenInGroup(TG::LBrace));
2297 pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2299 NextToken();
2300 pBody = DoBracebody(false);
2302 SmTokenType eExpectedType = TUNKNOWN;
2303 switch (pLeft->GetToken().eType)
2304 { case TLPARENT : eExpectedType = TRPARENT; break;
2305 case TLBRACKET : eExpectedType = TRBRACKET; break;
2306 case TLBRACE : eExpectedType = TRBRACE; break;
2307 case TLDBRACKET : eExpectedType = TRDBRACKET; break;
2308 case TLLINE : eExpectedType = TRLINE; break;
2309 case TLDLINE : eExpectedType = TRDLINE; break;
2310 case TLANGLE : eExpectedType = TRANGLE; break;
2311 case TLFLOOR : eExpectedType = TRFLOOR; break;
2312 case TLCEIL : eExpectedType = TRCEIL; break;
2313 case TLRLINE : eExpectedType = TLRLINE; break;
2314 case TLRDLINE : eExpectedType = TLRDLINE; break;
2315 default :
2316 SAL_WARN("starmath", "unknown case");
2319 if (m_aCurToken.eType == eExpectedType)
2321 pRight.reset(new SmMathSymbolNode(m_aCurToken));
2322 NextToken();
2324 else
2325 eError = SmParseError::ParentMismatch;
2328 if (eError == SmParseError::None)
2330 assert(pLeft);
2331 assert(pRight);
2332 xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
2333 xSNode->SetScaleMode(eScaleMode);
2334 return xSNode;
2336 return DoError(eError);
2339 std::unique_ptr<SmBracebodyNode> SmParser::DoBracebody(bool bIsLeftRight)
2341 DepthProtect aDepthGuard(m_nParseDepth);
2342 if (aDepthGuard.TooDeep())
2343 throw std::range_error("parser depth limit");
2345 auto pBody = std::make_unique<SmBracebodyNode>(m_aCurToken);
2347 std::vector<std::unique_ptr<SmNode>> aNodes;
2348 // get body if any
2349 if (bIsLeftRight)
2353 if (m_aCurToken.eType == TMLINE)
2355 aNodes.emplace_back(std::make_unique<SmMathSymbolNode>(m_aCurToken));
2356 NextToken();
2358 else if (m_aCurToken.eType != TRIGHT)
2360 aNodes.push_back(DoAlign());
2361 if (m_aCurToken.eType != TMLINE && m_aCurToken.eType != TRIGHT)
2362 aNodes.emplace_back(DoError(SmParseError::RightExpected));
2364 } while (m_aCurToken.eType != TEND && m_aCurToken.eType != TRIGHT);
2366 else
2370 if (m_aCurToken.eType == TMLINE)
2372 aNodes.emplace_back(std::make_unique<SmMathSymbolNode>(m_aCurToken));
2373 NextToken();
2375 else if (!TokenInGroup(TG::RBrace))
2377 aNodes.push_back(DoAlign());
2378 if (m_aCurToken.eType != TMLINE && !TokenInGroup(TG::RBrace))
2379 aNodes.emplace_back(DoError(SmParseError::RbraceExpected));
2381 } while (m_aCurToken.eType != TEND && !TokenInGroup(TG::RBrace));
2384 pBody->SetSubNodes(buildNodeArray(aNodes));
2385 pBody->SetScaleMode(bIsLeftRight ? SmScaleMode::Height : SmScaleMode::None);
2386 return pBody;
2389 std::unique_ptr<SmNode> SmParser::DoEvaluate()
2392 // Checkout depth and create node
2393 DepthProtect aDepthGuard(m_nParseDepth);
2394 if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
2395 std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
2396 SmToken aToken( TRLINE, MS_VERTLINE, "evaluate", TG::RBrace, 5);
2397 aToken.nRow = m_aCurToken.nRow;
2398 aToken.nCol = m_aCurToken.nCol;
2400 // Parse body && left none
2401 NextToken();
2402 std::unique_ptr<SmNode> pBody = DoPower();
2403 SmToken bToken( TNONE, '\0', "", TG::LBrace, 5);
2404 std::unique_ptr<SmNode> pLeft;
2405 pLeft.reset(new SmMathSymbolNode(bToken));
2407 // Mount nodes
2408 std::unique_ptr<SmNode> pRight;
2409 pRight.reset(new SmMathSymbolNode(aToken));
2410 xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
2411 xSNode->SetScaleMode(SmScaleMode::Height); // scalable line
2413 // Parse from to
2414 if ( m_aCurToken.nGroup == TG::Limit )
2416 std::unique_ptr<SmNode> rSNode;
2417 rSNode = DoSubSupEvaluate(xSNode.release());
2418 rSNode->GetToken().eType = TEVALUATE;
2419 return rSNode;
2422 return xSNode;
2426 std::unique_ptr<SmTextNode> SmParser::DoFunction()
2428 DepthProtect aDepthGuard(m_nParseDepth);
2429 if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
2430 if( m_aCurToken.eType == TFUNC )
2432 NextToken(); // skip "FUNC"-statement
2433 m_aCurToken.eType = TFUNC;
2434 m_aCurToken.nGroup = TG::Function;
2436 auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_FUNCTION);
2437 NextToken();
2438 return pNode;
2441 std::unique_ptr<SmTableNode> SmParser::DoBinom()
2443 DepthProtect aDepthGuard(m_nParseDepth);
2444 if (aDepthGuard.TooDeep())
2445 throw std::range_error("parser depth limit");
2447 auto xSNode = std::make_unique<SmTableNode>(m_aCurToken);
2449 NextToken();
2451 auto xFirst = DoSum();
2452 auto xSecond = DoSum();
2453 xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond));
2454 return xSNode;
2457 std::unique_ptr<SmBinVerNode> SmParser::DoFrac()
2459 DepthProtect aDepthGuard(m_nParseDepth);
2460 if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
2462 std::unique_ptr<SmBinVerNode> xSNode = std::make_unique<SmBinVerNode>(m_aCurToken);
2463 std::unique_ptr<SmNode> xOper = std::make_unique<SmRectangleNode>(m_aCurToken);
2465 NextToken();
2467 auto xFirst = DoSum();
2468 auto xSecond = DoSum();
2469 xSNode->SetSubNodes(std::move(xFirst), std::move(xOper), std::move(xSecond));
2470 return xSNode;
2473 std::unique_ptr<SmStructureNode> SmParser::DoStack()
2475 DepthProtect aDepthGuard(m_nParseDepth);
2476 if (aDepthGuard.TooDeep())
2477 throw std::range_error("parser depth limit");
2479 std::unique_ptr<SmStructureNode> xSNode(new SmTableNode(m_aCurToken));
2480 NextToken();
2481 if (m_aCurToken.eType != TLGROUP)
2482 return DoError(SmParseError::LgroupExpected);
2483 std::vector<std::unique_ptr<SmNode>> aExprArr;
2486 NextToken();
2487 aExprArr.push_back(DoAlign());
2489 while (m_aCurToken.eType == TPOUND);
2491 if (m_aCurToken.eType == TRGROUP)
2492 NextToken();
2493 else
2494 aExprArr.emplace_back(DoError(SmParseError::RgroupExpected));
2496 xSNode->SetSubNodes(buildNodeArray(aExprArr));
2497 return xSNode;
2500 std::unique_ptr<SmStructureNode> SmParser::DoMatrix()
2502 DepthProtect aDepthGuard(m_nParseDepth);
2503 if (aDepthGuard.TooDeep())
2504 throw std::range_error("parser depth limit");
2506 std::unique_ptr<SmMatrixNode> xMNode(new SmMatrixNode(m_aCurToken));
2507 NextToken();
2508 if (m_aCurToken.eType != TLGROUP)
2509 return DoError(SmParseError::LgroupExpected);
2511 std::vector<std::unique_ptr<SmNode>> aExprArr;
2514 NextToken();
2515 aExprArr.push_back(DoAlign());
2517 while (m_aCurToken.eType == TPOUND);
2519 size_t nCol = aExprArr.size();
2520 size_t nRow = 1;
2521 while (m_aCurToken.eType == TDPOUND)
2523 NextToken();
2524 for (size_t i = 0; i < nCol; i++)
2526 auto xNode = DoAlign();
2527 if (i < (nCol - 1))
2529 if (m_aCurToken.eType == TPOUND)
2530 NextToken();
2531 else
2532 xNode = DoError(SmParseError::PoundExpected);
2534 aExprArr.emplace_back(std::move(xNode));
2536 ++nRow;
2539 if (m_aCurToken.eType == TRGROUP)
2540 NextToken();
2541 else
2543 std::unique_ptr<SmNode> xENode(DoError(SmParseError::RgroupExpected));
2544 if (aExprArr.empty())
2545 nRow = nCol = 1;
2546 else
2547 aExprArr.pop_back();
2548 aExprArr.emplace_back(std::move(xENode));
2551 xMNode->SetSubNodes(buildNodeArray(aExprArr));
2552 xMNode->SetRowCol(static_cast<sal_uInt16>(nRow),
2553 static_cast<sal_uInt16>(nCol));
2554 return std::unique_ptr<SmStructureNode>(xMNode.release());
2557 std::unique_ptr<SmSpecialNode> SmParser::DoSpecial()
2559 DepthProtect aDepthGuard(m_nParseDepth);
2560 if (aDepthGuard.TooDeep())
2561 throw std::range_error("parser depth limit");
2563 bool bReplace = false;
2564 OUString &rName = m_aCurToken.aText;
2565 OUString aNewName;
2567 // conversion of symbol names for 6.0 (XML) file format
2568 // (name change on import / export.
2569 // UI uses localized names XML file format does not.)
2570 if( rName.startsWith("%") )
2572 if (IsImportSymbolNames())
2574 aNewName = SmLocalizedSymbolData::GetUiSymbolName(rName.copy(1));
2575 bReplace = true;
2577 else if (IsExportSymbolNames())
2579 aNewName = SmLocalizedSymbolData::GetExportSymbolName(rName.copy(1));
2580 bReplace = true;
2583 if (!aNewName.isEmpty())
2584 aNewName = "%" + aNewName;
2587 if (bReplace && !aNewName.isEmpty() && rName != aNewName)
2589 Replace(GetTokenIndex(), rName.getLength(), aNewName);
2590 rName = aNewName;
2593 // add symbol name to list of used symbols
2594 const OUString aSymbolName(m_aCurToken.aText.copy(1));
2595 if (!aSymbolName.isEmpty())
2596 m_aUsedSymbols.insert( aSymbolName );
2598 auto pNode = std::make_unique<SmSpecialNode>(m_aCurToken);
2599 NextToken();
2600 return pNode;
2603 std::unique_ptr<SmGlyphSpecialNode> SmParser::DoGlyphSpecial()
2605 DepthProtect aDepthGuard(m_nParseDepth);
2606 if (aDepthGuard.TooDeep())
2607 throw std::range_error("parser depth limit");
2609 auto pNode = std::make_unique<SmGlyphSpecialNode>(m_aCurToken);
2610 NextToken();
2611 return pNode;
2614 std::unique_ptr<SmExpressionNode> SmParser::DoError(SmParseError eError)
2616 DepthProtect aDepthGuard(m_nParseDepth);
2617 if (aDepthGuard.TooDeep())
2618 throw std::range_error("parser depth limit");
2620 auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
2621 std::unique_ptr<SmErrorNode> pErr(new SmErrorNode(m_aCurToken));
2622 xSNode->SetSubNodes(std::move(pErr), nullptr);
2624 AddError(eError, xSNode.get());
2626 NextToken();
2628 return xSNode;
2631 // end grammar
2634 SmParser::SmParser()
2635 : m_nCurError( 0 )
2636 , m_nBufferIndex( 0 )
2637 , m_nTokenIndex( 0 )
2638 , m_nRow( 0 )
2639 , m_nColOff( 0 )
2640 , m_bImportSymNames( false )
2641 , m_bExportSymNames( false )
2642 , m_nParseDepth(0)
2643 , m_aNumCC( LanguageTag( LANGUAGE_ENGLISH_US ) )
2644 , m_pSysCC( SM_MOD()->GetSysLocale().GetCharClassPtr() )
2648 std::unique_ptr<SmTableNode> SmParser::Parse(const OUString &rBuffer)
2650 m_aUsedSymbols.clear();
2652 m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2653 m_nBufferIndex = 0;
2654 m_nTokenIndex = 0;
2655 m_nRow = 1;
2656 m_nColOff = 0;
2657 m_nCurError = -1;
2659 m_aErrDescList.clear();
2661 NextToken();
2662 return DoTable();
2665 std::unique_ptr<SmNode> SmParser::ParseExpression(const OUString &rBuffer)
2667 m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2668 m_nBufferIndex = 0;
2669 m_nTokenIndex = 0;
2670 m_nRow = 1;
2671 m_nColOff = 0;
2672 m_nCurError = -1;
2674 m_aErrDescList.clear();
2676 NextToken();
2677 return DoExpression();
2681 void SmParser::AddError(SmParseError Type, SmNode *pNode)
2683 std::unique_ptr<SmErrorDesc> pErrDesc(new SmErrorDesc);
2685 pErrDesc->m_eType = Type;
2686 pErrDesc->m_pNode = pNode;
2687 pErrDesc->m_aText = SmResId(RID_ERR_IDENT);
2689 const char* pRID;
2690 switch (Type)
2692 case SmParseError::UnexpectedChar: pRID = RID_ERR_UNEXPECTEDCHARACTER; break;
2693 case SmParseError::UnexpectedToken: pRID = RID_ERR_UNEXPECTEDTOKEN; break;
2694 case SmParseError::PoundExpected: pRID = RID_ERR_POUNDEXPECTED; break;
2695 case SmParseError::ColorExpected: pRID = RID_ERR_COLOREXPECTED; break;
2696 case SmParseError::LgroupExpected: pRID = RID_ERR_LGROUPEXPECTED; break;
2697 case SmParseError::RgroupExpected: pRID = RID_ERR_RGROUPEXPECTED; break;
2698 case SmParseError::LbraceExpected: pRID = RID_ERR_LBRACEEXPECTED; break;
2699 case SmParseError::RbraceExpected: pRID = RID_ERR_RBRACEEXPECTED; break;
2700 case SmParseError::ParentMismatch: pRID = RID_ERR_PARENTMISMATCH; break;
2701 case SmParseError::RightExpected: pRID = RID_ERR_RIGHTEXPECTED; break;
2702 case SmParseError::FontExpected: pRID = RID_ERR_FONTEXPECTED; break;
2703 case SmParseError::SizeExpected: pRID = RID_ERR_SIZEEXPECTED; break;
2704 case SmParseError::DoubleAlign: pRID = RID_ERR_DOUBLEALIGN; break;
2705 case SmParseError::DoubleSubsupscript: pRID = RID_ERR_DOUBLESUBSUPSCRIPT; break;
2706 case SmParseError::NumberExpected: pRID = RID_ERR_NUMBEREXPECTED; break;
2707 default:
2708 assert(false);
2709 return;
2711 pErrDesc->m_aText += SmResId(pRID);
2713 m_aErrDescList.push_back(std::move(pErrDesc));
2717 const SmErrorDesc *SmParser::NextError()
2719 if ( !m_aErrDescList.empty() )
2720 if (m_nCurError > 0) return m_aErrDescList[ --m_nCurError ].get();
2721 else
2723 m_nCurError = 0;
2724 return m_aErrDescList[ m_nCurError ].get();
2726 else return nullptr;
2730 const SmErrorDesc *SmParser::PrevError()
2732 if ( !m_aErrDescList.empty() )
2733 if (m_nCurError < static_cast<int>(m_aErrDescList.size() - 1)) return m_aErrDescList[ ++m_nCurError ].get();
2734 else
2736 m_nCurError = static_cast<int>(m_aErrDescList.size() - 1);
2737 return m_aErrDescList[ m_nCurError ].get();
2739 else return nullptr;
2743 const SmErrorDesc *SmParser::GetError()
2745 if ( !m_aErrDescList.empty() )
2746 return m_aErrDescList.front().get();
2747 return nullptr;
2750 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */