Version 7.6.3.2-android, tag libreoffice-7.6.3.2-android
[LibreOffice.git] / starmath / source / parse5.cxx
blob13f3701db26a91c8e7f819bce5c6ced3cb883b28
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/i18n/UnicodeType.hpp>
21 #include <com/sun/star/i18n/KParseTokens.hpp>
22 #include <com/sun/star/i18n/KParseType.hpp>
23 #include <i18nlangtag/lang.h>
24 #include <tools/lineend.hxx>
25 #include <unotools/configmgr.hxx>
26 #include <unotools/syslocale.hxx>
27 #include <osl/diagnose.h>
28 #include <rtl/character.hxx>
29 #include <parse5.hxx>
30 #include <strings.hrc>
31 #include <smmod.hxx>
32 #include <cfgitem.hxx>
33 #include <starmathdatabase.hxx>
35 #include <stack>
37 using namespace ::com::sun::star::i18n;
39 //Definition of math keywords
40 const SmTokenTableEntry aTokenTable[]
41 = { { u"abs", TABS, '\0', TG::UnOper, 13 },
42 { u"acute", TACUTE, MS_ACUTE, TG::Attribute, 5 },
43 { u"aleph", TALEPH, MS_ALEPH, TG::Standalone, 5 },
44 { u"alignb", TALIGNC, '\0', TG::Align, 0 },
45 { u"alignc", TALIGNC, '\0', TG::Align, 0 },
46 { u"alignl", TALIGNL, '\0', TG::Align, 0 },
47 { u"alignm", TALIGNC, '\0', TG::Align, 0 },
48 { u"alignr", TALIGNR, '\0', TG::Align, 0 },
49 { u"alignt", TALIGNC, '\0', TG::Align, 0 },
50 { u"and", TAND, MS_AND, TG::Product, 0 },
51 { u"approx", TAPPROX, MS_APPROX, TG::Relation, 0 },
52 { u"arccos", TACOS, '\0', TG::Function, 5 },
53 { u"arccot", TACOT, '\0', TG::Function, 5 },
54 { u"arcosh", TACOSH, '\0', TG::Function, 5 },
55 { u"arcoth", TACOTH, '\0', TG::Function, 5 },
56 { u"arcsin", TASIN, '\0', TG::Function, 5 },
57 { u"arctan", TATAN, '\0', TG::Function, 5 },
58 { u"arsinh", TASINH, '\0', TG::Function, 5 },
59 { u"artanh", TATANH, '\0', TG::Function, 5 },
60 { u"backepsilon", TBACKEPSILON, MS_BACKEPSILON, TG::Standalone, 5 },
61 { u"bar", TBAR, MS_BAR, TG::Attribute, 5 },
62 { u"binom", TBINOM, '\0', TG::NONE, 5 },
63 { u"bold", TBOLD, '\0', TG::FontAttr, 5 },
64 { u"boper", TBOPER, '\0', TG::Product, 0 },
65 { u"breve", TBREVE, MS_BREVE, TG::Attribute, 5 },
66 { u"bslash", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
67 { u"cdot", TCDOT, MS_CDOT, TG::Product, 0 },
68 { u"check", TCHECK, MS_CHECK, TG::Attribute, 5 },
69 { u"circ", TCIRC, MS_CIRC, TG::Standalone, 5 },
70 { u"circle", TCIRCLE, MS_CIRCLE, TG::Attribute, 5 },
71 { u"color", TCOLOR, '\0', TG::FontAttr, 5 },
72 { u"coprod", TCOPROD, MS_COPROD, TG::Oper, 5 },
73 { u"cos", TCOS, '\0', TG::Function, 5 },
74 { u"cosh", TCOSH, '\0', TG::Function, 5 },
75 { u"cot", TCOT, '\0', TG::Function, 5 },
76 { u"coth", TCOTH, '\0', TG::Function, 5 },
77 { u"csub", TCSUB, '\0', TG::Power, 0 },
78 { u"csup", TCSUP, '\0', TG::Power, 0 },
79 { u"dddot", TDDDOT, MS_DDDOT, TG::Attribute, 5 },
80 { u"ddot", TDDOT, MS_DDOT, TG::Attribute, 5 },
81 { u"def", TDEF, MS_DEF, TG::Relation, 0 },
82 { u"div", TDIV, MS_DIV, TG::Product, 0 },
83 { u"divides", TDIVIDES, MS_LINE, TG::Relation, 0 },
84 { u"dlarrow", TDLARROW, MS_DLARROW, TG::Standalone, 5 },
85 { u"dlrarrow", TDLRARROW, MS_DLRARROW, TG::Standalone, 5 },
86 { u"dot", TDOT, MS_DOT, TG::Attribute, 5 },
87 { u"dotsaxis", TDOTSAXIS, MS_DOTSAXIS, TG::Standalone, 5 }, // 5 to continue expression
88 { u"dotsdiag", TDOTSDIAG, MS_DOTSUP, TG::Standalone, 5 },
89 { u"dotsdown", TDOTSDOWN, MS_DOTSDOWN, TG::Standalone, 5 },
90 { u"dotslow", TDOTSLOW, MS_DOTSLOW, TG::Standalone, 5 },
91 { u"dotsup", TDOTSUP, MS_DOTSUP, TG::Standalone, 5 },
92 { u"dotsvert", TDOTSVERT, MS_DOTSVERT, TG::Standalone, 5 },
93 { u"downarrow", TDOWNARROW, MS_DOWNARROW, TG::Standalone, 5 },
94 { u"drarrow", TDRARROW, MS_DRARROW, TG::Standalone, 5 },
95 { u"emptyset", TEMPTYSET, MS_EMPTYSET, TG::Standalone, 5 },
96 { u"equiv", TEQUIV, MS_EQUIV, TG::Relation, 0 },
97 { u"evaluate", TEVALUATE, '\0', TG::NONE, 0 },
98 { u"exists", TEXISTS, MS_EXISTS, TG::Standalone, 5 },
99 { u"exp", TEXP, '\0', TG::Function, 5 },
100 { u"fact", TFACT, MS_FACT, TG::UnOper, 5 },
101 { u"fixed", TFIXED, '\0', TG::Font, 0 },
102 { u"font", TFONT, '\0', TG::FontAttr, 5 },
103 { u"forall", TFORALL, MS_FORALL, TG::Standalone, 5 },
104 { u"fourier", TFOURIER, MS_FOURIER, TG::Standalone, 5 },
105 { u"frac", TFRAC, '\0', TG::NONE, 5 },
106 { u"from", TFROM, '\0', TG::Limit, 0 },
107 { u"func", TFUNC, '\0', TG::Function, 5 },
108 { u"ge", TGE, MS_GE, TG::Relation, 0 },
109 { u"geslant", TGESLANT, MS_GESLANT, TG::Relation, 0 },
110 { u"gg", TGG, MS_GG, TG::Relation, 0 },
111 { u"grave", TGRAVE, MS_GRAVE, TG::Attribute, 5 },
112 { u"gt", TGT, MS_GT, TG::Relation, 0 },
113 { u"harpoon", THARPOON, MS_HARPOON, TG::Attribute, 5 },
114 { u"hat", THAT, MS_HAT, TG::Attribute, 5 },
115 { u"hbar", THBAR, MS_HBAR, TG::Standalone, 5 },
116 { u"hex", THEX, '\0', TG::NONE, 5 },
117 { u"iiint", TIIINT, MS_IIINT, TG::Oper, 5 },
118 { u"iint", TIINT, MS_IINT, TG::Oper, 5 },
119 { u"im", TIM, MS_IM, TG::Standalone, 5 },
120 { u"in", TIN, MS_IN, TG::Relation, 0 },
121 { u"infinity", TINFINITY, MS_INFINITY, TG::Standalone, 5 },
122 { u"infty", TINFINITY, MS_INFINITY, TG::Standalone, 5 },
123 { u"int", TINT, MS_INT, TG::Oper, 5 },
124 { u"intd", TINTD, MS_INT, TG::Oper, 5 },
125 { u"intersection", TINTERSECT, MS_INTERSECT, TG::Product, 0 },
126 { u"it", TIT, '\0', TG::Product, 0 },
127 { u"ital", TITALIC, '\0', TG::FontAttr, 5 },
128 { u"italic", TITALIC, '\0', TG::FontAttr, 5 },
129 { u"lambdabar", TLAMBDABAR, MS_LAMBDABAR, TG::Standalone, 5 },
130 { u"langle", TLANGLE, MS_LMATHANGLE, TG::LBrace, 5 },
131 { u"laplace", TLAPLACE, MS_LAPLACE, TG::Standalone, 5 },
132 { u"lbrace", TLBRACE, MS_LBRACE, TG::LBrace, 5 },
133 { u"lceil", TLCEIL, MS_LCEIL, TG::LBrace, 5 },
134 { u"ldbracket", TLDBRACKET, MS_LDBRACKET, TG::LBrace, 5 },
135 { u"ldline", TLDLINE, MS_DVERTLINE, TG::LBrace, 5 },
136 { u"le", TLE, MS_LE, TG::Relation, 0 },
137 { u"left", TLEFT, '\0', TG::NONE, 5 },
138 { u"leftarrow", TLEFTARROW, MS_LEFTARROW, TG::Standalone, 5 },
139 { u"leslant", TLESLANT, MS_LESLANT, TG::Relation, 0 },
140 { u"lfloor", TLFLOOR, MS_LFLOOR, TG::LBrace, 5 },
141 { u"lim", TLIM, '\0', TG::Oper, 5 },
142 { u"liminf", TLIMINF, '\0', TG::Oper, 5 },
143 { u"limsup", TLIMSUP, '\0', TG::Oper, 5 },
144 { u"lint", TLINT, MS_LINT, TG::Oper, 5 },
145 { u"ll", TLL, MS_LL, TG::Relation, 0 },
146 { u"lline", TLLINE, MS_VERTLINE, TG::LBrace, 5 },
147 { u"llint", TLLINT, MS_LLINT, TG::Oper, 5 },
148 { u"lllint", TLLLINT, MS_LLLINT, TG::Oper, 5 },
149 { u"ln", TLN, '\0', TG::Function, 5 },
150 { u"log", TLOG, '\0', TG::Function, 5 },
151 { u"lrline", TLRLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5 },
152 { u"lrdline", TLRDLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5 },
153 { u"lsub", TLSUB, '\0', TG::Power, 0 },
154 { u"lsup", TLSUP, '\0', TG::Power, 0 },
155 { u"lt", TLT, MS_LT, TG::Relation, 0 },
156 { u"matrix", TMATRIX, '\0', TG::NONE, 5 },
157 { u"minusplus", TMINUSPLUS, MS_MINUSPLUS, TG::UnOper | TG::Sum, 5 },
158 { u"mline", TMLINE, MS_VERTLINE, TG::NONE, 0 }, //! not in TG::RBrace, Level 0
159 { u"nabla", TNABLA, MS_NABLA, TG::Standalone, 5 },
160 { u"nbold", TNBOLD, '\0', TG::FontAttr, 5 },
161 { u"ndivides", TNDIVIDES, MS_NDIVIDES, TG::Relation, 0 },
162 { u"neg", TNEG, MS_NEG, TG::UnOper, 5 },
163 { u"neq", TNEQ, MS_NEQ, TG::Relation, 0 },
164 { u"newline", TNEWLINE, '\0', TG::NONE, 0 },
165 { u"ni", TNI, MS_NI, TG::Relation, 0 },
166 { u"nitalic", TNITALIC, '\0', TG::FontAttr, 5 },
167 { u"none", TNONE, '\0', TG::LBrace | TG::RBrace, 0 },
168 { u"nospace", TNOSPACE, '\0', TG::Standalone, 5 },
169 { u"notexists", TNOTEXISTS, MS_NOTEXISTS, TG::Standalone, 5 },
170 { u"notin", TNOTIN, MS_NOTIN, TG::Relation, 0 },
171 { u"nprec", TNOTPRECEDES, MS_NOTPRECEDES, TG::Relation, 0 },
172 { u"nroot", TNROOT, MS_SQRT, TG::UnOper, 5 },
173 { u"nsubset", TNSUBSET, MS_NSUBSET, TG::Relation, 0 },
174 { u"nsubseteq", TNSUBSETEQ, MS_NSUBSETEQ, TG::Relation, 0 },
175 { u"nsucc", TNOTSUCCEEDS, MS_NOTSUCCEEDS, TG::Relation, 0 },
176 { u"nsupset", TNSUPSET, MS_NSUPSET, TG::Relation, 0 },
177 { u"nsupseteq", TNSUPSETEQ, MS_NSUPSETEQ, TG::Relation, 0 },
178 { u"odivide", TODIVIDE, MS_ODIVIDE, TG::Product, 0 },
179 { u"odot", TODOT, MS_ODOT, TG::Product, 0 },
180 { u"ominus", TOMINUS, MS_OMINUS, TG::Sum, 0 },
181 { u"oper", TOPER, '\0', TG::Oper, 5 },
182 { u"oplus", TOPLUS, MS_OPLUS, TG::Sum, 0 },
183 { u"or", TOR, MS_OR, TG::Sum, 0 },
184 { u"ortho", TORTHO, MS_ORTHO, TG::Relation, 0 },
185 { u"otimes", TOTIMES, MS_OTIMES, TG::Product, 0 },
186 { u"over", TOVER, '\0', TG::Product, 0 },
187 { u"overbrace", TOVERBRACE, MS_OVERBRACE, TG::Product, 5 },
188 { u"overline", TOVERLINE, '\0', TG::Attribute, 5 },
189 { u"overstrike", TOVERSTRIKE, '\0', TG::Attribute, 5 },
190 { u"owns", TNI, MS_NI, TG::Relation, 0 },
191 { u"parallel", TPARALLEL, MS_DLINE, TG::Relation, 0 },
192 { u"partial", TPARTIAL, MS_PARTIAL, TG::Standalone, 5 },
193 { u"phantom", TPHANTOM, '\0', TG::FontAttr, 5 },
194 { u"plusminus", TPLUSMINUS, MS_PLUSMINUS, TG::UnOper | TG::Sum, 5 },
195 { u"prec", TPRECEDES, MS_PRECEDES, TG::Relation, 0 },
196 { u"preccurlyeq", TPRECEDESEQUAL, MS_PRECEDESEQUAL, TG::Relation, 0 },
197 { u"precsim", TPRECEDESEQUIV, MS_PRECEDESEQUIV, TG::Relation, 0 },
198 { u"prod", TPROD, MS_PROD, TG::Oper, 5 },
199 { u"prop", TPROP, MS_PROP, TG::Relation, 0 },
200 { u"rangle", TRANGLE, MS_RMATHANGLE, TG::RBrace, 0 }, //! 0 to terminate expression
201 { u"rbrace", TRBRACE, MS_RBRACE, TG::RBrace, 0 },
202 { u"rceil", TRCEIL, MS_RCEIL, TG::RBrace, 0 },
203 { u"rdbracket", TRDBRACKET, MS_RDBRACKET, TG::RBrace, 0 },
204 { u"rdline", TRDLINE, MS_DVERTLINE, TG::RBrace, 0 },
205 { u"re", TRE, MS_RE, TG::Standalone, 5 },
206 { u"rfloor", TRFLOOR, MS_RFLOOR, TG::RBrace, 0 }, //! 0 to terminate expression
207 { u"right", TRIGHT, '\0', TG::NONE, 0 },
208 { u"rightarrow", TRIGHTARROW, MS_RIGHTARROW, TG::Standalone, 5 },
209 { u"rline", TRLINE, MS_VERTLINE, TG::RBrace, 0 }, //! 0 to terminate expression
210 { u"rsub", TRSUB, '\0', TG::Power, 0 },
211 { u"rsup", TRSUP, '\0', TG::Power, 0 },
212 { u"sans", TSANS, '\0', TG::Font, 0 },
213 { u"serif", TSERIF, '\0', TG::Font, 0 },
214 { u"setC", TSETC, MS_SETC, TG::Standalone, 5 },
215 { u"setminus", TSETMINUS, MS_BACKSLASH, TG::Product, 0 },
216 { u"setN", TSETN, MS_SETN, TG::Standalone, 5 },
217 { u"setQ", TSETQ, MS_SETQ, TG::Standalone, 5 },
218 { u"setquotient", TSETQUOTIENT, MS_SLASH, TG::Product, 0 },
219 { u"setR", TSETR, MS_SETR, TG::Standalone, 5 },
220 { u"setZ", TSETZ, MS_SETZ, TG::Standalone, 5 },
221 { u"sim", TSIM, MS_SIM, TG::Relation, 0 },
222 { u"simeq", TSIMEQ, MS_SIMEQ, TG::Relation, 0 },
223 { u"sin", TSIN, '\0', TG::Function, 5 },
224 { u"sinh", TSINH, '\0', TG::Function, 5 },
225 { u"size", TSIZE, '\0', TG::FontAttr, 5 },
226 { u"slash", TSLASH, MS_SLASH, TG::Product, 0 },
227 { u"sqrt", TSQRT, MS_SQRT, TG::UnOper, 5 },
228 { u"stack", TSTACK, '\0', TG::NONE, 5 },
229 { u"sub", TRSUB, '\0', TG::Power, 0 },
230 { u"subset", TSUBSET, MS_SUBSET, TG::Relation, 0 },
231 { u"subseteq", TSUBSETEQ, MS_SUBSETEQ, TG::Relation, 0 },
232 { u"succ", TSUCCEEDS, MS_SUCCEEDS, TG::Relation, 0 },
233 { u"succcurlyeq", TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TG::Relation, 0 },
234 { u"succsim", TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TG::Relation, 0 },
235 { u"sum", TSUM, MS_SUM, TG::Oper, 5 },
236 { u"sup", TRSUP, '\0', TG::Power, 0 },
237 { u"supset", TSUPSET, MS_SUPSET, TG::Relation, 0 },
238 { u"supseteq", TSUPSETEQ, MS_SUPSETEQ, TG::Relation, 0 },
239 { u"tan", TTAN, '\0', TG::Function, 5 },
240 { u"tanh", TTANH, '\0', TG::Function, 5 },
241 { u"tilde", TTILDE, MS_TILDE, TG::Attribute, 5 },
242 { u"times", TTIMES, MS_TIMES, TG::Product, 0 },
243 { u"to", TTO, '\0', TG::Limit, 0 },
244 { u"toward", TTOWARD, MS_RIGHTARROW, TG::Relation, 0 },
245 { u"transl", TTRANSL, MS_TRANSL, TG::Relation, 0 },
246 { u"transr", TTRANSR, MS_TRANSR, TG::Relation, 0 },
247 { u"underbrace", TUNDERBRACE, MS_UNDERBRACE, TG::Product, 5 },
248 { u"underline", TUNDERLINE, '\0', TG::Attribute, 5 },
249 { u"union", TUNION, MS_UNION, TG::Sum, 0 },
250 { u"uoper", TUOPER, '\0', TG::UnOper, 5 },
251 { u"uparrow", TUPARROW, MS_UPARROW, TG::Standalone, 5 },
252 { u"vec", TVEC, MS_VEC, TG::Attribute, 5 },
253 { u"widebslash", TWIDEBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
254 { u"wideharpoon", TWIDEHARPOON, MS_HARPOON, TG::Attribute, 5 },
255 { u"widehat", TWIDEHAT, MS_HAT, TG::Attribute, 5 },
256 { u"wideslash", TWIDESLASH, MS_SLASH, TG::Product, 0 },
257 { u"widetilde", TWIDETILDE, MS_TILDE, TG::Attribute, 5 },
258 { u"widevec", TWIDEVEC, MS_VEC, TG::Attribute, 5 },
259 { u"wp", TWP, MS_WP, TG::Standalone, 5 } };
261 // First character may be any alphabetic
262 const sal_Int32 coStartFlags = KParseTokens::ANY_LETTER | KParseTokens::IGNORE_LEADING_WS;
264 // Continuing characters may be any alphabetic
265 const sal_Int32 coContFlags = (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
266 | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
267 // First character for numbers, may be any numeric or dot
268 const sal_Int32 coNumStartFlags
269 = KParseTokens::ASC_DIGIT | KParseTokens::ASC_DOT | KParseTokens::IGNORE_LEADING_WS;
270 // Continuing characters for numbers, may be any numeric or dot or comma.
271 // tdf#127873: additionally accept ',' comma group separator as too many
272 // existing documents unwittingly may have used that as decimal separator
273 // in such locales (though it never was as this is always the en-US locale
274 // and the group separator is only parsed away).
275 const sal_Int32 coNumContFlags = (coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
276 | KParseTokens::GROUP_SEPARATOR_IN_NUMBER;
277 // First character for numbers hexadecimal
278 const sal_Int32 coNum16StartFlags
279 = KParseTokens::ASC_DIGIT | KParseTokens::ASC_UPALPHA | KParseTokens::IGNORE_LEADING_WS;
281 // Continuing characters for numbers hexadecimal
282 const sal_Int32 coNum16ContFlags = (coNum16StartFlags & ~KParseTokens::IGNORE_LEADING_WS);
283 // user-defined char continuing characters may be any alphanumeric or dot.
284 const sal_Int32 coUserDefinedCharContFlags = KParseTokens::ANY_LETTER_OR_NUMBER
285 | KParseTokens::ASC_DOT
286 | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
288 //Checks if keyword is in the list.
289 static inline bool findCompare(const SmTokenTableEntry& lhs, const OUString& s)
291 return s.compareToIgnoreAsciiCase(lhs.aIdent) > 0;
294 //Returns the SmTokenTableEntry for a keyword
295 static const SmTokenTableEntry* GetTokenTableEntry(const OUString& rName)
297 if (rName.isEmpty())
298 return nullptr; //avoid null pointer exceptions
299 //Looks for the first keyword after or equal to rName in alphabetical order.
300 auto findIter
301 = std::lower_bound(std::begin(aTokenTable), std::end(aTokenTable), rName, findCompare);
302 if (findIter != std::end(aTokenTable) && rName.equalsIgnoreAsciiCase(findIter->aIdent))
303 return &*findIter; //check is equal
304 return nullptr; //not found
307 static bool IsDelimiter(const OUString& rTxt, sal_Int32 nPos)
308 { // returns 'true' iff cChar is '\0' or a delimiter
310 assert(nPos <= rTxt.getLength()); //index out of range
311 if (nPos == rTxt.getLength())
312 return true; //This is EOF
313 sal_Unicode cChar = rTxt[nPos];
315 // check if 'cChar' is in the delimiter table
316 static const sal_Unicode aDelimiterTable[] = {
317 ' ', '{', '}', '(', ')', '\t', '\n', '\r', '+', '-', '*', '/', '=', '[',
318 ']', '^', '_', '#', '%', '>', '<', '&', '|', '\\', '"', '~', '`'
319 }; //reordered by usage (by eye) for nanoseconds saving.
321 //checks the array
322 for (auto const& cDelimiter : aDelimiterTable)
324 if (cDelimiter == cChar)
325 return true;
328 //special chars support
329 sal_Int16 nTypJp = SM_MOD()->GetSysLocale().GetCharClass().getType(rTxt, nPos);
330 return (nTypJp == css::i18n::UnicodeType::SPACE_SEPARATOR
331 || nTypJp == css::i18n::UnicodeType::CONTROL);
334 // checks number used as arguments in Math formulas (e.g. 'size' command)
335 // Format: no negative numbers, must start with a digit, no exponent notation, ...
336 static bool lcl_IsNumber(const OUString& rText)
338 bool bPoint = false;
339 const sal_Unicode* pBuffer = rText.getStr();
340 for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
342 const sal_Unicode cChar = *pBuffer;
343 if (cChar == '.')
345 if (bPoint)
346 return false;
347 else
348 bPoint = true;
350 else if (!rtl::isAsciiDigit(cChar))
351 return false;
353 return true;
355 // checks number used as arguments in Math formulas (e.g. 'size' command)
356 // Format: no negative numbers, must start with a digit, no exponent notation, ...
357 static bool lcl_IsNotWholeNumber(const OUString& rText)
359 const sal_Unicode* pBuffer = rText.getStr();
360 for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
361 if (!rtl::isAsciiDigit(*pBuffer))
362 return true;
363 return false;
365 // checks hex number used as arguments in Math formulas (e.g. 'hex' command)
366 // Format: no negative numbers, must start with a digit, no exponent notation, ...
367 static bool lcl_IsNotWholeNumber16(const OUString& rText)
369 const sal_Unicode* pBuffer = rText.getStr();
370 for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
371 if (!rtl::isAsciiCanonicHexDigit(*pBuffer))
372 return true;
373 return false;
376 //Text replace onto m_aBufferString
377 void SmParser5::Replace(sal_Int32 nPos, sal_Int32 nLen, std::u16string_view aText)
379 assert(nPos + nLen <= m_aBufferString.getLength()); //checks if length allows text replace
381 m_aBufferString = m_aBufferString.replaceAt(nPos, nLen, aText); //replace and reindex
382 sal_Int32 nChg = aText.size() - nLen;
383 m_nBufferIndex = m_nBufferIndex + nChg;
384 m_nTokenIndex = m_nTokenIndex + nChg;
387 void SmParser5::NextToken() //Central part of the parser
389 sal_Int32 nBufLen = m_aBufferString.getLength();
390 ParseResult aRes;
391 sal_Int32 nRealStart;
392 bool bCont;
395 // skip white spaces
396 while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex))
397 ++m_nBufferIndex;
399 // Try to parse a number in a locale-independent manner using
400 // '.' as decimal separator.
401 // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
402 aRes
403 = m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER, m_aBufferString, m_nBufferIndex,
404 coNumStartFlags, "", coNumContFlags, "");
406 if (aRes.TokenType == 0)
408 // Try again with the default token parsing.
409 aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, "",
410 coContFlags, "");
413 nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
414 m_nBufferIndex = nRealStart;
416 bCont = false;
417 if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart])
419 // keep data needed for tokens row and col entry up to date
420 ++m_nRow;
421 m_nBufferIndex = m_nColOff = nRealStart + 1;
422 bCont = true;
424 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
426 if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
428 //SkipComment
429 m_nBufferIndex = nRealStart + 2;
430 while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex])
431 ++m_nBufferIndex;
432 bCont = true;
436 } while (bCont);
438 // set index of current token
439 m_nTokenIndex = m_nBufferIndex;
440 sal_uInt32 nCol = nRealStart - m_nColOff;
442 bool bHandled = true;
443 if (nRealStart >= nBufLen)
445 m_aCurToken.eType = TEND;
446 m_aCurToken.cMathChar = u"";
447 m_aCurToken.nGroup = TG::NONE;
448 m_aCurToken.nLevel = 0;
449 m_aCurToken.aText.clear();
451 else if (aRes.TokenType & KParseType::ANY_NUMBER)
453 assert(aRes.EndPos > 0);
454 if (m_aBufferString[aRes.EndPos - 1] == ',' && aRes.EndPos < nBufLen
455 && m_pSysCC->getType(m_aBufferString, aRes.EndPos) != UnicodeType::SPACE_SEPARATOR)
457 // Comma followed by a non-space char is unlikely for decimal/thousands separator.
458 --aRes.EndPos;
460 sal_Int32 n = aRes.EndPos - nRealStart;
461 assert(n >= 0);
462 m_aCurToken.eType = TNUMBER;
463 m_aCurToken.cMathChar = u"";
464 m_aCurToken.nGroup = TG::NONE;
465 m_aCurToken.nLevel = 5;
466 m_aCurToken.aText = m_aBufferString.copy(nRealStart, n);
468 SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos), "starmath",
469 "identifier really finished? (compatibility!)");
471 else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING)
473 m_aCurToken.eType = TTEXT;
474 m_aCurToken.cMathChar = u"";
475 m_aCurToken.nGroup = TG::NONE;
476 m_aCurToken.nLevel = 5;
477 m_aCurToken.aText = aRes.DequotedNameOrString;
478 nCol++;
480 else if (aRes.TokenType & KParseType::IDENTNAME)
482 sal_Int32 n = aRes.EndPos - nRealStart;
483 assert(n >= 0);
484 OUString aName(m_aBufferString.copy(nRealStart, n));
485 const SmTokenTableEntry* pEntry = GetTokenTableEntry(aName);
487 if (pEntry)
489 m_aCurToken.eType = pEntry->eType;
490 m_aCurToken.setChar(pEntry->cMathChar);
491 m_aCurToken.nGroup = pEntry->nGroup;
492 m_aCurToken.nLevel = pEntry->nLevel;
493 m_aCurToken.aText = pEntry->aIdent;
495 else
497 m_aCurToken.eType = TIDENT;
498 m_aCurToken.cMathChar = u"";
499 m_aCurToken.nGroup = TG::NONE;
500 m_aCurToken.nLevel = 5;
501 m_aCurToken.aText = aName;
503 SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos), "starmath",
504 "identifier really finished? (compatibility!)");
507 else if (aRes.TokenType == 0 && '_' == m_aBufferString[nRealStart])
509 m_aCurToken.eType = TRSUB;
510 m_aCurToken.cMathChar = u"";
511 m_aCurToken.nGroup = TG::Power;
512 m_aCurToken.nLevel = 0;
513 m_aCurToken.aText = "_";
515 aRes.EndPos = nRealStart + 1;
517 else if (aRes.TokenType & KParseType::BOOLEAN)
519 sal_Int32& rnEndPos = aRes.EndPos;
520 if (rnEndPos - nRealStart <= 2)
522 sal_Unicode ch = m_aBufferString[nRealStart];
523 switch (ch)
525 case '<':
527 if (m_aBufferString.match("<<", nRealStart))
529 m_aCurToken.eType = TLL;
530 m_aCurToken.setChar(MS_LL);
531 m_aCurToken.nGroup = TG::Relation;
532 m_aCurToken.nLevel = 0;
533 m_aCurToken.aText = "<<";
535 rnEndPos = nRealStart + 2;
537 else if (m_aBufferString.match("<=", nRealStart))
539 m_aCurToken.eType = TLE;
540 m_aCurToken.setChar(MS_LE);
541 m_aCurToken.nGroup = TG::Relation;
542 m_aCurToken.nLevel = 0;
543 m_aCurToken.aText = "<=";
545 rnEndPos = nRealStart + 2;
547 else if (m_aBufferString.match("<-", nRealStart))
549 m_aCurToken.eType = TLEFTARROW;
550 m_aCurToken.setChar(MS_LEFTARROW);
551 m_aCurToken.nGroup = TG::Standalone;
552 m_aCurToken.nLevel = 5;
553 m_aCurToken.aText = "<-";
555 rnEndPos = nRealStart + 2;
557 else if (m_aBufferString.match("<>", nRealStart))
559 m_aCurToken.eType = TNEQ;
560 m_aCurToken.setChar(MS_NEQ);
561 m_aCurToken.nGroup = TG::Relation;
562 m_aCurToken.nLevel = 0;
563 m_aCurToken.aText = "<>";
565 rnEndPos = nRealStart + 2;
567 else if (m_aBufferString.match("<?>", nRealStart))
569 m_aCurToken.eType = TPLACE;
570 m_aCurToken.setChar(MS_PLACE);
571 m_aCurToken.nGroup = TG::NONE;
572 m_aCurToken.nLevel = 5;
573 m_aCurToken.aText = "<?>";
575 rnEndPos = nRealStart + 3;
577 else
579 m_aCurToken.eType = TLT;
580 m_aCurToken.setChar(MS_LT);
581 m_aCurToken.nGroup = TG::Relation;
582 m_aCurToken.nLevel = 0;
583 m_aCurToken.aText = "<";
586 break;
587 case '>':
589 if (m_aBufferString.match(">=", nRealStart))
591 m_aCurToken.eType = TGE;
592 m_aCurToken.setChar(MS_GE);
593 m_aCurToken.nGroup = TG::Relation;
594 m_aCurToken.nLevel = 0;
595 m_aCurToken.aText = ">=";
597 rnEndPos = nRealStart + 2;
599 else if (m_aBufferString.match(">>", nRealStart))
601 m_aCurToken.eType = TGG;
602 m_aCurToken.setChar(MS_GG);
603 m_aCurToken.nGroup = TG::Relation;
604 m_aCurToken.nLevel = 0;
605 m_aCurToken.aText = ">>";
607 rnEndPos = nRealStart + 2;
609 else
611 m_aCurToken.eType = TGT;
612 m_aCurToken.setChar(MS_GT);
613 m_aCurToken.nGroup = TG::Relation;
614 m_aCurToken.nLevel = 0;
615 m_aCurToken.aText = ">";
618 break;
619 default:
620 bHandled = false;
624 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
626 sal_Int32& rnEndPos = aRes.EndPos;
627 if (rnEndPos - nRealStart == 1)
629 sal_Unicode ch = m_aBufferString[nRealStart];
630 switch (ch)
632 case '%':
634 //! modifies aRes.EndPos
636 OSL_ENSURE(rnEndPos >= nBufLen || '%' != m_aBufferString[rnEndPos],
637 "unexpected comment start");
639 // get identifier of user-defined character
640 ParseResult aTmpRes = m_pSysCC->parseAnyToken(m_aBufferString, rnEndPos,
641 KParseTokens::ANY_LETTER, "",
642 coUserDefinedCharContFlags, "");
644 sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace;
646 // default setting for the case that no identifier
647 // i.e. a valid symbol-name is following the '%'
648 // character
649 m_aCurToken.eType = TTEXT;
650 m_aCurToken.cMathChar = u"";
651 m_aCurToken.nGroup = TG::NONE;
652 m_aCurToken.nLevel = 5;
653 m_aCurToken.aText = "%";
655 if (aTmpRes.TokenType & KParseType::IDENTNAME)
657 sal_Int32 n = aTmpRes.EndPos - nTmpStart;
658 m_aCurToken.eType = TSPECIAL;
659 m_aCurToken.aText = m_aBufferString.copy(nTmpStart - 1, n + 1);
661 OSL_ENSURE(aTmpRes.EndPos > rnEndPos, "empty identifier");
662 if (aTmpRes.EndPos > rnEndPos)
663 rnEndPos = aTmpRes.EndPos;
664 else
665 ++rnEndPos;
668 // if no symbol-name was found we start-over with
669 // finding the next token right after the '%' sign.
670 // I.e. we leave rnEndPos unmodified.
672 break;
673 case '[':
675 m_aCurToken.eType = TLBRACKET;
676 m_aCurToken.setChar(MS_LBRACKET);
677 m_aCurToken.nGroup = TG::LBrace;
678 m_aCurToken.nLevel = 5;
679 m_aCurToken.aText = "[";
681 break;
682 case '\\':
684 m_aCurToken.eType = TESCAPE;
685 m_aCurToken.cMathChar = u"";
686 m_aCurToken.nGroup = TG::NONE;
687 m_aCurToken.nLevel = 5;
688 m_aCurToken.aText = "\\";
690 break;
691 case ']':
693 m_aCurToken.eType = TRBRACKET;
694 m_aCurToken.setChar(MS_RBRACKET);
695 m_aCurToken.nGroup = TG::RBrace;
696 m_aCurToken.nLevel = 0;
697 m_aCurToken.aText = "]";
699 break;
700 case '^':
702 m_aCurToken.eType = TRSUP;
703 m_aCurToken.cMathChar = u"";
704 m_aCurToken.nGroup = TG::Power;
705 m_aCurToken.nLevel = 0;
706 m_aCurToken.aText = "^";
708 break;
709 case '`':
711 m_aCurToken.eType = TSBLANK;
712 m_aCurToken.cMathChar = u"";
713 m_aCurToken.nGroup = TG::Blank;
714 m_aCurToken.nLevel = 5;
715 m_aCurToken.aText = "`";
717 break;
718 case '{':
720 m_aCurToken.eType = TLGROUP;
721 m_aCurToken.setChar(MS_LBRACE);
722 m_aCurToken.nGroup = TG::NONE;
723 m_aCurToken.nLevel = 5;
724 m_aCurToken.aText = "{";
726 break;
727 case '|':
729 m_aCurToken.eType = TOR;
730 m_aCurToken.setChar(MS_OR);
731 m_aCurToken.nGroup = TG::Sum;
732 m_aCurToken.nLevel = 0;
733 m_aCurToken.aText = "|";
735 break;
736 case '}':
738 m_aCurToken.eType = TRGROUP;
739 m_aCurToken.setChar(MS_RBRACE);
740 m_aCurToken.nGroup = TG::NONE;
741 m_aCurToken.nLevel = 0;
742 m_aCurToken.aText = "}";
744 break;
745 case '~':
747 m_aCurToken.eType = TBLANK;
748 m_aCurToken.cMathChar = u"";
749 m_aCurToken.nGroup = TG::Blank;
750 m_aCurToken.nLevel = 5;
751 m_aCurToken.aText = "~";
753 break;
754 case '#':
756 if (m_aBufferString.match("##", nRealStart))
758 m_aCurToken.eType = TDPOUND;
759 m_aCurToken.cMathChar = u"";
760 m_aCurToken.nGroup = TG::NONE;
761 m_aCurToken.nLevel = 0;
762 m_aCurToken.aText = "##";
764 rnEndPos = nRealStart + 2;
766 else
768 m_aCurToken.eType = TPOUND;
769 m_aCurToken.cMathChar = u"";
770 m_aCurToken.nGroup = TG::NONE;
771 m_aCurToken.nLevel = 0;
772 m_aCurToken.aText = "#";
775 break;
776 case '&':
778 m_aCurToken.eType = TAND;
779 m_aCurToken.setChar(MS_AND);
780 m_aCurToken.nGroup = TG::Product;
781 m_aCurToken.nLevel = 0;
782 m_aCurToken.aText = "&";
784 break;
785 case '(':
787 m_aCurToken.eType = TLPARENT;
788 m_aCurToken.setChar(MS_LPARENT);
789 m_aCurToken.nGroup = TG::LBrace;
790 m_aCurToken.nLevel = 5; //! 0 to continue expression
791 m_aCurToken.aText = "(";
793 break;
794 case ')':
796 m_aCurToken.eType = TRPARENT;
797 m_aCurToken.setChar(MS_RPARENT);
798 m_aCurToken.nGroup = TG::RBrace;
799 m_aCurToken.nLevel = 0; //! 0 to terminate expression
800 m_aCurToken.aText = ")";
802 break;
803 case '*':
805 m_aCurToken.eType = TMULTIPLY;
806 m_aCurToken.setChar(MS_MULTIPLY);
807 m_aCurToken.nGroup = TG::Product;
808 m_aCurToken.nLevel = 0;
809 m_aCurToken.aText = "*";
811 break;
812 case '+':
814 if (m_aBufferString.match("+-", nRealStart))
816 m_aCurToken.eType = TPLUSMINUS;
817 m_aCurToken.setChar(MS_PLUSMINUS);
818 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
819 m_aCurToken.nLevel = 5;
820 m_aCurToken.aText = "+-";
822 rnEndPos = nRealStart + 2;
824 else
826 m_aCurToken.eType = TPLUS;
827 m_aCurToken.setChar(MS_PLUS);
828 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
829 m_aCurToken.nLevel = 5;
830 m_aCurToken.aText = "+";
833 break;
834 case '-':
836 if (m_aBufferString.match("-+", nRealStart))
838 m_aCurToken.eType = TMINUSPLUS;
839 m_aCurToken.setChar(MS_MINUSPLUS);
840 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
841 m_aCurToken.nLevel = 5;
842 m_aCurToken.aText = "-+";
844 rnEndPos = nRealStart + 2;
846 else if (m_aBufferString.match("->", nRealStart))
848 m_aCurToken.eType = TRIGHTARROW;
849 m_aCurToken.setChar(MS_RIGHTARROW);
850 m_aCurToken.nGroup = TG::Standalone;
851 m_aCurToken.nLevel = 5;
852 m_aCurToken.aText = "->";
854 rnEndPos = nRealStart + 2;
856 else
858 m_aCurToken.eType = TMINUS;
859 m_aCurToken.setChar(MS_MINUS);
860 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
861 m_aCurToken.nLevel = 5;
862 m_aCurToken.aText = "-";
865 break;
866 case '.':
868 // Only one character? Then it can't be a number.
869 if (m_nBufferIndex < m_aBufferString.getLength() - 1)
871 // for compatibility with SO5.2
872 // texts like .34 ...56 ... h ...78..90
873 // will be treated as numbers
874 m_aCurToken.eType = TNUMBER;
875 m_aCurToken.cMathChar = u"";
876 m_aCurToken.nGroup = TG::NONE;
877 m_aCurToken.nLevel = 5;
879 sal_Int32 nTxtStart = m_nBufferIndex;
880 sal_Unicode cChar;
881 // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only
884 cChar = m_aBufferString[++m_nBufferIndex];
885 } while ((cChar == '.' || rtl::isAsciiDigit(cChar))
886 && (m_nBufferIndex < m_aBufferString.getLength() - 1));
888 m_aCurToken.aText
889 = m_aBufferString.copy(nTxtStart, m_nBufferIndex - nTxtStart);
890 aRes.EndPos = m_nBufferIndex;
892 else
893 bHandled = false;
895 break;
896 case '/':
898 m_aCurToken.eType = TDIVIDEBY;
899 m_aCurToken.setChar(MS_SLASH);
900 m_aCurToken.nGroup = TG::Product;
901 m_aCurToken.nLevel = 0;
902 m_aCurToken.aText = "/";
904 break;
905 case '=':
907 m_aCurToken.eType = TASSIGN;
908 m_aCurToken.setChar(MS_ASSIGN);
909 m_aCurToken.nGroup = TG::Relation;
910 m_aCurToken.nLevel = 0;
911 m_aCurToken.aText = "=";
913 break;
914 default:
915 bHandled = false;
919 else
920 bHandled = false;
922 if (!bHandled)
924 m_aCurToken.eType = TCHARACTER;
925 m_aCurToken.cMathChar = u"";
926 m_aCurToken.nGroup = TG::NONE;
927 m_aCurToken.nLevel = 5;
929 // tdf#129372: we may have to deal with surrogate pairs
930 // (see https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates)
931 // in this case, we must read 2 sal_Unicode instead of 1
932 int nOffset(rtl::isSurrogate(m_aBufferString[nRealStart]) ? 2 : 1);
933 m_aCurToken.aText = m_aBufferString.copy(nRealStart, nOffset);
935 aRes.EndPos = nRealStart + nOffset;
937 m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength());
939 if (TEND != m_aCurToken.eType)
940 m_nBufferIndex = aRes.EndPos;
943 void SmParser5::NextTokenColor(SmTokenType dvipload)
945 sal_Int32 nBufLen = m_aBufferString.getLength();
946 ParseResult aRes;
947 sal_Int32 nRealStart;
948 bool bCont;
952 // skip white spaces
953 while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex))
954 ++m_nBufferIndex;
955 //parse, there are few options, so less strict.
956 aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, "",
957 coContFlags, "");
958 nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
959 m_nBufferIndex = nRealStart;
960 bCont = false;
961 if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart])
963 // keep data needed for tokens row and col entry up to date
964 ++m_nRow;
965 m_nBufferIndex = m_nColOff = nRealStart + 1;
966 bCont = true;
968 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
970 if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
972 //SkipComment
973 m_nBufferIndex = nRealStart + 2;
974 while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex])
975 ++m_nBufferIndex;
976 bCont = true;
979 } while (bCont);
981 // set index of current token
982 m_nTokenIndex = m_nBufferIndex;
983 sal_uInt32 nCol = nRealStart - m_nColOff;
985 if (nRealStart >= nBufLen)
986 m_aCurToken.eType = TEND;
987 else if (aRes.TokenType & KParseType::IDENTNAME)
989 sal_Int32 n = aRes.EndPos - nRealStart;
990 assert(n >= 0);
991 OUString aName(m_aBufferString.copy(nRealStart, n));
992 switch (dvipload)
994 case TCOLOR:
995 m_aCurToken = starmathdatabase::Identify_ColorName_Parser(aName);
996 break;
997 case TDVIPSNAMESCOL:
998 m_aCurToken = starmathdatabase::Identify_ColorName_DVIPSNAMES(aName);
999 break;
1000 default:
1001 m_aCurToken = starmathdatabase::Identify_ColorName_Parser(aName);
1002 break;
1005 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1007 if (m_aBufferString[nRealStart] == '#' && !m_aBufferString.match("##", nRealStart))
1009 m_aCurToken.eType = THEX;
1010 m_aCurToken.cMathChar = u"";
1011 m_aCurToken.nGroup = TG::Color;
1012 m_aCurToken.nLevel = 0;
1013 m_aCurToken.aText = "hex";
1016 else
1017 m_aCurToken.eType = TNONE;
1019 m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength());
1020 if (TEND != m_aCurToken.eType)
1021 m_nBufferIndex = aRes.EndPos;
1024 void SmParser5::NextTokenFontSize()
1026 sal_Int32 nBufLen = m_aBufferString.getLength();
1027 ParseResult aRes;
1028 sal_Int32 nRealStart;
1029 bool bCont;
1030 bool hex = false;
1034 // skip white spaces
1035 while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex))
1036 ++m_nBufferIndex;
1037 //hexadecimal parser
1038 aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coNum16StartFlags, ".",
1039 coNum16ContFlags, ".,");
1040 if (aRes.TokenType == 0)
1042 // Try again with the default token parsing.
1043 aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, "",
1044 coContFlags, "");
1046 else
1047 hex = true;
1048 nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
1049 m_nBufferIndex = nRealStart;
1050 bCont = false;
1051 if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart])
1053 // keep data needed for tokens row and col entry up to date
1054 ++m_nRow;
1055 m_nBufferIndex = m_nColOff = nRealStart + 1;
1056 bCont = true;
1058 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1060 if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
1062 //SkipComment
1063 m_nBufferIndex = nRealStart + 2;
1064 while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex])
1065 ++m_nBufferIndex;
1066 bCont = true;
1069 } while (bCont);
1071 // set index of current token
1072 m_nTokenIndex = m_nBufferIndex;
1073 sal_uInt32 nCol = nRealStart - m_nColOff;
1075 if (nRealStart >= nBufLen)
1076 m_aCurToken.eType = TEND;
1077 else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1079 if (aRes.EndPos - nRealStart == 1)
1081 switch (m_aBufferString[nRealStart])
1083 case '*':
1084 m_aCurToken.eType = TMULTIPLY;
1085 m_aCurToken.setChar(MS_MULTIPLY);
1086 m_aCurToken.nGroup = TG::Product;
1087 m_aCurToken.nLevel = 0;
1088 m_aCurToken.aText = "*";
1089 break;
1090 case '+':
1091 m_aCurToken.eType = TPLUS;
1092 m_aCurToken.setChar(MS_PLUS);
1093 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
1094 m_aCurToken.nLevel = 5;
1095 m_aCurToken.aText = "+";
1096 break;
1097 case '-':
1098 m_aCurToken.eType = TMINUS;
1099 m_aCurToken.setChar(MS_MINUS);
1100 m_aCurToken.nGroup = TG::UnOper | TG::Sum;
1101 m_aCurToken.nLevel = 5;
1102 m_aCurToken.aText = "-";
1103 break;
1104 case '/':
1105 m_aCurToken.eType = TDIVIDEBY;
1106 m_aCurToken.setChar(MS_SLASH);
1107 m_aCurToken.nGroup = TG::Product;
1108 m_aCurToken.nLevel = 0;
1109 m_aCurToken.aText = "/";
1110 break;
1111 default:
1112 m_aCurToken.eType = TNONE;
1113 break;
1116 else
1117 m_aCurToken.eType = TNONE;
1119 else if (hex)
1121 assert(aRes.EndPos > 0);
1122 sal_Int32 n = aRes.EndPos - nRealStart;
1123 assert(n >= 0);
1124 m_aCurToken.eType = THEX;
1125 m_aCurToken.cMathChar = u"";
1126 m_aCurToken.nGroup = TG::NONE;
1127 m_aCurToken.nLevel = 5;
1128 m_aCurToken.aText = m_aBufferString.copy(nRealStart, n);
1130 else
1131 m_aCurToken.eType = TNONE;
1133 m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength());
1134 if (TEND != m_aCurToken.eType)
1135 m_nBufferIndex = aRes.EndPos;
1138 namespace
1140 SmNodeArray buildNodeArray(std::vector<std::unique_ptr<SmNode>>& rSubNodes)
1142 SmNodeArray aSubArray(rSubNodes.size());
1143 for (size_t i = 0; i < rSubNodes.size(); ++i)
1144 aSubArray[i] = rSubNodes[i].release();
1145 return aSubArray;
1147 } //end namespace
1149 // grammar
1150 /*************************************************************************************************/
1152 std::unique_ptr<SmTableNode> SmParser5::DoTable()
1154 DepthProtect aDepthGuard(m_nParseDepth);
1156 std::vector<std::unique_ptr<SmNode>> aLineArray;
1157 aLineArray.push_back(DoLine());
1158 while (m_aCurToken.eType == TNEWLINE)
1160 NextToken();
1161 aLineArray.push_back(DoLine());
1163 assert(m_aCurToken.eType == TEND);
1164 std::unique_ptr<SmTableNode> xSNode(new SmTableNode(m_aCurToken));
1165 xSNode->SetSelection(m_aCurESelection);
1166 xSNode->SetSubNodes(buildNodeArray(aLineArray));
1167 return xSNode;
1170 std::unique_ptr<SmNode> SmParser5::DoAlign(bool bUseExtraSpaces)
1171 // parse alignment info (if any), then go on with rest of expression
1173 DepthProtect aDepthGuard(m_nParseDepth);
1175 std::unique_ptr<SmStructureNode> xSNode;
1177 if (TokenInGroup(TG::Align))
1179 xSNode.reset(new SmAlignNode(m_aCurToken));
1180 xSNode->SetSelection(m_aCurESelection);
1182 NextToken();
1184 // allow for just one align statement in 5.0
1185 if (TokenInGroup(TG::Align))
1186 return DoError(SmParseError::DoubleAlign);
1189 auto pNode = DoExpression(bUseExtraSpaces);
1191 if (xSNode)
1193 xSNode->SetSubNode(0, pNode.release());
1194 return xSNode;
1196 return pNode;
1199 // Postcondition: m_aCurToken.eType == TEND || m_aCurToken.eType == TNEWLINE
1200 std::unique_ptr<SmNode> SmParser5::DoLine()
1202 DepthProtect aDepthGuard(m_nParseDepth);
1204 std::vector<std::unique_ptr<SmNode>> ExpressionArray;
1206 // start with single expression that may have an alignment statement
1207 // (and go on with expressions that must not have alignment
1208 // statements in 'while' loop below. See also 'Expression()'.)
1209 if (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
1210 ExpressionArray.push_back(DoAlign());
1212 while (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
1213 ExpressionArray.push_back(DoExpression());
1215 //If there's no expression, add an empty one.
1216 //this is to avoid a formula tree without any caret
1217 //positions, in visual formula editor.
1218 if (ExpressionArray.empty())
1220 SmToken aTok;
1221 aTok.eType = TNEWLINE;
1222 ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok)));
1225 auto xSNode = std::make_unique<SmLineNode>(m_aCurToken);
1226 xSNode->SetSelection(m_aCurESelection);
1227 xSNode->SetSubNodes(buildNodeArray(ExpressionArray));
1228 return xSNode;
1231 std::unique_ptr<SmNode> SmParser5::DoExpression(bool bUseExtraSpaces)
1233 DepthProtect aDepthGuard(m_nParseDepth);
1235 std::vector<std::unique_ptr<SmNode>> RelationArray;
1236 RelationArray.push_back(DoRelation());
1237 while (m_aCurToken.nLevel >= 4)
1238 RelationArray.push_back(DoRelation());
1240 if (RelationArray.size() > 1)
1242 std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken));
1243 xSNode->SetSubNodes(buildNodeArray(RelationArray));
1244 xSNode->SetUseExtraSpaces(bUseExtraSpaces);
1245 return xSNode;
1247 else
1249 // This expression has only one node so just push this node.
1250 return std::move(RelationArray[0]);
1254 std::unique_ptr<SmNode> SmParser5::DoRelation()
1256 DepthProtect aDepthGuard(m_nParseDepth);
1258 int nDepthLimit = m_nParseDepth;
1260 auto xFirst = DoSum();
1261 while (TokenInGroup(TG::Relation))
1263 std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1264 xSNode->SetSelection(m_aCurESelection);
1265 auto xSecond = DoOpSubSup();
1266 auto xThird = DoSum();
1267 xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
1268 xFirst = std::move(xSNode);
1270 ++m_nParseDepth;
1271 DepthProtect bDepthGuard(m_nParseDepth);
1274 m_nParseDepth = nDepthLimit;
1276 return xFirst;
1279 std::unique_ptr<SmNode> SmParser5::DoSum()
1281 DepthProtect aDepthGuard(m_nParseDepth);
1283 int nDepthLimit = m_nParseDepth;
1285 auto xFirst = DoProduct();
1286 while (TokenInGroup(TG::Sum))
1288 std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1289 xSNode->SetSelection(m_aCurESelection);
1290 auto xSecond = DoOpSubSup();
1291 auto xThird = DoProduct();
1292 xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
1293 xFirst = std::move(xSNode);
1295 ++m_nParseDepth;
1296 DepthProtect bDepthGuard(m_nParseDepth);
1299 m_nParseDepth = nDepthLimit;
1301 return xFirst;
1304 std::unique_ptr<SmNode> SmParser5::DoProduct()
1306 DepthProtect aDepthGuard(m_nParseDepth);
1308 auto xFirst = DoPower();
1310 int nDepthLimit = 0;
1312 while (TokenInGroup(TG::Product))
1314 //this linear loop builds a recursive structure, if it gets
1315 //too deep then later processing, e.g. releasing the tree,
1316 //can exhaust stack
1317 if (m_nParseDepth + nDepthLimit > DEPTH_LIMIT)
1318 throw std::range_error("parser depth limit");
1320 std::unique_ptr<SmStructureNode> xSNode;
1321 std::unique_ptr<SmNode> xOper;
1323 SmTokenType eType = m_aCurToken.eType;
1324 switch (eType)
1326 case TOVER:
1327 xSNode.reset(new SmBinVerNode(m_aCurToken));
1328 xSNode->SetSelection(m_aCurESelection);
1329 xOper.reset(new SmRectangleNode(m_aCurToken));
1330 xOper->SetSelection(m_aCurESelection);
1331 NextToken();
1332 break;
1334 case TBOPER:
1335 xSNode.reset(new SmBinHorNode(m_aCurToken));
1337 NextToken();
1339 //Let the glyph node know it's a binary operation
1340 m_aCurToken.eType = TBOPER;
1341 m_aCurToken.nGroup = TG::Product;
1342 xOper = DoGlyphSpecial();
1343 break;
1345 case TOVERBRACE:
1346 case TUNDERBRACE:
1347 xSNode.reset(new SmVerticalBraceNode(m_aCurToken));
1348 xSNode->SetSelection(m_aCurESelection);
1349 xOper.reset(new SmMathSymbolNode(m_aCurToken));
1350 xOper->SetSelection(m_aCurESelection);
1352 NextToken();
1353 break;
1355 case TWIDEBACKSLASH:
1356 case TWIDESLASH:
1358 SmBinDiagonalNode* pSTmp = new SmBinDiagonalNode(m_aCurToken);
1359 pSTmp->SetAscending(eType == TWIDESLASH);
1360 xSNode.reset(pSTmp);
1362 xOper.reset(new SmPolyLineNode(m_aCurToken));
1363 xOper->SetSelection(m_aCurESelection);
1364 NextToken();
1366 break;
1369 default:
1370 xSNode.reset(new SmBinHorNode(m_aCurToken));
1371 xSNode->SetSelection(m_aCurESelection);
1373 xOper = DoOpSubSup();
1376 auto xArg = DoPower();
1377 xSNode->SetSubNodesBinMo(std::move(xFirst), std::move(xOper), std::move(xArg));
1378 xFirst = std::move(xSNode);
1379 ++nDepthLimit;
1381 return xFirst;
1384 std::unique_ptr<SmNode> SmParser5::DoSubSup(TG nActiveGroup, std::unique_ptr<SmNode> xGivenNode)
1386 DepthProtect aDepthGuard(m_nParseDepth);
1388 assert(nActiveGroup == TG::Power || nActiveGroup == TG::Limit);
1389 assert(m_aCurToken.nGroup == nActiveGroup);
1391 std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
1392 pNode->SetSelection(m_aCurESelection);
1393 //! Of course 'm_aCurToken' is just the first sub-/supscript token.
1394 //! It should be of no further interest. The positions of the
1395 //! sub-/supscripts will be identified by the corresponding subnodes
1396 //! index in the 'aSubNodes' array (enum value from 'SmSubSup').
1398 pNode->SetUseLimits(nActiveGroup == TG::Limit);
1400 // initialize subnodes array
1401 std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
1402 aSubNodes[0] = std::move(xGivenNode);
1404 // process all sub-/supscripts
1405 int nIndex = 0;
1406 while (TokenInGroup(nActiveGroup))
1408 SmTokenType eType(m_aCurToken.eType);
1410 switch (eType)
1412 case TRSUB:
1413 nIndex = static_cast<int>(RSUB);
1414 break;
1415 case TRSUP:
1416 nIndex = static_cast<int>(RSUP);
1417 break;
1418 case TFROM:
1419 case TCSUB:
1420 nIndex = static_cast<int>(CSUB);
1421 break;
1422 case TTO:
1423 case TCSUP:
1424 nIndex = static_cast<int>(CSUP);
1425 break;
1426 case TLSUB:
1427 nIndex = static_cast<int>(LSUB);
1428 break;
1429 case TLSUP:
1430 nIndex = static_cast<int>(LSUP);
1431 break;
1432 default:
1433 SAL_WARN("starmath", "unknown case");
1435 nIndex++;
1436 assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);
1438 std::unique_ptr<SmNode> xENode;
1439 if (aSubNodes[nIndex]) // if already occupied at earlier iteration
1441 // forget the earlier one, remember an error instead
1442 aSubNodes[nIndex].reset();
1443 xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
1445 else
1447 // skip sub-/supscript token
1448 NextToken();
1451 // get sub-/supscript node
1452 // (even when we saw a double-sub/supscript error in the above
1453 // in order to minimize mess and continue parsing.)
1454 std::unique_ptr<SmNode> xSNode;
1455 if (eType == TFROM || eType == TTO)
1457 // parse limits in old 4.0 and 5.0 style
1458 xSNode = DoRelation();
1460 else
1461 xSNode = DoTerm(true);
1463 aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
1466 pNode->SetSubNodes(buildNodeArray(aSubNodes));
1467 return pNode;
1470 std::unique_ptr<SmNode> SmParser5::DoSubSupEvaluate(std::unique_ptr<SmNode> xGivenNode)
1472 DepthProtect aDepthGuard(m_nParseDepth);
1474 std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
1475 pNode->SetSelection(m_aCurESelection);
1476 pNode->SetUseLimits(true);
1478 // initialize subnodes array
1479 std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
1480 aSubNodes[0] = std::move(xGivenNode);
1482 // process all sub-/supscripts
1483 int nIndex = 0;
1484 while (TokenInGroup(TG::Limit))
1486 SmTokenType eType(m_aCurToken.eType);
1488 switch (eType)
1490 case TFROM:
1491 nIndex = static_cast<int>(RSUB);
1492 break;
1493 case TTO:
1494 nIndex = static_cast<int>(RSUP);
1495 break;
1496 default:
1497 SAL_WARN("starmath", "unknown case");
1499 nIndex++;
1500 assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);
1502 std::unique_ptr<SmNode> xENode;
1503 if (aSubNodes[nIndex]) // if already occupied at earlier iteration
1505 // forget the earlier one, remember an error instead
1506 aSubNodes[nIndex].reset();
1507 xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
1509 else
1510 NextToken(); // skip sub-/supscript token
1512 // get sub-/supscript node
1513 std::unique_ptr<SmNode> xSNode;
1514 xSNode = DoTerm(true);
1516 aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
1519 pNode->SetSubNodes(buildNodeArray(aSubNodes));
1520 return pNode;
1523 std::unique_ptr<SmNode> SmParser5::DoOpSubSup()
1525 DepthProtect aDepthGuard(m_nParseDepth);
1527 // get operator symbol
1528 auto xNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1529 xNode->SetSelection(m_aCurESelection);
1530 // skip operator token
1531 NextToken();
1532 // get sub- supscripts if any
1533 if (m_aCurToken.nGroup == TG::Power)
1534 return DoSubSup(TG::Power, std::move(xNode));
1535 return xNode;
1538 std::unique_ptr<SmNode> SmParser5::DoPower()
1540 DepthProtect aDepthGuard(m_nParseDepth);
1542 // get body for sub- supscripts on top of stack
1543 std::unique_ptr<SmNode> xNode(DoTerm(false));
1545 if (m_aCurToken.nGroup == TG::Power)
1546 return DoSubSup(TG::Power, std::move(xNode));
1547 return xNode;
1550 std::unique_ptr<SmBlankNode> SmParser5::DoBlank()
1552 DepthProtect aDepthGuard(m_nParseDepth);
1554 assert(TokenInGroup(TG::Blank));
1555 std::unique_ptr<SmBlankNode> pBlankNode(new SmBlankNode(m_aCurToken));
1556 pBlankNode->SetSelection(m_aCurESelection);
1560 pBlankNode->IncreaseBy(m_aCurToken);
1561 NextToken();
1562 } while (TokenInGroup(TG::Blank));
1564 // Ignore trailing spaces, if corresponding option is set
1565 if (m_aCurToken.eType == TNEWLINE
1566 || (m_aCurToken.eType == TEND && !utl::ConfigManager::IsFuzzing()
1567 && SM_MOD()->GetConfig()->IsIgnoreSpacesRight()))
1569 pBlankNode->Clear();
1571 return pBlankNode;
1574 std::unique_ptr<SmNode> SmParser5::DoTerm(bool bGroupNumberIdent)
1576 DepthProtect aDepthGuard(m_nParseDepth);
1578 switch (m_aCurToken.eType)
1580 case TESCAPE:
1581 return DoEscape();
1583 case TNOSPACE:
1584 case TLGROUP:
1586 bool bNoSpace = m_aCurToken.eType == TNOSPACE;
1587 if (bNoSpace)
1588 NextToken();
1589 if (m_aCurToken.eType != TLGROUP)
1590 return DoTerm(false); // nospace is no longer concerned
1592 NextToken();
1594 // allow for empty group
1595 if (m_aCurToken.eType == TRGROUP)
1597 std::unique_ptr<SmStructureNode> xSNode(new SmExpressionNode(m_aCurToken));
1598 xSNode->SetSelection(m_aCurESelection);
1599 xSNode->SetSubNodes(nullptr, nullptr);
1601 NextToken();
1602 return std::unique_ptr<SmNode>(xSNode.release());
1605 auto pNode = DoAlign(!bNoSpace);
1606 if (m_aCurToken.eType == TRGROUP)
1608 NextToken();
1609 return pNode;
1611 auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
1612 xSNode->SetSelection(m_aCurESelection);
1613 std::unique_ptr<SmNode> xError(DoError(SmParseError::RgroupExpected));
1614 xSNode->SetSubNodes(std::move(pNode), std::move(xError));
1615 return std::unique_ptr<SmNode>(xSNode.release());
1618 case TLEFT:
1619 return DoBrace();
1620 case TEVALUATE:
1621 return DoEvaluate();
1623 case TBLANK:
1624 case TSBLANK:
1625 return DoBlank();
1627 case TTEXT:
1629 auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_TEXT);
1630 pNode->SetSelection(m_aCurESelection);
1631 NextToken();
1632 return std::unique_ptr<SmNode>(pNode.release());
1634 case TCHARACTER:
1636 auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_VARIABLE);
1637 pNode->SetSelection(m_aCurESelection);
1638 NextToken();
1639 return std::unique_ptr<SmNode>(pNode.release());
1641 case TIDENT:
1642 case TNUMBER:
1644 auto pTextNode = std::make_unique<SmTextNode>(
1645 m_aCurToken, m_aCurToken.eType == TNUMBER ? FNT_NUMBER : FNT_VARIABLE);
1646 pTextNode->SetSelection(m_aCurESelection);
1647 if (!bGroupNumberIdent)
1649 NextToken();
1650 return std::unique_ptr<SmNode>(pTextNode.release());
1652 std::vector<std::unique_ptr<SmNode>> aNodes;
1653 // Some people want to be able to write "x_2n" for "x_{2n}"
1654 // although e.g. LaTeX or AsciiMath interpret that as "x_2 n".
1655 // The tokenizer skips whitespaces so we need some additional
1656 // work to distinguish from "x_2 n".
1657 // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and
1658 // https://bugs.libreoffice.org/show_bug.cgi?id=55853
1659 sal_Int32 nBufLen = m_aBufferString.getLength();
1661 // We need to be careful to call NextToken() only after having
1662 // tested for a whitespace separator (otherwise it will be
1663 // skipped!)
1664 bool moveToNextToken = true;
1665 while (m_nBufferIndex < nBufLen
1666 && m_pSysCC->getType(m_aBufferString, m_nBufferIndex)
1667 != UnicodeType::SPACE_SEPARATOR)
1669 NextToken();
1670 if (m_aCurToken.eType != TNUMBER && m_aCurToken.eType != TIDENT)
1672 // Neither a number nor an identifier. We just moved to
1673 // the next token, so no need to do that again.
1674 moveToNextToken = false;
1675 break;
1677 aNodes.emplace_back(std::unique_ptr<SmNode>(new SmTextNode(
1678 m_aCurToken, m_aCurToken.eType == TNUMBER ? FNT_NUMBER : FNT_VARIABLE)));
1680 if (moveToNextToken)
1681 NextToken();
1682 if (aNodes.empty())
1683 return std::unique_ptr<SmNode>(pTextNode.release());
1684 // We have several concatenated identifiers and numbers.
1685 // Let's group them into one SmExpressionNode.
1686 aNodes.insert(aNodes.begin(), std::move(pTextNode));
1687 std::unique_ptr<SmExpressionNode> xNode(new SmExpressionNode(SmToken()));
1688 xNode->SetSubNodes(buildNodeArray(aNodes));
1689 return std::unique_ptr<SmNode>(xNode.release());
1691 case TLEFTARROW:
1692 case TRIGHTARROW:
1693 case TUPARROW:
1694 case TDOWNARROW:
1695 case TCIRC:
1696 case TDRARROW:
1697 case TDLARROW:
1698 case TDLRARROW:
1699 case TEXISTS:
1700 case TNOTEXISTS:
1701 case TFORALL:
1702 case TPARTIAL:
1703 case TNABLA:
1704 case TLAPLACE:
1705 case TFOURIER:
1706 case TTOWARD:
1707 case TDOTSAXIS:
1708 case TDOTSDIAG:
1709 case TDOTSDOWN:
1710 case TDOTSLOW:
1711 case TDOTSUP:
1712 case TDOTSVERT:
1714 auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1715 pNode->SetSelection(m_aCurESelection);
1716 NextToken();
1717 return std::unique_ptr<SmNode>(pNode.release());
1720 case TSETN:
1721 case TSETZ:
1722 case TSETQ:
1723 case TSETR:
1724 case TSETC:
1725 case THBAR:
1726 case TLAMBDABAR:
1727 case TBACKEPSILON:
1728 case TALEPH:
1729 case TIM:
1730 case TRE:
1731 case TWP:
1732 case TEMPTYSET:
1733 case TINFINITY:
1735 auto pNode = std::make_unique<SmMathIdentifierNode>(m_aCurToken);
1736 pNode->SetSelection(m_aCurESelection);
1737 NextToken();
1738 return std::unique_ptr<SmNode>(pNode.release());
1741 case TPLACE:
1743 auto pNode = std::make_unique<SmPlaceNode>(m_aCurToken);
1744 pNode->SetSelection(m_aCurESelection);
1745 NextToken();
1746 return std::unique_ptr<SmNode>(pNode.release());
1749 case TSPECIAL:
1750 return DoSpecial();
1752 case TBINOM:
1753 return DoBinom();
1755 case TFRAC:
1756 return DoFrac();
1758 case TSTACK:
1759 return DoStack();
1761 case TMATRIX:
1762 return DoMatrix();
1764 case THEX:
1765 NextTokenFontSize();
1766 if (m_aCurToken.eType == THEX)
1768 auto pTextNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_NUMBER);
1769 pTextNode->SetSelection(m_aCurESelection);
1770 NextToken();
1771 return pTextNode;
1773 else
1774 return DoError(SmParseError::NumberExpected);
1775 default:
1776 if (TokenInGroup(TG::LBrace))
1777 return DoBrace();
1778 if (TokenInGroup(TG::Oper))
1779 return DoOperator();
1780 if (TokenInGroup(TG::UnOper))
1781 return DoUnOper();
1782 if (TokenInGroup(TG::Attribute) || TokenInGroup(TG::FontAttr))
1784 std::stack<std::unique_ptr<SmStructureNode>,
1785 std::vector<std::unique_ptr<SmStructureNode>>>
1786 aStack;
1787 bool bIsAttr;
1788 for (;;)
1790 bIsAttr = TokenInGroup(TG::Attribute);
1791 if (!bIsAttr && !TokenInGroup(TG::FontAttr))
1792 break;
1793 aStack.push(bIsAttr ? DoAttribute() : DoFontAttribute());
1796 auto xFirstNode = DoPower();
1797 while (!aStack.empty())
1799 std::unique_ptr<SmStructureNode> xNode = std::move(aStack.top());
1800 aStack.pop();
1801 xNode->SetSubNodes(nullptr, std::move(xFirstNode));
1802 xFirstNode = std::move(xNode);
1804 return xFirstNode;
1806 if (TokenInGroup(TG::Function))
1807 return DoFunction();
1808 return DoError(SmParseError::UnexpectedChar);
1812 std::unique_ptr<SmNode> SmParser5::DoEscape()
1814 DepthProtect aDepthGuard(m_nParseDepth);
1816 NextToken();
1818 switch (m_aCurToken.eType)
1820 case TLPARENT:
1821 case TRPARENT:
1822 case TLBRACKET:
1823 case TRBRACKET:
1824 case TLDBRACKET:
1825 case TRDBRACKET:
1826 case TLBRACE:
1827 case TLGROUP:
1828 case TRBRACE:
1829 case TRGROUP:
1830 case TLANGLE:
1831 case TRANGLE:
1832 case TLCEIL:
1833 case TRCEIL:
1834 case TLFLOOR:
1835 case TRFLOOR:
1836 case TLLINE:
1837 case TRLINE:
1838 case TLDLINE:
1839 case TRDLINE:
1841 auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1842 pNode->SetSelection(m_aCurESelection);
1843 NextToken();
1844 return std::unique_ptr<SmNode>(pNode.release());
1846 default:
1847 return DoError(SmParseError::UnexpectedToken);
1851 std::unique_ptr<SmOperNode> SmParser5::DoOperator()
1853 DepthProtect aDepthGuard(m_nParseDepth);
1855 assert(TokenInGroup(TG::Oper));
1857 auto xSNode = std::make_unique<SmOperNode>(m_aCurToken);
1858 xSNode->SetSelection(m_aCurESelection);
1860 // get operator
1861 auto xOperator = DoOper();
1863 if (m_aCurToken.nGroup == TG::Limit || m_aCurToken.nGroup == TG::Power)
1864 xOperator = DoSubSup(m_aCurToken.nGroup, std::move(xOperator));
1866 // get argument
1867 auto xArg = DoPower();
1869 xSNode->SetSubNodes(std::move(xOperator), std::move(xArg));
1870 return xSNode;
1873 std::unique_ptr<SmNode> SmParser5::DoOper()
1875 DepthProtect aDepthGuard(m_nParseDepth);
1877 SmTokenType eType(m_aCurToken.eType);
1878 std::unique_ptr<SmNode> pNode;
1880 switch (eType)
1882 case TSUM:
1883 case TPROD:
1884 case TCOPROD:
1885 case TINT:
1886 case TINTD:
1887 case TIINT:
1888 case TIIINT:
1889 case TLINT:
1890 case TLLINT:
1891 case TLLLINT:
1892 pNode.reset(new SmMathSymbolNode(m_aCurToken));
1893 pNode->SetSelection(m_aCurESelection);
1894 break;
1896 case TLIM:
1897 case TLIMSUP:
1898 case TLIMINF:
1899 m_aCurToken.aText
1900 = eType == TLIMSUP ? u"lim sup" : eType == TLIMINF ? u"lim inf" : u"lim";
1901 pNode.reset(new SmTextNode(m_aCurToken, FNT_TEXT));
1902 pNode->SetSelection(m_aCurESelection);
1903 break;
1905 case TOPER:
1906 NextToken();
1907 OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token");
1908 m_aCurToken.eType = TOPER;
1909 pNode.reset(new SmGlyphSpecialNode(m_aCurToken));
1910 pNode->SetSelection(m_aCurESelection);
1911 break;
1913 default:
1914 assert(false && "unknown case");
1917 NextToken();
1918 return pNode;
1921 std::unique_ptr<SmStructureNode> SmParser5::DoUnOper()
1923 DepthProtect aDepthGuard(m_nParseDepth);
1925 assert(TokenInGroup(TG::UnOper));
1927 SmToken aNodeToken = m_aCurToken;
1928 ESelection aESelection = m_aCurESelection;
1929 SmTokenType eType = m_aCurToken.eType;
1930 bool bIsPostfix = eType == TFACT;
1932 std::unique_ptr<SmStructureNode> xSNode;
1933 std::unique_ptr<SmNode> xOper;
1934 std::unique_ptr<SmNode> xExtra;
1935 std::unique_ptr<SmNode> xArg;
1937 switch (eType)
1939 case TABS:
1940 case TSQRT:
1941 NextToken();
1942 break;
1944 case TNROOT:
1945 NextToken();
1946 xExtra = DoPower();
1947 break;
1949 case TUOPER:
1950 NextToken();
1951 //Let the glyph know what it is...
1952 m_aCurToken.eType = TUOPER;
1953 m_aCurToken.nGroup = TG::UnOper;
1954 xOper = DoGlyphSpecial();
1955 break;
1957 case TPLUS:
1958 case TMINUS:
1959 case TPLUSMINUS:
1960 case TMINUSPLUS:
1961 case TNEG:
1962 case TFACT:
1963 xOper = DoOpSubSup();
1964 break;
1966 default:
1967 assert(false);
1970 // get argument
1971 xArg = DoPower();
1973 if (eType == TABS)
1975 xSNode.reset(new SmBraceNode(aNodeToken));
1976 xSNode->SetSelection(aESelection);
1977 xSNode->SetScaleMode(SmScaleMode::Height);
1979 // build nodes for left & right lines
1980 // (text, group, level of the used token are of no interest here)
1981 // we'll use row & column of the keyword for abs
1982 aNodeToken.eType = TABS;
1984 aNodeToken.setChar(MS_VERTLINE);
1985 std::unique_ptr<SmNode> xLeft(new SmMathSymbolNode(aNodeToken));
1986 xLeft->SetSelection(aESelection);
1987 std::unique_ptr<SmNode> xRight(new SmMathSymbolNode(aNodeToken));
1988 xRight->SetSelection(aESelection);
1990 xSNode->SetSubNodes(std::move(xLeft), std::move(xArg), std::move(xRight));
1992 else if (eType == TSQRT || eType == TNROOT)
1994 xSNode.reset(new SmRootNode(aNodeToken));
1995 xSNode->SetSelection(aESelection);
1996 xOper.reset(new SmRootSymbolNode(aNodeToken));
1997 xOper->SetSelection(aESelection);
1998 xSNode->SetSubNodes(std::move(xExtra), std::move(xOper), std::move(xArg));
2000 else
2002 xSNode.reset(new SmUnHorNode(aNodeToken));
2003 xSNode->SetSelection(aESelection);
2004 if (bIsPostfix)
2005 xSNode->SetSubNodes(std::move(xArg), std::move(xOper));
2006 else
2008 // prefix operator
2009 xSNode->SetSubNodes(std::move(xOper), std::move(xArg));
2012 return xSNode;
2015 std::unique_ptr<SmStructureNode> SmParser5::DoAttribute()
2017 DepthProtect aDepthGuard(m_nParseDepth);
2019 assert(TokenInGroup(TG::Attribute));
2021 auto xSNode = std::make_unique<SmAttributeNode>(m_aCurToken);
2022 xSNode->SetSelection(m_aCurESelection);
2023 std::unique_ptr<SmNode> xAttr;
2024 SmScaleMode eScaleMode = SmScaleMode::None;
2026 // get appropriate node for the attribute itself
2027 switch (m_aCurToken.eType)
2029 case TUNDERLINE:
2030 case TOVERLINE:
2031 case TOVERSTRIKE:
2032 xAttr.reset(new SmRectangleNode(m_aCurToken));
2033 xAttr->SetSelection(m_aCurESelection);
2034 eScaleMode = SmScaleMode::Width;
2035 break;
2037 case TWIDEVEC:
2038 case TWIDEHARPOON:
2039 case TWIDEHAT:
2040 case TWIDETILDE:
2041 xAttr.reset(new SmMathSymbolNode(m_aCurToken));
2042 xAttr->SetSelection(m_aCurESelection);
2043 eScaleMode = SmScaleMode::Width;
2044 break;
2046 default:
2047 xAttr.reset(new SmMathSymbolNode(m_aCurToken));
2048 xAttr->SetSelection(m_aCurESelection);
2051 NextToken();
2053 xSNode->SetSubNodes(std::move(xAttr), nullptr); // the body will be filled later
2054 xSNode->SetScaleMode(eScaleMode);
2055 return xSNode;
2058 std::unique_ptr<SmStructureNode> SmParser5::DoFontAttribute()
2060 DepthProtect aDepthGuard(m_nParseDepth);
2062 assert(TokenInGroup(TG::FontAttr));
2064 switch (m_aCurToken.eType)
2066 case TITALIC:
2067 case TNITALIC:
2068 case TBOLD:
2069 case TNBOLD:
2070 case TPHANTOM:
2072 auto pNode = std::make_unique<SmFontNode>(m_aCurToken);
2073 pNode->SetSelection(m_aCurESelection);
2074 NextToken();
2075 return pNode;
2078 case TSIZE:
2079 return DoFontSize();
2081 case TFONT:
2082 return DoFont();
2084 case TCOLOR:
2085 return DoColor();
2087 default:
2088 assert(false);
2089 return {};
2093 std::unique_ptr<SmStructureNode> SmParser5::DoColor()
2095 DepthProtect aDepthGuard(m_nParseDepth);
2097 assert(m_aCurToken.eType == TCOLOR);
2098 sal_Int32 nBufferIndex = m_nBufferIndex;
2099 NextTokenColor(TCOLOR);
2100 SmToken aToken;
2101 ESelection aESelection;
2103 if (m_aCurToken.eType == TDVIPSNAMESCOL)
2104 NextTokenColor(TDVIPSNAMESCOL);
2105 if (m_aCurToken.eType == TERROR)
2106 return DoError(SmParseError::ColorExpected);
2107 if (TokenInGroup(TG::Color))
2109 aToken = m_aCurToken;
2110 aESelection = m_aCurESelection;
2111 if (m_aCurToken.eType == TRGB) //loads r, g and b
2113 sal_uInt32 nr, ng, nb, nc;
2114 NextTokenFontSize();
2115 if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2116 return DoError(SmParseError::ColorExpected);
2117 nr = m_aCurToken.aText.toUInt32();
2118 if (nr > 255)
2119 return DoError(SmParseError::ColorExpected);
2120 NextTokenFontSize();
2121 if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2122 return DoError(SmParseError::ColorExpected);
2123 ng = m_aCurToken.aText.toUInt32();
2124 if (ng > 255)
2125 return DoError(SmParseError::ColorExpected);
2126 NextTokenFontSize();
2127 if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2128 return DoError(SmParseError::ColorExpected);
2129 nb = m_aCurToken.aText.toUInt32();
2130 if (nb > 255)
2131 return DoError(SmParseError::ColorExpected);
2132 nc = nb | ng << 8 | nr << 16 | sal_uInt32(0) << 24;
2133 aToken.cMathChar = OUString::number(nc, 16);
2135 else if (m_aCurToken.eType == TRGBA) //loads r, g and b
2137 sal_uInt32 nr, na, ng, nb, nc;
2138 NextTokenFontSize();
2139 if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2140 return DoError(SmParseError::ColorExpected);
2141 nr = m_aCurToken.aText.toUInt32();
2142 if (nr > 255)
2143 return DoError(SmParseError::ColorExpected);
2144 NextTokenFontSize();
2145 if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2146 return DoError(SmParseError::ColorExpected);
2147 ng = m_aCurToken.aText.toUInt32();
2148 if (ng > 255)
2149 return DoError(SmParseError::ColorExpected);
2150 NextTokenFontSize();
2151 if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2152 return DoError(SmParseError::ColorExpected);
2153 nb = m_aCurToken.aText.toUInt32();
2154 if (nb > 255)
2155 return DoError(SmParseError::ColorExpected);
2156 NextTokenFontSize();
2157 if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2158 return DoError(SmParseError::ColorExpected);
2159 na = m_aCurToken.aText.toUInt32();
2160 if (na > 255)
2161 return DoError(SmParseError::ColorExpected);
2162 nc = nb | ng << 8 | nr << 16 | na << 24;
2163 aToken.cMathChar = OUString::number(nc, 16);
2165 else if (m_aCurToken.eType == THEX) //loads hex code
2167 sal_uInt32 nc;
2168 NextTokenFontSize();
2169 if (lcl_IsNotWholeNumber16(m_aCurToken.aText))
2170 return DoError(SmParseError::ColorExpected);
2171 nc = m_aCurToken.aText.toUInt32(16);
2172 aToken.cMathChar = OUString::number(nc, 16);
2174 aToken.aText = m_aBufferString.subView(nBufferIndex, m_nBufferIndex - nBufferIndex);
2175 NextToken();
2177 else
2178 return DoError(SmParseError::ColorExpected);
2180 std::unique_ptr<SmStructureNode> xNode;
2181 xNode.reset(new SmFontNode(aToken));
2182 xNode->SetSelection(aESelection);
2183 return xNode;
2186 std::unique_ptr<SmStructureNode> SmParser5::DoFont()
2188 DepthProtect aDepthGuard(m_nParseDepth);
2190 assert(m_aCurToken.eType == TFONT);
2192 std::unique_ptr<SmStructureNode> xNode;
2193 // last font rules, get that one
2194 SmToken aToken;
2195 ESelection aESelection = m_aCurESelection;
2198 NextToken();
2200 if (TokenInGroup(TG::Font))
2202 aToken = m_aCurToken;
2203 NextToken();
2205 else
2207 return DoError(SmParseError::FontExpected);
2209 } while (m_aCurToken.eType == TFONT);
2211 xNode.reset(new SmFontNode(aToken));
2212 xNode->SetSelection(aESelection);
2213 return xNode;
2216 std::unique_ptr<SmStructureNode> SmParser5::DoFontSize()
2218 DepthProtect aDepthGuard(m_nParseDepth);
2219 std::unique_ptr<SmFontNode> pFontNode(new SmFontNode(m_aCurToken));
2220 pFontNode->SetSelection(m_aCurESelection);
2221 NextTokenFontSize();
2222 FontSizeType Type;
2224 switch (m_aCurToken.eType)
2226 case THEX:
2227 Type = FontSizeType::ABSOLUT;
2228 break;
2229 case TPLUS:
2230 Type = FontSizeType::PLUS;
2231 break;
2232 case TMINUS:
2233 Type = FontSizeType::MINUS;
2234 break;
2235 case TMULTIPLY:
2236 Type = FontSizeType::MULTIPLY;
2237 break;
2238 case TDIVIDEBY:
2239 Type = FontSizeType::DIVIDE;
2240 break;
2242 default:
2243 return DoError(SmParseError::SizeExpected);
2246 if (Type != FontSizeType::ABSOLUT)
2248 NextTokenFontSize();
2249 if (m_aCurToken.eType != THEX)
2250 return DoError(SmParseError::SizeExpected);
2253 // get number argument
2254 Fraction aValue(1);
2255 if (lcl_IsNumber(m_aCurToken.aText))
2257 aValue = m_aCurToken.aText.toDouble();
2258 //!! Reduce values in order to avoid numerical errors
2259 if (aValue.GetDenominator() > 1000)
2261 tools::Long nNum = aValue.GetNumerator();
2262 tools::Long nDenom = aValue.GetDenominator();
2263 while (nDenom > 1000) //remove big denominator
2265 nNum /= 10;
2266 nDenom /= 10;
2268 aValue = Fraction(nNum, nDenom);
2271 else
2272 return DoError(SmParseError::SizeExpected);
2274 pFontNode->SetSizeParameter(aValue, Type);
2275 NextToken();
2276 return pFontNode;
2279 std::unique_ptr<SmStructureNode> SmParser5::DoBrace()
2281 DepthProtect aDepthGuard(m_nParseDepth);
2283 assert(m_aCurToken.eType == TLEFT || TokenInGroup(TG::LBrace));
2285 std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
2286 xSNode->SetSelection(m_aCurESelection);
2287 std::unique_ptr<SmNode> pBody, pLeft, pRight;
2288 SmScaleMode eScaleMode = SmScaleMode::None;
2289 SmParseError eError = SmParseError::None;
2291 if (m_aCurToken.eType == TLEFT)
2293 NextToken();
2295 eScaleMode = SmScaleMode::Height;
2297 // check for left bracket
2298 if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
2300 pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2301 pLeft->SetSelection(m_aCurESelection);
2303 NextToken();
2304 pBody = DoBracebody(true);
2306 if (m_aCurToken.eType == TRIGHT)
2308 NextToken();
2310 // check for right bracket
2311 if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
2313 pRight.reset(new SmMathSymbolNode(m_aCurToken));
2314 pRight->SetSelection(m_aCurESelection);
2315 NextToken();
2317 else
2318 eError = SmParseError::RbraceExpected;
2320 else
2321 eError = SmParseError::RightExpected;
2323 else
2324 eError = SmParseError::LbraceExpected;
2326 else
2328 assert(TokenInGroup(TG::LBrace));
2330 pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2331 pLeft->SetSelection(m_aCurESelection);
2333 NextToken();
2334 pBody = DoBracebody(false);
2336 SmTokenType eExpectedType = TUNKNOWN;
2337 switch (pLeft->GetToken().eType)
2339 case TLPARENT:
2340 eExpectedType = TRPARENT;
2341 break;
2342 case TLBRACKET:
2343 eExpectedType = TRBRACKET;
2344 break;
2345 case TLBRACE:
2346 eExpectedType = TRBRACE;
2347 break;
2348 case TLDBRACKET:
2349 eExpectedType = TRDBRACKET;
2350 break;
2351 case TLLINE:
2352 eExpectedType = TRLINE;
2353 break;
2354 case TLDLINE:
2355 eExpectedType = TRDLINE;
2356 break;
2357 case TLANGLE:
2358 eExpectedType = TRANGLE;
2359 break;
2360 case TLFLOOR:
2361 eExpectedType = TRFLOOR;
2362 break;
2363 case TLCEIL:
2364 eExpectedType = TRCEIL;
2365 break;
2366 case TLRLINE:
2367 eExpectedType = TLRLINE;
2368 break;
2369 case TLRDLINE:
2370 eExpectedType = TLRDLINE;
2371 break;
2372 default:
2373 SAL_WARN("starmath", "unknown case");
2376 if (m_aCurToken.eType == eExpectedType)
2378 pRight.reset(new SmMathSymbolNode(m_aCurToken));
2379 pRight->SetSelection(m_aCurESelection);
2380 NextToken();
2382 else
2383 eError = SmParseError::ParentMismatch;
2386 if (eError == SmParseError::None)
2388 assert(pLeft);
2389 assert(pRight);
2390 xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
2391 xSNode->SetScaleMode(eScaleMode);
2392 return xSNode;
2394 return DoError(eError);
2397 std::unique_ptr<SmBracebodyNode> SmParser5::DoBracebody(bool bIsLeftRight)
2399 DepthProtect aDepthGuard(m_nParseDepth);
2401 auto pBody = std::make_unique<SmBracebodyNode>(m_aCurToken);
2402 pBody->SetSelection(m_aCurESelection);
2404 std::vector<std::unique_ptr<SmNode>> aNodes;
2405 // get body if any
2406 if (bIsLeftRight)
2410 if (m_aCurToken.eType == TMLINE)
2412 SmMathSymbolNode* pTempNode = new SmMathSymbolNode(m_aCurToken);
2413 pTempNode->SetSelection(m_aCurESelection);
2414 aNodes.emplace_back(std::unique_ptr<SmMathSymbolNode>(pTempNode));
2415 NextToken();
2417 else if (m_aCurToken.eType != TRIGHT)
2419 aNodes.push_back(DoAlign());
2420 if (m_aCurToken.eType != TMLINE && m_aCurToken.eType != TRIGHT)
2421 aNodes.emplace_back(DoError(SmParseError::RightExpected));
2423 } while (m_aCurToken.eType != TEND && m_aCurToken.eType != TRIGHT);
2425 else
2429 if (m_aCurToken.eType == TMLINE)
2431 SmMathSymbolNode* pTempNode = new SmMathSymbolNode(m_aCurToken);
2432 pTempNode->SetSelection(m_aCurESelection);
2433 aNodes.emplace_back(std::unique_ptr<SmMathSymbolNode>(pTempNode));
2434 NextToken();
2436 else if (!TokenInGroup(TG::RBrace))
2438 aNodes.push_back(DoAlign());
2439 if (m_aCurToken.eType != TMLINE && !TokenInGroup(TG::RBrace))
2440 aNodes.emplace_back(DoError(SmParseError::RbraceExpected));
2442 } while (m_aCurToken.eType != TEND && !TokenInGroup(TG::RBrace));
2445 pBody->SetSubNodes(buildNodeArray(aNodes));
2446 pBody->SetScaleMode(bIsLeftRight ? SmScaleMode::Height : SmScaleMode::None);
2447 return pBody;
2450 std::unique_ptr<SmNode> SmParser5::DoEvaluate()
2452 DepthProtect aDepthGuard(m_nParseDepth);
2454 // Create node
2455 std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
2456 xSNode->SetSelection(m_aCurESelection);
2457 SmToken aToken(TRLINE, MS_VERTLINE, "evaluate", TG::RBrace, 5);
2459 // Parse body && left none
2460 NextToken();
2461 std::unique_ptr<SmNode> pBody = DoPower();
2462 SmToken bToken(TNONE, '\0', "", TG::LBrace, 5);
2463 std::unique_ptr<SmNode> pLeft;
2464 pLeft.reset(new SmMathSymbolNode(bToken));
2466 // Mount nodes
2467 std::unique_ptr<SmNode> pRight;
2468 pRight.reset(new SmMathSymbolNode(aToken));
2469 xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
2470 xSNode->SetScaleMode(SmScaleMode::Height); // scalable line
2472 // Parse from to
2473 if (m_aCurToken.nGroup == TG::Limit)
2475 std::unique_ptr<SmNode> rSNode;
2476 rSNode = DoSubSupEvaluate(std::move(xSNode));
2477 rSNode->GetToken().eType = TEVALUATE;
2478 return rSNode;
2481 return xSNode;
2484 std::unique_ptr<SmTextNode> SmParser5::DoFunction()
2486 DepthProtect aDepthGuard(m_nParseDepth);
2488 if (m_aCurToken.eType == TFUNC)
2490 NextToken(); // skip "FUNC"-statement
2491 m_aCurToken.eType = TFUNC;
2492 m_aCurToken.nGroup = TG::Function;
2494 auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_FUNCTION);
2495 pNode->SetSelection(m_aCurESelection);
2496 NextToken();
2497 return pNode;
2500 std::unique_ptr<SmTableNode> SmParser5::DoBinom()
2502 DepthProtect aDepthGuard(m_nParseDepth);
2504 auto xSNode = std::make_unique<SmTableNode>(m_aCurToken);
2505 xSNode->SetSelection(m_aCurESelection);
2507 NextToken();
2509 auto xFirst = DoSum();
2510 auto xSecond = DoSum();
2511 xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond));
2512 return xSNode;
2515 std::unique_ptr<SmBinVerNode> SmParser5::DoFrac()
2517 DepthProtect aDepthGuard(m_nParseDepth);
2519 std::unique_ptr<SmBinVerNode> xSNode = std::make_unique<SmBinVerNode>(m_aCurToken);
2520 xSNode->SetSelection(m_aCurESelection);
2521 std::unique_ptr<SmNode> xOper = std::make_unique<SmRectangleNode>(m_aCurToken);
2522 xOper->SetSelection(m_aCurESelection);
2524 NextToken();
2526 auto xFirst = DoSum();
2527 auto xSecond = DoSum();
2528 xSNode->SetSubNodes(std::move(xFirst), std::move(xOper), std::move(xSecond));
2529 return xSNode;
2532 std::unique_ptr<SmStructureNode> SmParser5::DoStack()
2534 DepthProtect aDepthGuard(m_nParseDepth);
2536 std::unique_ptr<SmStructureNode> xSNode(new SmTableNode(m_aCurToken));
2537 xSNode->SetSelection(m_aCurESelection);
2538 NextToken();
2539 if (m_aCurToken.eType != TLGROUP)
2540 return DoError(SmParseError::LgroupExpected);
2541 std::vector<std::unique_ptr<SmNode>> aExprArr;
2544 NextToken();
2545 aExprArr.push_back(DoAlign());
2546 } while (m_aCurToken.eType == TPOUND);
2548 if (m_aCurToken.eType == TRGROUP)
2549 NextToken();
2550 else
2551 aExprArr.emplace_back(DoError(SmParseError::RgroupExpected));
2553 xSNode->SetSubNodes(buildNodeArray(aExprArr));
2554 return xSNode;
2557 std::unique_ptr<SmStructureNode> SmParser5::DoMatrix()
2559 DepthProtect aDepthGuard(m_nParseDepth);
2561 std::unique_ptr<SmMatrixNode> xMNode(new SmMatrixNode(m_aCurToken));
2562 xMNode->SetSelection(m_aCurESelection);
2563 NextToken();
2564 if (m_aCurToken.eType != TLGROUP)
2565 return DoError(SmParseError::LgroupExpected);
2567 std::vector<std::unique_ptr<SmNode>> aExprArr;
2570 NextToken();
2571 aExprArr.push_back(DoAlign());
2572 } while (m_aCurToken.eType == TPOUND);
2574 size_t nCol = aExprArr.size();
2575 size_t nRow = 1;
2576 while (m_aCurToken.eType == TDPOUND)
2578 NextToken();
2579 for (size_t i = 0; i < nCol; i++)
2581 auto xNode = DoAlign();
2582 if (i < (nCol - 1))
2584 if (m_aCurToken.eType == TPOUND)
2585 NextToken();
2586 else
2587 xNode = DoError(SmParseError::PoundExpected);
2589 aExprArr.emplace_back(std::move(xNode));
2591 ++nRow;
2594 if (m_aCurToken.eType == TRGROUP)
2595 NextToken();
2596 else
2598 std::unique_ptr<SmNode> xENode(DoError(SmParseError::RgroupExpected));
2599 if (aExprArr.empty())
2600 nRow = nCol = 1;
2601 else
2602 aExprArr.pop_back();
2603 aExprArr.emplace_back(std::move(xENode));
2606 xMNode->SetSubNodes(buildNodeArray(aExprArr));
2607 xMNode->SetRowCol(static_cast<sal_uInt16>(nRow), static_cast<sal_uInt16>(nCol));
2608 return std::unique_ptr<SmStructureNode>(xMNode.release());
2611 std::unique_ptr<SmSpecialNode> SmParser5::DoSpecial()
2613 DepthProtect aDepthGuard(m_nParseDepth);
2615 bool bReplace = false;
2616 OUString& rName = m_aCurToken.aText;
2617 OUString aNewName;
2619 // conversion of symbol names for 6.0 (XML) file format
2620 // (name change on import / export.
2621 // UI uses localized names XML file format does not.)
2622 if (rName.startsWith("%"))
2624 if (IsImportSymbolNames())
2626 aNewName = SmLocalizedSymbolData::GetUiSymbolName(rName.subView(1));
2627 bReplace = true;
2629 else if (IsExportSymbolNames())
2631 aNewName = SmLocalizedSymbolData::GetExportSymbolName(rName.subView(1));
2632 bReplace = true;
2635 if (!aNewName.isEmpty())
2636 aNewName = "%" + aNewName;
2638 if (bReplace && !aNewName.isEmpty() && rName != aNewName)
2640 Replace(GetTokenIndex(), rName.getLength(), aNewName);
2641 rName = aNewName;
2644 // add symbol name to list of used symbols
2645 const OUString aSymbolName(m_aCurToken.aText.copy(1));
2646 if (!aSymbolName.isEmpty())
2647 m_aUsedSymbols.insert(aSymbolName);
2649 auto pNode = std::make_unique<SmSpecialNode>(m_aCurToken);
2650 pNode->SetSelection(m_aCurESelection);
2651 NextToken();
2652 return pNode;
2655 std::unique_ptr<SmGlyphSpecialNode> SmParser5::DoGlyphSpecial()
2657 DepthProtect aDepthGuard(m_nParseDepth);
2659 auto pNode = std::make_unique<SmGlyphSpecialNode>(m_aCurToken);
2660 NextToken();
2661 return pNode;
2664 std::unique_ptr<SmExpressionNode> SmParser5::DoError(SmParseError eError)
2666 DepthProtect aDepthGuard(m_nParseDepth);
2668 // Identify error message
2669 OUString sStrBuf(SmResId(RID_ERR_IDENT) + starmathdatabase::getParseErrorDesc(eError));
2671 // Generate error node
2672 m_aCurToken.eType = TERROR;
2673 m_aCurToken.cMathChar = sStrBuf;
2674 auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
2675 SmErrorNode* pErr(new SmErrorNode(m_aCurToken));
2676 pErr->SetSelection(m_aCurESelection);
2677 xSNode->SetSubNode(0, pErr);
2679 // Append error to the error list
2680 SmErrorDesc aErrDesc(eError, xSNode.get(), m_aCurToken.cMathChar);
2681 m_aErrDescList.push_back(aErrDesc);
2683 NextToken();
2685 return xSNode;
2688 // end grammar
2690 SmParser5::SmParser5()
2691 : m_nCurError(0)
2692 , m_nBufferIndex(0)
2693 , m_nTokenIndex(0)
2694 , m_nRow(0)
2695 , m_nColOff(0)
2696 , m_bImportSymNames(false)
2697 , m_bExportSymNames(false)
2698 , m_nParseDepth(0)
2699 , m_aNumCC(LanguageTag(LANGUAGE_ENGLISH_US))
2700 , m_pSysCC(&SM_MOD()->GetSysLocale().GetCharClass())
2704 SmParser5::~SmParser5() {}
2706 std::unique_ptr<SmTableNode> SmParser5::Parse(const OUString& rBuffer)
2708 m_aUsedSymbols.clear();
2710 m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2711 m_nBufferIndex = 0;
2712 m_nTokenIndex = 0;
2713 m_nRow = 0;
2714 m_nColOff = 0;
2715 m_nCurError = -1;
2717 m_aErrDescList.clear();
2719 NextToken();
2720 return DoTable();
2723 std::unique_ptr<SmNode> SmParser5::ParseExpression(const OUString& rBuffer)
2725 m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2726 m_nBufferIndex = 0;
2727 m_nTokenIndex = 0;
2728 m_nRow = 0;
2729 m_nColOff = 0;
2730 m_nCurError = -1;
2732 m_aErrDescList.clear();
2734 NextToken();
2735 return DoExpression();
2738 const SmErrorDesc* SmParser5::NextError()
2740 if (!m_aErrDescList.empty())
2741 if (m_nCurError > 0)
2742 return &m_aErrDescList[--m_nCurError];
2743 else
2745 m_nCurError = 0;
2746 return &m_aErrDescList[m_nCurError];
2748 else
2749 return nullptr;
2752 const SmErrorDesc* SmParser5::PrevError()
2754 if (!m_aErrDescList.empty())
2755 if (m_nCurError < static_cast<int>(m_aErrDescList.size() - 1))
2756 return &m_aErrDescList[++m_nCurError];
2757 else
2759 m_nCurError = static_cast<int>(m_aErrDescList.size() - 1);
2760 return &m_aErrDescList[m_nCurError];
2762 else
2763 return nullptr;
2766 const SmErrorDesc* SmParser5::GetError() const
2768 if (m_aErrDescList.empty())
2769 return nullptr;
2770 return &m_aErrDescList.front();
2773 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */