starmath/source/parse5.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <com/sun/star/i18n/UnicodeType.hpp>
  21 #include <com/sun/star/i18n/KParseTokens.hpp>
  22 #include <com/sun/star/i18n/KParseType.hpp>
  23 #include <i18nlangtag/lang.h>
  24 #include <tools/lineend.hxx>
  25 #include <unotools/configmgr.hxx>
  26 #include <unotools/syslocale.hxx>
  27 #include <osl/diagnose.h>
  28 #include <rtl/character.hxx>
  29 #include <parse5.hxx>
  30 #include <strings.hrc>
  31 #include <smmod.hxx>
  32 #include <cfgitem.hxx>
  33 #include <starmathdatabase.hxx>
  34
  35 #include <stack>
  36
  37 using namespace ::com::sun::star::i18n;
  38
  39 //Definition of math keywords
  40 const SmTokenTableEntry aTokenTable[]
  41     = { { u"abs", TABS, '\0', TG::UnOper, 13 },
  42         { u"acute", TACUTE, MS_ACUTE, TG::Attribute, 5 },
  43         { u"aleph", TALEPH, MS_ALEPH, TG::Standalone, 5 },
  44         { u"alignb", TALIGNC, '\0', TG::Align, 0 },
  45         { u"alignc", TALIGNC, '\0', TG::Align, 0 },
  46         { u"alignl", TALIGNL, '\0', TG::Align, 0 },
  47         { u"alignm", TALIGNC, '\0', TG::Align, 0 },
  48         { u"alignr", TALIGNR, '\0', TG::Align, 0 },
  49         { u"alignt", TALIGNC, '\0', TG::Align, 0 },
  50         { u"and", TAND, MS_AND, TG::Product, 0 },
  51         { u"approx", TAPPROX, MS_APPROX, TG::Relation, 0 },
  52         { u"arccos", TACOS, '\0', TG::Function, 5 },
  53         { u"arccot", TACOT, '\0', TG::Function, 5 },
  54         { u"arcosh", TACOSH, '\0', TG::Function, 5 },
  55         { u"arcoth", TACOTH, '\0', TG::Function, 5 },
  56         { u"arcsin", TASIN, '\0', TG::Function, 5 },
  57         { u"arctan", TATAN, '\0', TG::Function, 5 },
  58         { u"arsinh", TASINH, '\0', TG::Function, 5 },
  59         { u"artanh", TATANH, '\0', TG::Function, 5 },
  60         { u"backepsilon", TBACKEPSILON, MS_BACKEPSILON, TG::Standalone, 5 },
  61         { u"bar", TBAR, MS_BAR, TG::Attribute, 5 },
  62         { u"binom", TBINOM, '\0', TG::NONE, 5 },
  63         { u"bold", TBOLD, '\0', TG::FontAttr, 5 },
  64         { u"boper", TBOPER, '\0', TG::Product, 0 },
  65         { u"breve", TBREVE, MS_BREVE, TG::Attribute, 5 },
  66         { u"bslash", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
  67         { u"cdot", TCDOT, MS_CDOT, TG::Product, 0 },
  68         { u"check", TCHECK, MS_CHECK, TG::Attribute, 5 },
  69         { u"circ", TCIRC, MS_CIRC, TG::Standalone, 5 },
  70         { u"circle", TCIRCLE, MS_CIRCLE, TG::Attribute, 5 },
  71         { u"color", TCOLOR, '\0', TG::FontAttr, 5 },
  72         { u"coprod", TCOPROD, MS_COPROD, TG::Oper, 5 },
  73         { u"cos", TCOS, '\0', TG::Function, 5 },
  74         { u"cosh", TCOSH, '\0', TG::Function, 5 },
  75         { u"cot", TCOT, '\0', TG::Function, 5 },
  76         { u"coth", TCOTH, '\0', TG::Function, 5 },
  77         { u"csub", TCSUB, '\0', TG::Power, 0 },
  78         { u"csup", TCSUP, '\0', TG::Power, 0 },
  79         { u"dddot", TDDDOT, MS_DDDOT, TG::Attribute, 5 },
  80         { u"ddot", TDDOT, MS_DDOT, TG::Attribute, 5 },
  81         { u"def", TDEF, MS_DEF, TG::Relation, 0 },
  82         { u"div", TDIV, MS_DIV, TG::Product, 0 },
  83         { u"divides", TDIVIDES, MS_LINE, TG::Relation, 0 },
  84         { u"dlarrow", TDLARROW, MS_DLARROW, TG::Standalone, 5 },
  85         { u"dlrarrow", TDLRARROW, MS_DLRARROW, TG::Standalone, 5 },
  86         { u"dot", TDOT, MS_DOT, TG::Attribute, 5 },
  87         { u"dotsaxis", TDOTSAXIS, MS_DOTSAXIS, TG::Standalone, 5 }, // 5 to continue expression
  88         { u"dotsdiag", TDOTSDIAG, MS_DOTSUP, TG::Standalone, 5 },
  89         { u"dotsdown", TDOTSDOWN, MS_DOTSDOWN, TG::Standalone, 5 },
  90         { u"dotslow", TDOTSLOW, MS_DOTSLOW, TG::Standalone, 5 },
  91         { u"dotsup", TDOTSUP, MS_DOTSUP, TG::Standalone, 5 },
  92         { u"dotsvert", TDOTSVERT, MS_DOTSVERT, TG::Standalone, 5 },
  93         { u"downarrow", TDOWNARROW, MS_DOWNARROW, TG::Standalone, 5 },
  94         { u"drarrow", TDRARROW, MS_DRARROW, TG::Standalone, 5 },
  95         { u"emptyset", TEMPTYSET, MS_EMPTYSET, TG::Standalone, 5 },
  96         { u"equiv", TEQUIV, MS_EQUIV, TG::Relation, 0 },
  97         { u"evaluate", TEVALUATE, '\0', TG::NONE, 0 },
  98         { u"exists", TEXISTS, MS_EXISTS, TG::Standalone, 5 },
  99         { u"exp", TEXP, '\0', TG::Function, 5 },
 100         { u"fact", TFACT, MS_FACT, TG::UnOper, 5 },
 101         { u"fixed", TFIXED, '\0', TG::Font, 0 },
 102         { u"font", TFONT, '\0', TG::FontAttr, 5 },
 103         { u"forall", TFORALL, MS_FORALL, TG::Standalone, 5 },
 104         { u"fourier", TFOURIER, MS_FOURIER, TG::Standalone, 5 },
 105         { u"frac", TFRAC, '\0', TG::NONE, 5 },
 106         { u"from", TFROM, '\0', TG::Limit, 0 },
 107         { u"func", TFUNC, '\0', TG::Function, 5 },
 108         { u"ge", TGE, MS_GE, TG::Relation, 0 },
 109         { u"geslant", TGESLANT, MS_GESLANT, TG::Relation, 0 },
 110         { u"gg", TGG, MS_GG, TG::Relation, 0 },
 111         { u"grave", TGRAVE, MS_GRAVE, TG::Attribute, 5 },
 112         { u"gt", TGT, MS_GT, TG::Relation, 0 },
 113         { u"harpoon", THARPOON, MS_HARPOON, TG::Attribute, 5 },
 114         { u"hat", THAT, MS_HAT, TG::Attribute, 5 },
 115         { u"hbar", THBAR, MS_HBAR, TG::Standalone, 5 },
 116         { u"hex", THEX, '\0', TG::NONE, 5 },
 117         { u"iiint", TIIINT, MS_IIINT, TG::Oper, 5 },
 118         { u"iint", TIINT, MS_IINT, TG::Oper, 5 },
 119         { u"im", TIM, MS_IM, TG::Standalone, 5 },
 120         { u"in", TIN, MS_IN, TG::Relation, 0 },
 121         { u"infinity", TINFINITY, MS_INFINITY, TG::Standalone, 5 },
 122         { u"infty", TINFINITY, MS_INFINITY, TG::Standalone, 5 },
 123         { u"int", TINT, MS_INT, TG::Oper, 5 },
 124         { u"intd", TINTD, MS_INT, TG::Oper, 5 },
 125         { u"intersection", TINTERSECT, MS_INTERSECT, TG::Product, 0 },
 126         { u"it", TIT, '\0', TG::Product, 0 },
 127         { u"ital", TITALIC, '\0', TG::FontAttr, 5 },
 128         { u"italic", TITALIC, '\0', TG::FontAttr, 5 },
 129         { u"lambdabar", TLAMBDABAR, MS_LAMBDABAR, TG::Standalone, 5 },
 130         { u"langle", TLANGLE, MS_LMATHANGLE, TG::LBrace, 5 },
 131         { u"laplace", TLAPLACE, MS_LAPLACE, TG::Standalone, 5 },
 132         { u"lbrace", TLBRACE, MS_LBRACE, TG::LBrace, 5 },
 133         { u"lceil", TLCEIL, MS_LCEIL, TG::LBrace, 5 },
 134         { u"ldbracket", TLDBRACKET, MS_LDBRACKET, TG::LBrace, 5 },
 135         { u"ldline", TLDLINE, MS_DVERTLINE, TG::LBrace, 5 },
 136         { u"le", TLE, MS_LE, TG::Relation, 0 },
 137         { u"left", TLEFT, '\0', TG::NONE, 5 },
 138         { u"leftarrow", TLEFTARROW, MS_LEFTARROW, TG::Standalone, 5 },
 139         { u"leslant", TLESLANT, MS_LESLANT, TG::Relation, 0 },
 140         { u"lfloor", TLFLOOR, MS_LFLOOR, TG::LBrace, 5 },
 141         { u"lim", TLIM, '\0', TG::Oper, 5 },
 142         { u"liminf", TLIMINF, '\0', TG::Oper, 5 },
 143         { u"limsup", TLIMSUP, '\0', TG::Oper, 5 },
 144         { u"lint", TLINT, MS_LINT, TG::Oper, 5 },
 145         { u"ll", TLL, MS_LL, TG::Relation, 0 },
 146         { u"lline", TLLINE, MS_VERTLINE, TG::LBrace, 5 },
 147         { u"llint", TLLINT, MS_LLINT, TG::Oper, 5 },
 148         { u"lllint", TLLLINT, MS_LLLINT, TG::Oper, 5 },
 149         { u"ln", TLN, '\0', TG::Function, 5 },
 150         { u"log", TLOG, '\0', TG::Function, 5 },
 151         { u"lrline", TLRLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5 },
 152         { u"lrdline", TLRDLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5 },
 153         { u"lsub", TLSUB, '\0', TG::Power, 0 },
 154         { u"lsup", TLSUP, '\0', TG::Power, 0 },
 155         { u"lt", TLT, MS_LT, TG::Relation, 0 },
 156         { u"matrix", TMATRIX, '\0', TG::NONE, 5 },
 157         { u"minusplus", TMINUSPLUS, MS_MINUSPLUS, TG::UnOper | TG::Sum, 5 },
 158         { u"mline", TMLINE, MS_VERTLINE, TG::NONE, 0 }, //! not in TG::RBrace, Level 0
 159         { u"nabla", TNABLA, MS_NABLA, TG::Standalone, 5 },
 160         { u"nbold", TNBOLD, '\0', TG::FontAttr, 5 },
 161         { u"ndivides", TNDIVIDES, MS_NDIVIDES, TG::Relation, 0 },
 162         { u"neg", TNEG, MS_NEG, TG::UnOper, 5 },
 163         { u"neq", TNEQ, MS_NEQ, TG::Relation, 0 },
 164         { u"newline", TNEWLINE, '\0', TG::NONE, 0 },
 165         { u"ni", TNI, MS_NI, TG::Relation, 0 },
 166         { u"nitalic", TNITALIC, '\0', TG::FontAttr, 5 },
 167         { u"none", TNONE, '\0', TG::LBrace | TG::RBrace, 0 },
 168         { u"nospace", TNOSPACE, '\0', TG::Standalone, 5 },
 169         { u"notexists", TNOTEXISTS, MS_NOTEXISTS, TG::Standalone, 5 },
 170         { u"notin", TNOTIN, MS_NOTIN, TG::Relation, 0 },
 171         { u"nprec", TNOTPRECEDES, MS_NOTPRECEDES, TG::Relation, 0 },
 172         { u"nroot", TNROOT, MS_SQRT, TG::UnOper, 5 },
 173         { u"nsubset", TNSUBSET, MS_NSUBSET, TG::Relation, 0 },
 174         { u"nsubseteq", TNSUBSETEQ, MS_NSUBSETEQ, TG::Relation, 0 },
 175         { u"nsucc", TNOTSUCCEEDS, MS_NOTSUCCEEDS, TG::Relation, 0 },
 176         { u"nsupset", TNSUPSET, MS_NSUPSET, TG::Relation, 0 },
 177         { u"nsupseteq", TNSUPSETEQ, MS_NSUPSETEQ, TG::Relation, 0 },
 178         { u"odivide", TODIVIDE, MS_ODIVIDE, TG::Product, 0 },
 179         { u"odot", TODOT, MS_ODOT, TG::Product, 0 },
 180         { u"ominus", TOMINUS, MS_OMINUS, TG::Sum, 0 },
 181         { u"oper", TOPER, '\0', TG::Oper, 5 },
 182         { u"oplus", TOPLUS, MS_OPLUS, TG::Sum, 0 },
 183         { u"or", TOR, MS_OR, TG::Sum, 0 },
 184         { u"ortho", TORTHO, MS_ORTHO, TG::Relation, 0 },
 185         { u"otimes", TOTIMES, MS_OTIMES, TG::Product, 0 },
 186         { u"over", TOVER, '\0', TG::Product, 0 },
 187         { u"overbrace", TOVERBRACE, MS_OVERBRACE, TG::Product, 5 },
 188         { u"overline", TOVERLINE, '\0', TG::Attribute, 5 },
 189         { u"overstrike", TOVERSTRIKE, '\0', TG::Attribute, 5 },
 190         { u"owns", TNI, MS_NI, TG::Relation, 0 },
 191         { u"parallel", TPARALLEL, MS_DLINE, TG::Relation, 0 },
 192         { u"partial", TPARTIAL, MS_PARTIAL, TG::Standalone, 5 },
 193         { u"phantom", TPHANTOM, '\0', TG::FontAttr, 5 },
 194         { u"plusminus", TPLUSMINUS, MS_PLUSMINUS, TG::UnOper | TG::Sum, 5 },
 195         { u"prec", TPRECEDES, MS_PRECEDES, TG::Relation, 0 },
 196         { u"preccurlyeq", TPRECEDESEQUAL, MS_PRECEDESEQUAL, TG::Relation, 0 },
 197         { u"precsim", TPRECEDESEQUIV, MS_PRECEDESEQUIV, TG::Relation, 0 },
 198         { u"prod", TPROD, MS_PROD, TG::Oper, 5 },
 199         { u"prop", TPROP, MS_PROP, TG::Relation, 0 },
 200         { u"rangle", TRANGLE, MS_RMATHANGLE, TG::RBrace, 0 }, //! 0 to terminate expression
 201         { u"rbrace", TRBRACE, MS_RBRACE, TG::RBrace, 0 },
 202         { u"rceil", TRCEIL, MS_RCEIL, TG::RBrace, 0 },
 203         { u"rdbracket", TRDBRACKET, MS_RDBRACKET, TG::RBrace, 0 },
 204         { u"rdline", TRDLINE, MS_DVERTLINE, TG::RBrace, 0 },
 205         { u"re", TRE, MS_RE, TG::Standalone, 5 },
 206         { u"rfloor", TRFLOOR, MS_RFLOOR, TG::RBrace, 0 }, //! 0 to terminate expression
 207         { u"right", TRIGHT, '\0', TG::NONE, 0 },
 208         { u"rightarrow", TRIGHTARROW, MS_RIGHTARROW, TG::Standalone, 5 },
 209         { u"rline", TRLINE, MS_VERTLINE, TG::RBrace, 0 }, //! 0 to terminate expression
 210         { u"rsub", TRSUB, '\0', TG::Power, 0 },
 211         { u"rsup", TRSUP, '\0', TG::Power, 0 },
 212         { u"sans", TSANS, '\0', TG::Font, 0 },
 213         { u"serif", TSERIF, '\0', TG::Font, 0 },
 214         { u"setC", TSETC, MS_SETC, TG::Standalone, 5 },
 215         { u"setminus", TSETMINUS, MS_BACKSLASH, TG::Product, 0 },
 216         { u"setN", TSETN, MS_SETN, TG::Standalone, 5 },
 217         { u"setQ", TSETQ, MS_SETQ, TG::Standalone, 5 },
 218         { u"setquotient", TSETQUOTIENT, MS_SLASH, TG::Product, 0 },
 219         { u"setR", TSETR, MS_SETR, TG::Standalone, 5 },
 220         { u"setZ", TSETZ, MS_SETZ, TG::Standalone, 5 },
 221         { u"sim", TSIM, MS_SIM, TG::Relation, 0 },
 222         { u"simeq", TSIMEQ, MS_SIMEQ, TG::Relation, 0 },
 223         { u"sin", TSIN, '\0', TG::Function, 5 },
 224         { u"sinh", TSINH, '\0', TG::Function, 5 },
 225         { u"size", TSIZE, '\0', TG::FontAttr, 5 },
 226         { u"slash", TSLASH, MS_SLASH, TG::Product, 0 },
 227         { u"sqrt", TSQRT, MS_SQRT, TG::UnOper, 5 },
 228         { u"stack", TSTACK, '\0', TG::NONE, 5 },
 229         { u"sub", TRSUB, '\0', TG::Power, 0 },
 230         { u"subset", TSUBSET, MS_SUBSET, TG::Relation, 0 },
 231         { u"subseteq", TSUBSETEQ, MS_SUBSETEQ, TG::Relation, 0 },
 232         { u"succ", TSUCCEEDS, MS_SUCCEEDS, TG::Relation, 0 },
 233         { u"succcurlyeq", TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TG::Relation, 0 },
 234         { u"succsim", TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TG::Relation, 0 },
 235         { u"sum", TSUM, MS_SUM, TG::Oper, 5 },
 236         { u"sup", TRSUP, '\0', TG::Power, 0 },
 237         { u"supset", TSUPSET, MS_SUPSET, TG::Relation, 0 },
 238         { u"supseteq", TSUPSETEQ, MS_SUPSETEQ, TG::Relation, 0 },
 239         { u"tan", TTAN, '\0', TG::Function, 5 },
 240         { u"tanh", TTANH, '\0', TG::Function, 5 },
 241         { u"tilde", TTILDE, MS_TILDE, TG::Attribute, 5 },
 242         { u"times", TTIMES, MS_TIMES, TG::Product, 0 },
 243         { u"to", TTO, '\0', TG::Limit, 0 },
 244         { u"toward", TTOWARD, MS_RIGHTARROW, TG::Relation, 0 },
 245         { u"transl", TTRANSL, MS_TRANSL, TG::Relation, 0 },
 246         { u"transr", TTRANSR, MS_TRANSR, TG::Relation, 0 },
 247         { u"underbrace", TUNDERBRACE, MS_UNDERBRACE, TG::Product, 5 },
 248         { u"underline", TUNDERLINE, '\0', TG::Attribute, 5 },
 249         { u"union", TUNION, MS_UNION, TG::Sum, 0 },
 250         { u"uoper", TUOPER, '\0', TG::UnOper, 5 },
 251         { u"uparrow", TUPARROW, MS_UPARROW, TG::Standalone, 5 },
 252         { u"vec", TVEC, MS_VEC, TG::Attribute, 5 },
 253         { u"widebslash", TWIDEBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
 254         { u"wideharpoon", TWIDEHARPOON, MS_HARPOON, TG::Attribute, 5 },
 255         { u"widehat", TWIDEHAT, MS_HAT, TG::Attribute, 5 },
 256         { u"wideslash", TWIDESLASH, MS_SLASH, TG::Product, 0 },
 257         { u"widetilde", TWIDETILDE, MS_TILDE, TG::Attribute, 5 },
 258         { u"widevec", TWIDEVEC, MS_VEC, TG::Attribute, 5 },
 259         { u"wp", TWP, MS_WP, TG::Standalone, 5 } };
 260
 261 // First character may be any alphabetic
 262 const sal_Int32 coStartFlags = KParseTokens::ANY_LETTER | KParseTokens::IGNORE_LEADING_WS;
 263
 264 // Continuing characters may be any alphabetic
 265 const sal_Int32 coContFlags = (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
 266                               | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
 267 // First character for numbers, may be any numeric or dot
 268 const sal_Int32 coNumStartFlags
 269     = KParseTokens::ASC_DIGIT | KParseTokens::ASC_DOT | KParseTokens::IGNORE_LEADING_WS;
 270 // Continuing characters for numbers, may be any numeric or dot or comma.
 271 // tdf#127873: additionally accept ',' comma group separator as too many
 272 // existing documents unwittingly may have used that as decimal separator
 273 // in such locales (though it never was as this is always the en-US locale
 274 // and the group separator is only parsed away).
 275 const sal_Int32 coNumContFlags = (coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
 276                                  | KParseTokens::GROUP_SEPARATOR_IN_NUMBER;
 277 // First character for numbers hexadecimal
 278 const sal_Int32 coNum16StartFlags
 279     = KParseTokens::ASC_DIGIT | KParseTokens::ASC_UPALPHA | KParseTokens::IGNORE_LEADING_WS;
 280
 281 // Continuing characters for numbers hexadecimal
 282 const sal_Int32 coNum16ContFlags = (coNum16StartFlags & ~KParseTokens::IGNORE_LEADING_WS);
 283 // user-defined char continuing characters may be any alphanumeric or dot.
 284 const sal_Int32 coUserDefinedCharContFlags = KParseTokens::ANY_LETTER_OR_NUMBER
 285                                              | KParseTokens::ASC_DOT
 286                                              | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
 287
 288 //Checks if keyword is in the list.
 289 static inline bool findCompare(const SmTokenTableEntry& lhs, const OUString& s)
 290 {
 291     return s.compareToIgnoreAsciiCase(lhs.aIdent) > 0;
 292 }
 293
 294 //Returns the SmTokenTableEntry for a keyword
 295 static const SmTokenTableEntry* GetTokenTableEntry(const OUString& rName)
 296 {
 297     if (rName.isEmpty())
 298         return nullptr; //avoid null pointer exceptions
 299     //Looks for the first keyword after or equal to rName in alphabetical order.
 300     auto findIter
 301         = std::lower_bound(std::begin(aTokenTable), std::end(aTokenTable), rName, findCompare);
 302     if (findIter != std::end(aTokenTable) && rName.equalsIgnoreAsciiCase(findIter->aIdent))
 303         return &*findIter; //check is equal
 304     return nullptr; //not found
 305 }
 306
 307 static bool IsDelimiter(const OUString& rTxt, sal_Int32 nPos)
 308 { // returns 'true' iff cChar is '\0' or a delimiter
 309
 310     assert(nPos <= rTxt.getLength()); //index out of range
 311     if (nPos == rTxt.getLength())
 312         return true; //This is EOF
 313     sal_Unicode cChar = rTxt[nPos];
 314
 315     // check if 'cChar' is in the delimiter table
 316     static const sal_Unicode aDelimiterTable[] = {
 317         ' ', '{', '}', '(', ')', '\t', '\n', '\r', '+', '-',  '*', '/', '=', '[',
 318         ']', '^', '_', '#', '%', '>',  '<',  '&',  '|', '\\', '"', '~', '`'
 319     }; //reordered by usage (by eye) for nanoseconds saving.
 320
 321     //checks the array
 322     for (auto const& cDelimiter : aDelimiterTable)
 323     {
 324         if (cDelimiter == cChar)
 325             return true;
 326     }
 327
 328     //special chars support
 329     sal_Int16 nTypJp = SM_MOD()->GetSysLocale().GetCharClass().getType(rTxt, nPos);
 330     return (nTypJp == css::i18n::UnicodeType::SPACE_SEPARATOR
 331             || nTypJp == css::i18n::UnicodeType::CONTROL);
 332 }
 333
 334 // checks number used as arguments in Math formulas (e.g. 'size' command)
 335 // Format: no negative numbers, must start with a digit, no exponent notation, ...
 336 static bool lcl_IsNumber(const OUString& rText)
 337 {
 338     bool bPoint = false;
 339     const sal_Unicode* pBuffer = rText.getStr();
 340     for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
 341     {
 342         const sal_Unicode cChar = *pBuffer;
 343         if (cChar == '.')
 344         {
 345             if (bPoint)
 346                 return false;
 347             else
 348                 bPoint = true;
 349         }
 350         else if (!rtl::isAsciiDigit(cChar))
 351             return false;
 352     }
 353     return true;
 354 }
 355 // checks number used as arguments in Math formulas (e.g. 'size' command)
 356 // Format: no negative numbers, must start with a digit, no exponent notation, ...
 357 static bool lcl_IsNotWholeNumber(const OUString& rText)
 358 {
 359     const sal_Unicode* pBuffer = rText.getStr();
 360     for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
 361         if (!rtl::isAsciiDigit(*pBuffer))
 362             return true;
 363     return false;
 364 }
 365 // checks hex number used as arguments in Math formulas (e.g. 'hex' command)
 366 // Format: no negative numbers, must start with a digit, no exponent notation, ...
 367 static bool lcl_IsNotWholeNumber16(const OUString& rText)
 368 {
 369     const sal_Unicode* pBuffer = rText.getStr();
 370     for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
 371         if (!rtl::isAsciiCanonicHexDigit(*pBuffer))
 372             return true;
 373     return false;
 374 }
 375
 376 //Text replace onto m_aBufferString
 377 void SmParser5::Replace(sal_Int32 nPos, sal_Int32 nLen, std::u16string_view aText)
 378 {
 379     assert(nPos + nLen <= m_aBufferString.getLength()); //checks if length allows text replace
 380
 381     m_aBufferString = m_aBufferString.replaceAt(nPos, nLen, aText); //replace and reindex
 382     sal_Int32 nChg = aText.size() - nLen;
 383     m_nBufferIndex = m_nBufferIndex + nChg;
 384     m_nTokenIndex = m_nTokenIndex + nChg;
 385 }
 386
 387 void SmParser5::NextToken() //Central part of the parser
 388 {
 389     sal_Int32 nBufLen = m_aBufferString.getLength();
 390     ParseResult aRes;
 391     sal_Int32 nRealStart;
 392     bool bCont;
 393     do
 394     {
 395         // skip white spaces
 396         while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex))
 397             ++m_nBufferIndex;
 398
 399         // Try to parse a number in a locale-independent manner using
 400         // '.' as decimal separator.
 401         // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
 402         aRes
 403             = m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER, m_aBufferString, m_nBufferIndex,
 404                                             coNumStartFlags, "", coNumContFlags, "");
 405
 406         if (aRes.TokenType == 0)
 407         {
 408             // Try again with the default token parsing.
 409             aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, "",
 410                                            coContFlags, "");
 411         }
 412
 413         nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
 414         m_nBufferIndex = nRealStart;
 415
 416         bCont = false;
 417         if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart])
 418         {
 419             // keep data needed for tokens row and col entry up to date
 420             ++m_nRow;
 421             m_nBufferIndex = m_nColOff = nRealStart + 1;
 422             bCont = true;
 423         }
 424         else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
 425         {
 426             if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
 427             {
 428                 //SkipComment
 429                 m_nBufferIndex = nRealStart + 2;
 430                 while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex])
 431                     ++m_nBufferIndex;
 432                 bCont = true;
 433             }
 434         }
 435
 436     } while (bCont);
 437
 438     // set index of current token
 439     m_nTokenIndex = m_nBufferIndex;
 440     sal_uInt32 nCol = nRealStart - m_nColOff;
 441
 442     bool bHandled = true;
 443     if (nRealStart >= nBufLen)
 444     {
 445         m_aCurToken.eType = TEND;
 446         m_aCurToken.cMathChar = u"";
 447         m_aCurToken.nGroup = TG::NONE;
 448         m_aCurToken.nLevel = 0;
 449         m_aCurToken.aText.clear();
 450     }
 451     else if (aRes.TokenType & KParseType::ANY_NUMBER)
 452     {
 453         assert(aRes.EndPos > 0);
 454         if (m_aBufferString[aRes.EndPos - 1] == ',' && aRes.EndPos < nBufLen
 455             && m_pSysCC->getType(m_aBufferString, aRes.EndPos) != UnicodeType::SPACE_SEPARATOR)
 456         {
 457             // Comma followed by a non-space char is unlikely for decimal/thousands separator.
 458             --aRes.EndPos;
 459         }
 460         sal_Int32 n = aRes.EndPos - nRealStart;
 461         assert(n >= 0);
 462         m_aCurToken.eType = TNUMBER;
 463         m_aCurToken.cMathChar = u"";
 464         m_aCurToken.nGroup = TG::NONE;
 465         m_aCurToken.nLevel = 5;
 466         m_aCurToken.aText = m_aBufferString.copy(nRealStart, n);
 467
 468         SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos), "starmath",
 469                     "identifier really finished? (compatibility!)");
 470     }
 471     else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING)
 472     {
 473         m_aCurToken.eType = TTEXT;
 474         m_aCurToken.cMathChar = u"";
 475         m_aCurToken.nGroup = TG::NONE;
 476         m_aCurToken.nLevel = 5;
 477         m_aCurToken.aText = aRes.DequotedNameOrString;
 478         nCol++;
 479     }
 480     else if (aRes.TokenType & KParseType::IDENTNAME)
 481     {
 482         sal_Int32 n = aRes.EndPos - nRealStart;
 483         assert(n >= 0);
 484         OUString aName(m_aBufferString.copy(nRealStart, n));
 485         const SmTokenTableEntry* pEntry = GetTokenTableEntry(aName);
 486
 487         if (pEntry)
 488         {
 489             m_aCurToken.eType = pEntry->eType;
 490             m_aCurToken.setChar(pEntry->cMathChar);
 491             m_aCurToken.nGroup = pEntry->nGroup;
 492             m_aCurToken.nLevel = pEntry->nLevel;
 493             m_aCurToken.aText = pEntry->aIdent;
 494         }
 495         else
 496         {
 497             m_aCurToken.eType = TIDENT;
 498             m_aCurToken.cMathChar = u"";
 499             m_aCurToken.nGroup = TG::NONE;
 500             m_aCurToken.nLevel = 5;
 501             m_aCurToken.aText = aName;
 502
 503             SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos), "starmath",
 504                         "identifier really finished? (compatibility!)");
 505         }
 506     }
 507     else if (aRes.TokenType == 0 && '_' == m_aBufferString[nRealStart])
 508     {
 509         m_aCurToken.eType = TRSUB;
 510         m_aCurToken.cMathChar = u"";
 511         m_aCurToken.nGroup = TG::Power;
 512         m_aCurToken.nLevel = 0;
 513         m_aCurToken.aText = "_";
 514
 515         aRes.EndPos = nRealStart + 1;
 516     }
 517     else if (aRes.TokenType & KParseType::BOOLEAN)
 518     {
 519         sal_Int32& rnEndPos = aRes.EndPos;
 520         if (rnEndPos - nRealStart <= 2)
 521         {
 522             sal_Unicode ch = m_aBufferString[nRealStart];
 523             switch (ch)
 524             {
 525                 case '<':
 526                 {
 527                     if (m_aBufferString.match("<<", nRealStart))
 528                     {
 529                         m_aCurToken.eType = TLL;
 530                         m_aCurToken.setChar(MS_LL);
 531                         m_aCurToken.nGroup = TG::Relation;
 532                         m_aCurToken.nLevel = 0;
 533                         m_aCurToken.aText = "<<";
 534
 535                         rnEndPos = nRealStart + 2;
 536                     }
 537                     else if (m_aBufferString.match("<=", nRealStart))
 538                     {
 539                         m_aCurToken.eType = TLE;
 540                         m_aCurToken.setChar(MS_LE);
 541                         m_aCurToken.nGroup = TG::Relation;
 542                         m_aCurToken.nLevel = 0;
 543                         m_aCurToken.aText = "<=";
 544
 545                         rnEndPos = nRealStart + 2;
 546                     }
 547                     else if (m_aBufferString.match("<-", nRealStart))
 548                     {
 549                         m_aCurToken.eType = TLEFTARROW;
 550                         m_aCurToken.setChar(MS_LEFTARROW);
 551                         m_aCurToken.nGroup = TG::Standalone;
 552                         m_aCurToken.nLevel = 5;
 553                         m_aCurToken.aText = "<-";
 554
 555                         rnEndPos = nRealStart + 2;
 556                     }
 557                     else if (m_aBufferString.match("<>", nRealStart))
 558                     {
 559                         m_aCurToken.eType = TNEQ;
 560                         m_aCurToken.setChar(MS_NEQ);
 561                         m_aCurToken.nGroup = TG::Relation;
 562                         m_aCurToken.nLevel = 0;
 563                         m_aCurToken.aText = "<>";
 564
 565                         rnEndPos = nRealStart + 2;
 566                     }
 567                     else if (m_aBufferString.match("<?>", nRealStart))
 568                     {
 569                         m_aCurToken.eType = TPLACE;
 570                         m_aCurToken.setChar(MS_PLACE);
 571                         m_aCurToken.nGroup = TG::NONE;
 572                         m_aCurToken.nLevel = 5;
 573                         m_aCurToken.aText = "<?>";
 574
 575                         rnEndPos = nRealStart + 3;
 576                     }
 577                     else
 578                     {
 579                         m_aCurToken.eType = TLT;
 580                         m_aCurToken.setChar(MS_LT);
 581                         m_aCurToken.nGroup = TG::Relation;
 582                         m_aCurToken.nLevel = 0;
 583                         m_aCurToken.aText = "<";
 584                     }
 585                 }
 586                 break;
 587                 case '>':
 588                 {
 589                     if (m_aBufferString.match(">=", nRealStart))
 590                     {
 591                         m_aCurToken.eType = TGE;
 592                         m_aCurToken.setChar(MS_GE);
 593                         m_aCurToken.nGroup = TG::Relation;
 594                         m_aCurToken.nLevel = 0;
 595                         m_aCurToken.aText = ">=";
 596
 597                         rnEndPos = nRealStart + 2;
 598                     }
 599                     else if (m_aBufferString.match(">>", nRealStart))
 600                     {
 601                         m_aCurToken.eType = TGG;
 602                         m_aCurToken.setChar(MS_GG);
 603                         m_aCurToken.nGroup = TG::Relation;
 604                         m_aCurToken.nLevel = 0;
 605                         m_aCurToken.aText = ">>";
 606
 607                         rnEndPos = nRealStart + 2;
 608                     }
 609                     else
 610                     {
 611                         m_aCurToken.eType = TGT;
 612                         m_aCurToken.setChar(MS_GT);
 613                         m_aCurToken.nGroup = TG::Relation;
 614                         m_aCurToken.nLevel = 0;
 615                         m_aCurToken.aText = ">";
 616                     }
 617                 }
 618                 break;
 619                 default:
 620                     bHandled = false;
 621             }
 622         }
 623     }
 624     else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
 625     {
 626         sal_Int32& rnEndPos = aRes.EndPos;
 627         if (rnEndPos - nRealStart == 1)
 628         {
 629             sal_Unicode ch = m_aBufferString[nRealStart];
 630             switch (ch)
 631             {
 632                 case '%':
 633                 {
 634                     //! modifies aRes.EndPos
 635
 636                     OSL_ENSURE(rnEndPos >= nBufLen || '%' != m_aBufferString[rnEndPos],
 637                                "unexpected comment start");
 638
 639                     // get identifier of user-defined character
 640                     ParseResult aTmpRes = m_pSysCC->parseAnyToken(m_aBufferString, rnEndPos,
 641                                                                   KParseTokens::ANY_LETTER, "",
 642                                                                   coUserDefinedCharContFlags, "");
 643
 644                     sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace;
 645
 646                     // default setting for the case that no identifier
 647                     // i.e. a valid symbol-name is following the '%'
 648                     // character
 649                     m_aCurToken.eType = TTEXT;
 650                     m_aCurToken.cMathChar = u"";
 651                     m_aCurToken.nGroup = TG::NONE;
 652                     m_aCurToken.nLevel = 5;
 653                     m_aCurToken.aText = "%";
 654
 655                     if (aTmpRes.TokenType & KParseType::IDENTNAME)
 656                     {
 657                         sal_Int32 n = aTmpRes.EndPos - nTmpStart;
 658                         m_aCurToken.eType = TSPECIAL;
 659                         m_aCurToken.aText = m_aBufferString.copy(nTmpStart - 1, n + 1);
 660
 661                         OSL_ENSURE(aTmpRes.EndPos > rnEndPos, "empty identifier");
 662                         if (aTmpRes.EndPos > rnEndPos)
 663                             rnEndPos = aTmpRes.EndPos;
 664                         else
 665                             ++rnEndPos;
 666                     }
 667
 668                     // if no symbol-name was found we start-over with
 669                     // finding the next token right after the '%' sign.
 670                     // I.e. we leave rnEndPos unmodified.
 671                 }
 672                 break;
 673                 case '[':
 674                 {
 675                     m_aCurToken.eType = TLBRACKET;
 676                     m_aCurToken.setChar(MS_LBRACKET);
 677                     m_aCurToken.nGroup = TG::LBrace;
 678                     m_aCurToken.nLevel = 5;
 679                     m_aCurToken.aText = "[";
 680                 }
 681                 break;
 682                 case '\\':
 683                 {
 684                     m_aCurToken.eType = TESCAPE;
 685                     m_aCurToken.cMathChar = u"";
 686                     m_aCurToken.nGroup = TG::NONE;
 687                     m_aCurToken.nLevel = 5;
 688                     m_aCurToken.aText = "\\";
 689                 }
 690                 break;
 691                 case ']':
 692                 {
 693                     m_aCurToken.eType = TRBRACKET;
 694                     m_aCurToken.setChar(MS_RBRACKET);
 695                     m_aCurToken.nGroup = TG::RBrace;
 696                     m_aCurToken.nLevel = 0;
 697                     m_aCurToken.aText = "]";
 698                 }
 699                 break;
 700                 case '^':
 701                 {
 702                     m_aCurToken.eType = TRSUP;
 703                     m_aCurToken.cMathChar = u"";
 704                     m_aCurToken.nGroup = TG::Power;
 705                     m_aCurToken.nLevel = 0;
 706                     m_aCurToken.aText = "^";
 707                 }
 708                 break;
 709                 case '`':
 710                 {
 711                     m_aCurToken.eType = TSBLANK;
 712                     m_aCurToken.cMathChar = u"";
 713                     m_aCurToken.nGroup = TG::Blank;
 714                     m_aCurToken.nLevel = 5;
 715                     m_aCurToken.aText = "`";
 716                 }
 717                 break;
 718                 case '{':
 719                 {
 720                     m_aCurToken.eType = TLGROUP;
 721                     m_aCurToken.setChar(MS_LBRACE);
 722                     m_aCurToken.nGroup = TG::NONE;
 723                     m_aCurToken.nLevel = 5;
 724                     m_aCurToken.aText = "{";
 725                 }
 726                 break;
 727                 case '|':
 728                 {
 729                     m_aCurToken.eType = TOR;
 730                     m_aCurToken.setChar(MS_OR);
 731                     m_aCurToken.nGroup = TG::Sum;
 732                     m_aCurToken.nLevel = 0;
 733                     m_aCurToken.aText = "|";
 734                 }
 735                 break;
 736                 case '}':
 737                 {
 738                     m_aCurToken.eType = TRGROUP;
 739                     m_aCurToken.setChar(MS_RBRACE);
 740                     m_aCurToken.nGroup = TG::NONE;
 741                     m_aCurToken.nLevel = 0;
 742                     m_aCurToken.aText = "}";
 743                 }
 744                 break;
 745                 case '~':
 746                 {
 747                     m_aCurToken.eType = TBLANK;
 748                     m_aCurToken.cMathChar = u"";
 749                     m_aCurToken.nGroup = TG::Blank;
 750                     m_aCurToken.nLevel = 5;
 751                     m_aCurToken.aText = "~";
 752                 }
 753                 break;
 754                 case '#':
 755                 {
 756                     if (m_aBufferString.match("##", nRealStart))
 757                     {
 758                         m_aCurToken.eType = TDPOUND;
 759                         m_aCurToken.cMathChar = u"";
 760                         m_aCurToken.nGroup = TG::NONE;
 761                         m_aCurToken.nLevel = 0;
 762                         m_aCurToken.aText = "##";
 763
 764                         rnEndPos = nRealStart + 2;
 765                     }
 766                     else
 767                     {
 768                         m_aCurToken.eType = TPOUND;
 769                         m_aCurToken.cMathChar = u"";
 770                         m_aCurToken.nGroup = TG::NONE;
 771                         m_aCurToken.nLevel = 0;
 772                         m_aCurToken.aText = "#";
 773                     }
 774                 }
 775                 break;
 776                 case '&':
 777                 {
 778                     m_aCurToken.eType = TAND;
 779                     m_aCurToken.setChar(MS_AND);
 780                     m_aCurToken.nGroup = TG::Product;
 781                     m_aCurToken.nLevel = 0;
 782                     m_aCurToken.aText = "&";
 783                 }
 784                 break;
 785                 case '(':
 786                 {
 787                     m_aCurToken.eType = TLPARENT;
 788                     m_aCurToken.setChar(MS_LPARENT);
 789                     m_aCurToken.nGroup = TG::LBrace;
 790                     m_aCurToken.nLevel = 5; //! 0 to continue expression
 791                     m_aCurToken.aText = "(";
 792                 }
 793                 break;
 794                 case ')':
 795                 {
 796                     m_aCurToken.eType = TRPARENT;
 797                     m_aCurToken.setChar(MS_RPARENT);
 798                     m_aCurToken.nGroup = TG::RBrace;
 799                     m_aCurToken.nLevel = 0; //! 0 to terminate expression
 800                     m_aCurToken.aText = ")";
 801                 }
 802                 break;
 803                 case '*':
 804                 {
 805                     m_aCurToken.eType = TMULTIPLY;
 806                     m_aCurToken.setChar(MS_MULTIPLY);
 807                     m_aCurToken.nGroup = TG::Product;
 808                     m_aCurToken.nLevel = 0;
 809                     m_aCurToken.aText = "*";
 810                 }
 811                 break;
 812                 case '+':
 813                 {
 814                     if (m_aBufferString.match("+-", nRealStart))
 815                     {
 816                         m_aCurToken.eType = TPLUSMINUS;
 817                         m_aCurToken.setChar(MS_PLUSMINUS);
 818                         m_aCurToken.nGroup = TG::UnOper | TG::Sum;
 819                         m_aCurToken.nLevel = 5;
 820                         m_aCurToken.aText = "+-";
 821
 822                         rnEndPos = nRealStart + 2;
 823                     }
 824                     else
 825                     {
 826                         m_aCurToken.eType = TPLUS;
 827                         m_aCurToken.setChar(MS_PLUS);
 828                         m_aCurToken.nGroup = TG::UnOper | TG::Sum;
 829                         m_aCurToken.nLevel = 5;
 830                         m_aCurToken.aText = "+";
 831                     }
 832                 }
 833                 break;
 834                 case '-':
 835                 {
 836                     if (m_aBufferString.match("-+", nRealStart))
 837                     {
 838                         m_aCurToken.eType = TMINUSPLUS;
 839                         m_aCurToken.setChar(MS_MINUSPLUS);
 840                         m_aCurToken.nGroup = TG::UnOper | TG::Sum;
 841                         m_aCurToken.nLevel = 5;
 842                         m_aCurToken.aText = "-+";
 843
 844                         rnEndPos = nRealStart + 2;
 845                     }
 846                     else if (m_aBufferString.match("->", nRealStart))
 847                     {
 848                         m_aCurToken.eType = TRIGHTARROW;
 849                         m_aCurToken.setChar(MS_RIGHTARROW);
 850                         m_aCurToken.nGroup = TG::Standalone;
 851                         m_aCurToken.nLevel = 5;
 852                         m_aCurToken.aText = "->";
 853
 854                         rnEndPos = nRealStart + 2;
 855                     }
 856                     else
 857                     {
 858                         m_aCurToken.eType = TMINUS;
 859                         m_aCurToken.setChar(MS_MINUS);
 860                         m_aCurToken.nGroup = TG::UnOper | TG::Sum;
 861                         m_aCurToken.nLevel = 5;
 862                         m_aCurToken.aText = "-";
 863                     }
 864                 }
 865                 break;
 866                 case '.':
 867                 {
 868                     // Only one character? Then it can't be a number.
 869                     if (m_nBufferIndex < m_aBufferString.getLength() - 1)
 870                     {
 871                         // for compatibility with SO5.2
 872                         // texts like .34 ...56 ... h ...78..90
 873                         // will be treated as numbers
 874                         m_aCurToken.eType = TNUMBER;
 875                         m_aCurToken.cMathChar = u"";
 876                         m_aCurToken.nGroup = TG::NONE;
 877                         m_aCurToken.nLevel = 5;
 878
 879                         sal_Int32 nTxtStart = m_nBufferIndex;
 880                         sal_Unicode cChar;
 881                         // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only
 882                         do
 883                         {
 884                             cChar = m_aBufferString[++m_nBufferIndex];
 885                         } while ((cChar == '.' || rtl::isAsciiDigit(cChar))
 886                                  && (m_nBufferIndex < m_aBufferString.getLength() - 1));
 887
 888                         m_aCurToken.aText
 889                             = m_aBufferString.copy(nTxtStart, m_nBufferIndex - nTxtStart);
 890                         aRes.EndPos = m_nBufferIndex;
 891                     }
 892                     else
 893                         bHandled = false;
 894                 }
 895                 break;
 896                 case '/':
 897                 {
 898                     m_aCurToken.eType = TDIVIDEBY;
 899                     m_aCurToken.setChar(MS_SLASH);
 900                     m_aCurToken.nGroup = TG::Product;
 901                     m_aCurToken.nLevel = 0;
 902                     m_aCurToken.aText = "/";
 903                 }
 904                 break;
 905                 case '=':
 906                 {
 907                     m_aCurToken.eType = TASSIGN;
 908                     m_aCurToken.setChar(MS_ASSIGN);
 909                     m_aCurToken.nGroup = TG::Relation;
 910                     m_aCurToken.nLevel = 0;
 911                     m_aCurToken.aText = "=";
 912                 }
 913                 break;
 914                 default:
 915                     bHandled = false;
 916             }
 917         }
 918     }
 919     else
 920         bHandled = false;
 921
 922     if (!bHandled)
 923     {
 924         m_aCurToken.eType = TCHARACTER;
 925         m_aCurToken.cMathChar = u"";
 926         m_aCurToken.nGroup = TG::NONE;
 927         m_aCurToken.nLevel = 5;
 928
 929         // tdf#129372: we may have to deal with surrogate pairs
 930         // (see https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates)
 931         // in this case, we must read 2 sal_Unicode instead of 1
 932         int nOffset(rtl::isSurrogate(m_aBufferString[nRealStart]) ? 2 : 1);
 933         m_aCurToken.aText = m_aBufferString.copy(nRealStart, nOffset);
 934
 935         aRes.EndPos = nRealStart + nOffset;
 936     }
 937     m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength());
 938
 939     if (TEND != m_aCurToken.eType)
 940         m_nBufferIndex = aRes.EndPos;
 941 }
 942
 943 void SmParser5::NextTokenColor(SmTokenType dvipload)
 944 {
 945     sal_Int32 nBufLen = m_aBufferString.getLength();
 946     ParseResult aRes;
 947     sal_Int32 nRealStart;
 948     bool bCont;
 949
 950     do
 951     {
 952         // skip white spaces
 953         while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex))
 954             ++m_nBufferIndex;
 955         //parse, there are few options, so less strict.
 956         aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, "",
 957                                        coContFlags, "");
 958         nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
 959         m_nBufferIndex = nRealStart;
 960         bCont = false;
 961         if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart])
 962         {
 963             // keep data needed for tokens row and col entry up to date
 964             ++m_nRow;
 965             m_nBufferIndex = m_nColOff = nRealStart + 1;
 966             bCont = true;
 967         }
 968         else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
 969         {
 970             if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
 971             {
 972                 //SkipComment
 973                 m_nBufferIndex = nRealStart + 2;
 974                 while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex])
 975                     ++m_nBufferIndex;
 976                 bCont = true;
 977             }
 978         }
 979     } while (bCont);
 980
 981     // set index of current token
 982     m_nTokenIndex = m_nBufferIndex;
 983     sal_uInt32 nCol = nRealStart - m_nColOff;
 984
 985     if (nRealStart >= nBufLen)
 986         m_aCurToken.eType = TEND;
 987     else if (aRes.TokenType & KParseType::IDENTNAME)
 988     {
 989         sal_Int32 n = aRes.EndPos - nRealStart;
 990         assert(n >= 0);
 991         OUString aName(m_aBufferString.copy(nRealStart, n));
 992         switch (dvipload)
 993         {
 994             case TCOLOR:
 995                 m_aCurToken = starmathdatabase::Identify_ColorName_Parser(aName);
 996                 break;
 997             case TDVIPSNAMESCOL:
 998                 m_aCurToken = starmathdatabase::Identify_ColorName_DVIPSNAMES(aName);
 999                 break;
1000             default:
1001                 m_aCurToken = starmathdatabase::Identify_ColorName_Parser(aName);
1002                 break;
1003         }
1004     }
1005     else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1006     {
1007         if (m_aBufferString[nRealStart] == '#' && !m_aBufferString.match("##", nRealStart))
1008         {
1009             m_aCurToken.eType = THEX;
1010             m_aCurToken.cMathChar = u"";
1011             m_aCurToken.nGroup = TG::Color;
1012             m_aCurToken.nLevel = 0;
1013             m_aCurToken.aText = "hex";
1014         }
1015     }
1016     else
1017         m_aCurToken.eType = TNONE;
1018
1019     m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength());
1020     if (TEND != m_aCurToken.eType)
1021         m_nBufferIndex = aRes.EndPos;
1022 }
1023
1024 void SmParser5::NextTokenFontSize()
1025 {
1026     sal_Int32 nBufLen = m_aBufferString.getLength();
1027     ParseResult aRes;
1028     sal_Int32 nRealStart;
1029     bool bCont;
1030     bool hex = false;
1031
1032     do
1033     {
1034         // skip white spaces
1035         while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex))
1036             ++m_nBufferIndex;
1037         //hexadecimal parser
1038         aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coNum16StartFlags, ".",
1039                                        coNum16ContFlags, ".,");
1040         if (aRes.TokenType == 0)
1041         {
1042             // Try again with the default token parsing.
1043             aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, "",
1044                                            coContFlags, "");
1045         }
1046         else
1047             hex = true;
1048         nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
1049         m_nBufferIndex = nRealStart;
1050         bCont = false;
1051         if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart])
1052         {
1053             // keep data needed for tokens row and col entry up to date
1054             ++m_nRow;
1055             m_nBufferIndex = m_nColOff = nRealStart + 1;
1056             bCont = true;
1057         }
1058         else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1059         {
1060             if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
1061             {
1062                 //SkipComment
1063                 m_nBufferIndex = nRealStart + 2;
1064                 while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex])
1065                     ++m_nBufferIndex;
1066                 bCont = true;
1067             }
1068         }
1069     } while (bCont);
1070
1071     // set index of current token
1072     m_nTokenIndex = m_nBufferIndex;
1073     sal_uInt32 nCol = nRealStart - m_nColOff;
1074
1075     if (nRealStart >= nBufLen)
1076         m_aCurToken.eType = TEND;
1077     else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1078     {
1079         if (aRes.EndPos - nRealStart == 1)
1080         {
1081             switch (m_aBufferString[nRealStart])
1082             {
1083                 case '*':
1084                     m_aCurToken.eType = TMULTIPLY;
1085                     m_aCurToken.setChar(MS_MULTIPLY);
1086                     m_aCurToken.nGroup = TG::Product;
1087                     m_aCurToken.nLevel = 0;
1088                     m_aCurToken.aText = "*";
1089                     break;
1090                 case '+':
1091                     m_aCurToken.eType = TPLUS;
1092                     m_aCurToken.setChar(MS_PLUS);
1093                     m_aCurToken.nGroup = TG::UnOper | TG::Sum;
1094                     m_aCurToken.nLevel = 5;
1095                     m_aCurToken.aText = "+";
1096                     break;
1097                 case '-':
1098                     m_aCurToken.eType = TMINUS;
1099                     m_aCurToken.setChar(MS_MINUS);
1100                     m_aCurToken.nGroup = TG::UnOper | TG::Sum;
1101                     m_aCurToken.nLevel = 5;
1102                     m_aCurToken.aText = "-";
1103                     break;
1104                 case '/':
1105                     m_aCurToken.eType = TDIVIDEBY;
1106                     m_aCurToken.setChar(MS_SLASH);
1107                     m_aCurToken.nGroup = TG::Product;
1108                     m_aCurToken.nLevel = 0;
1109                     m_aCurToken.aText = "/";
1110                     break;
1111                 default:
1112                     m_aCurToken.eType = TNONE;
1113                     break;
1114             }
1115         }
1116         else
1117             m_aCurToken.eType = TNONE;
1118     }
1119     else if (hex)
1120     {
1121         assert(aRes.EndPos > 0);
1122         sal_Int32 n = aRes.EndPos - nRealStart;
1123         assert(n >= 0);
1124         m_aCurToken.eType = THEX;
1125         m_aCurToken.cMathChar = u"";
1126         m_aCurToken.nGroup = TG::NONE;
1127         m_aCurToken.nLevel = 5;
1128         m_aCurToken.aText = m_aBufferString.copy(nRealStart, n);
1129     }
1130     else
1131         m_aCurToken.eType = TNONE;
1132
1133     m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength());
1134     if (TEND != m_aCurToken.eType)
1135         m_nBufferIndex = aRes.EndPos;
1136 }
1137
1138 namespace
1139 {
1140 SmNodeArray buildNodeArray(std::vector<std::unique_ptr<SmNode>>& rSubNodes)
1141 {
1142     SmNodeArray aSubArray(rSubNodes.size());
1143     for (size_t i = 0; i < rSubNodes.size(); ++i)
1144         aSubArray[i] = rSubNodes[i].release();
1145     return aSubArray;
1146 }
1147 } //end namespace
1148
1149 // grammar
1150 /*************************************************************************************************/
1151
1152 std::unique_ptr<SmTableNode> SmParser5::DoTable()
1153 {
1154     DepthProtect aDepthGuard(m_nParseDepth);
1155
1156     std::vector<std::unique_ptr<SmNode>> aLineArray;
1157     aLineArray.push_back(DoLine());
1158     while (m_aCurToken.eType == TNEWLINE)
1159     {
1160         NextToken();
1161         aLineArray.push_back(DoLine());
1162     }
1163     assert(m_aCurToken.eType == TEND);
1164     std::unique_ptr<SmTableNode> xSNode(new SmTableNode(m_aCurToken));
1165     xSNode->SetSelection(m_aCurESelection);
1166     xSNode->SetSubNodes(buildNodeArray(aLineArray));
1167     return xSNode;
1168 }
1169
1170 std::unique_ptr<SmNode> SmParser5::DoAlign(bool bUseExtraSpaces)
1171 // parse alignment info (if any), then go on with rest of expression
1172 {
1173     DepthProtect aDepthGuard(m_nParseDepth);
1174
1175     std::unique_ptr<SmStructureNode> xSNode;
1176
1177     if (TokenInGroup(TG::Align))
1178     {
1179         xSNode.reset(new SmAlignNode(m_aCurToken));
1180         xSNode->SetSelection(m_aCurESelection);
1181
1182         NextToken();
1183
1184         // allow for just one align statement in 5.0
1185         if (TokenInGroup(TG::Align))
1186             return DoError(SmParseError::DoubleAlign);
1187     }
1188
1189     auto pNode = DoExpression(bUseExtraSpaces);
1190
1191     if (xSNode)
1192     {
1193         xSNode->SetSubNode(0, pNode.release());
1194         return xSNode;
1195     }
1196     return pNode;
1197 }
1198
1199 // Postcondition: m_aCurToken.eType == TEND || m_aCurToken.eType == TNEWLINE
1200 std::unique_ptr<SmNode> SmParser5::DoLine()
1201 {
1202     DepthProtect aDepthGuard(m_nParseDepth);
1203
1204     std::vector<std::unique_ptr<SmNode>> ExpressionArray;
1205
1206     // start with single expression that may have an alignment statement
1207     // (and go on with expressions that must not have alignment
1208     // statements in 'while' loop below. See also 'Expression()'.)
1209     if (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
1210         ExpressionArray.push_back(DoAlign());
1211
1212     while (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
1213         ExpressionArray.push_back(DoExpression());
1214
1215     //If there's no expression, add an empty one.
1216     //this is to avoid a formula tree without any caret
1217     //positions, in visual formula editor.
1218     if (ExpressionArray.empty())
1219     {
1220         SmToken aTok;
1221         aTok.eType = TNEWLINE;
1222         ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok)));
1223     }
1224
1225     auto xSNode = std::make_unique<SmLineNode>(m_aCurToken);
1226     xSNode->SetSelection(m_aCurESelection);
1227     xSNode->SetSubNodes(buildNodeArray(ExpressionArray));
1228     return xSNode;
1229 }
1230
1231 std::unique_ptr<SmNode> SmParser5::DoExpression(bool bUseExtraSpaces)
1232 {
1233     DepthProtect aDepthGuard(m_nParseDepth);
1234
1235     std::vector<std::unique_ptr<SmNode>> RelationArray;
1236     RelationArray.push_back(DoRelation());
1237     while (m_aCurToken.nLevel >= 4)
1238         RelationArray.push_back(DoRelation());
1239
1240     if (RelationArray.size() > 1)
1241     {
1242         std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken));
1243         xSNode->SetSubNodes(buildNodeArray(RelationArray));
1244         xSNode->SetUseExtraSpaces(bUseExtraSpaces);
1245         return xSNode;
1246     }
1247     else
1248     {
1249         // This expression has only one node so just push this node.
1250         return std::move(RelationArray[0]);
1251     }
1252 }
1253
1254 std::unique_ptr<SmNode> SmParser5::DoRelation()
1255 {
1256     DepthProtect aDepthGuard(m_nParseDepth);
1257
1258     int nDepthLimit = m_nParseDepth;
1259
1260     auto xFirst = DoSum();
1261     while (TokenInGroup(TG::Relation))
1262     {
1263         std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1264         xSNode->SetSelection(m_aCurESelection);
1265         auto xSecond = DoOpSubSup();
1266         auto xThird = DoSum();
1267         xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
1268         xFirst = std::move(xSNode);
1269
1270         ++m_nParseDepth;
1271         DepthProtect bDepthGuard(m_nParseDepth);
1272     }
1273
1274     m_nParseDepth = nDepthLimit;
1275
1276     return xFirst;
1277 }
1278
1279 std::unique_ptr<SmNode> SmParser5::DoSum()
1280 {
1281     DepthProtect aDepthGuard(m_nParseDepth);
1282
1283     int nDepthLimit = m_nParseDepth;
1284
1285     auto xFirst = DoProduct();
1286     while (TokenInGroup(TG::Sum))
1287     {
1288         std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1289         xSNode->SetSelection(m_aCurESelection);
1290         auto xSecond = DoOpSubSup();
1291         auto xThird = DoProduct();
1292         xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
1293         xFirst = std::move(xSNode);
1294
1295         ++m_nParseDepth;
1296         DepthProtect bDepthGuard(m_nParseDepth);
1297     }
1298
1299     m_nParseDepth = nDepthLimit;
1300
1301     return xFirst;
1302 }
1303
1304 std::unique_ptr<SmNode> SmParser5::DoProduct()
1305 {
1306     DepthProtect aDepthGuard(m_nParseDepth);
1307
1308     auto xFirst = DoPower();
1309
1310     int nDepthLimit = 0;
1311
1312     while (TokenInGroup(TG::Product))
1313     {
1314         //this linear loop builds a recursive structure, if it gets
1315         //too deep then later processing, e.g. releasing the tree,
1316         //can exhaust stack
1317         if (m_nParseDepth + nDepthLimit > DEPTH_LIMIT)
1318             throw std::range_error("parser depth limit");
1319
1320         std::unique_ptr<SmStructureNode> xSNode;
1321         std::unique_ptr<SmNode> xOper;
1322
1323         SmTokenType eType = m_aCurToken.eType;
1324         switch (eType)
1325         {
1326             case TOVER:
1327                 xSNode.reset(new SmBinVerNode(m_aCurToken));
1328                 xSNode->SetSelection(m_aCurESelection);
1329                 xOper.reset(new SmRectangleNode(m_aCurToken));
1330                 xOper->SetSelection(m_aCurESelection);
1331                 NextToken();
1332                 break;
1333
1334             case TBOPER:
1335                 xSNode.reset(new SmBinHorNode(m_aCurToken));
1336
1337                 NextToken();
1338
1339                 //Let the glyph node know it's a binary operation
1340                 m_aCurToken.eType = TBOPER;
1341                 m_aCurToken.nGroup = TG::Product;
1342                 xOper = DoGlyphSpecial();
1343                 break;
1344
1345             case TOVERBRACE:
1346             case TUNDERBRACE:
1347                 xSNode.reset(new SmVerticalBraceNode(m_aCurToken));
1348                 xSNode->SetSelection(m_aCurESelection);
1349                 xOper.reset(new SmMathSymbolNode(m_aCurToken));
1350                 xOper->SetSelection(m_aCurESelection);
1351
1352                 NextToken();
1353                 break;
1354
1355             case TWIDEBACKSLASH:
1356             case TWIDESLASH:
1357             {
1358                 SmBinDiagonalNode* pSTmp = new SmBinDiagonalNode(m_aCurToken);
1359                 pSTmp->SetAscending(eType == TWIDESLASH);
1360                 xSNode.reset(pSTmp);
1361
1362                 xOper.reset(new SmPolyLineNode(m_aCurToken));
1363                 xOper->SetSelection(m_aCurESelection);
1364                 NextToken();
1365
1366                 break;
1367             }
1368
1369             default:
1370                 xSNode.reset(new SmBinHorNode(m_aCurToken));
1371                 xSNode->SetSelection(m_aCurESelection);
1372
1373                 xOper = DoOpSubSup();
1374         }
1375
1376         auto xArg = DoPower();
1377         xSNode->SetSubNodesBinMo(std::move(xFirst), std::move(xOper), std::move(xArg));
1378         xFirst = std::move(xSNode);
1379         ++nDepthLimit;
1380     }
1381     return xFirst;
1382 }
1383
1384 std::unique_ptr<SmNode> SmParser5::DoSubSup(TG nActiveGroup, std::unique_ptr<SmNode> xGivenNode)
1385 {
1386     DepthProtect aDepthGuard(m_nParseDepth);
1387
1388     assert(nActiveGroup == TG::Power || nActiveGroup == TG::Limit);
1389     assert(m_aCurToken.nGroup == nActiveGroup);
1390
1391     std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
1392     pNode->SetSelection(m_aCurESelection);
1393     //! Of course 'm_aCurToken' is just the first sub-/supscript token.
1394     //! It should be of no further interest. The positions of the
1395     //! sub-/supscripts will be identified by the corresponding subnodes
1396     //! index in the 'aSubNodes' array (enum value from 'SmSubSup').
1397
1398     pNode->SetUseLimits(nActiveGroup == TG::Limit);
1399
1400     // initialize subnodes array
1401     std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
1402     aSubNodes[0] = std::move(xGivenNode);
1403
1404     // process all sub-/supscripts
1405     int nIndex = 0;
1406     while (TokenInGroup(nActiveGroup))
1407     {
1408         SmTokenType eType(m_aCurToken.eType);
1409
1410         switch (eType)
1411         {
1412             case TRSUB:
1413                 nIndex = static_cast<int>(RSUB);
1414                 break;
1415             case TRSUP:
1416                 nIndex = static_cast<int>(RSUP);
1417                 break;
1418             case TFROM:
1419             case TCSUB:
1420                 nIndex = static_cast<int>(CSUB);
1421                 break;
1422             case TTO:
1423             case TCSUP:
1424                 nIndex = static_cast<int>(CSUP);
1425                 break;
1426             case TLSUB:
1427                 nIndex = static_cast<int>(LSUB);
1428                 break;
1429             case TLSUP:
1430                 nIndex = static_cast<int>(LSUP);
1431                 break;
1432             default:
1433                 SAL_WARN("starmath", "unknown case");
1434         }
1435         nIndex++;
1436         assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);
1437
1438         std::unique_ptr<SmNode> xENode;
1439         if (aSubNodes[nIndex]) // if already occupied at earlier iteration
1440         {
1441             // forget the earlier one, remember an error instead
1442             aSubNodes[nIndex].reset();
1443             xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
1444         }
1445         else
1446         {
1447             // skip sub-/supscript token
1448             NextToken();
1449         }
1450
1451         // get sub-/supscript node
1452         // (even when we saw a double-sub/supscript error in the above
1453         // in order to minimize mess and continue parsing.)
1454         std::unique_ptr<SmNode> xSNode;
1455         if (eType == TFROM || eType == TTO)
1456         {
1457             // parse limits in old 4.0 and 5.0 style
1458             xSNode = DoRelation();
1459         }
1460         else
1461             xSNode = DoTerm(true);
1462
1463         aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
1464     }
1465
1466     pNode->SetSubNodes(buildNodeArray(aSubNodes));
1467     return pNode;
1468 }
1469
1470 std::unique_ptr<SmNode> SmParser5::DoSubSupEvaluate(std::unique_ptr<SmNode> xGivenNode)
1471 {
1472     DepthProtect aDepthGuard(m_nParseDepth);
1473
1474     std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
1475     pNode->SetSelection(m_aCurESelection);
1476     pNode->SetUseLimits(true);
1477
1478     // initialize subnodes array
1479     std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
1480     aSubNodes[0] = std::move(xGivenNode);
1481
1482     // process all sub-/supscripts
1483     int nIndex = 0;
1484     while (TokenInGroup(TG::Limit))
1485     {
1486         SmTokenType eType(m_aCurToken.eType);
1487
1488         switch (eType)
1489         {
1490             case TFROM:
1491                 nIndex = static_cast<int>(RSUB);
1492                 break;
1493             case TTO:
1494                 nIndex = static_cast<int>(RSUP);
1495                 break;
1496             default:
1497                 SAL_WARN("starmath", "unknown case");
1498         }
1499         nIndex++;
1500         assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);
1501
1502         std::unique_ptr<SmNode> xENode;
1503         if (aSubNodes[nIndex]) // if already occupied at earlier iteration
1504         {
1505             // forget the earlier one, remember an error instead
1506             aSubNodes[nIndex].reset();
1507             xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
1508         }
1509         else
1510             NextToken(); // skip sub-/supscript token
1511
1512         // get sub-/supscript node
1513         std::unique_ptr<SmNode> xSNode;
1514         xSNode = DoTerm(true);
1515
1516         aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
1517     }
1518
1519     pNode->SetSubNodes(buildNodeArray(aSubNodes));
1520     return pNode;
1521 }
1522
1523 std::unique_ptr<SmNode> SmParser5::DoOpSubSup()
1524 {
1525     DepthProtect aDepthGuard(m_nParseDepth);
1526
1527     // get operator symbol
1528     auto xNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1529     xNode->SetSelection(m_aCurESelection);
1530     // skip operator token
1531     NextToken();
1532     // get sub- supscripts if any
1533     if (m_aCurToken.nGroup == TG::Power)
1534         return DoSubSup(TG::Power, std::move(xNode));
1535     return xNode;
1536 }
1537
1538 std::unique_ptr<SmNode> SmParser5::DoPower()
1539 {
1540     DepthProtect aDepthGuard(m_nParseDepth);
1541
1542     // get body for sub- supscripts on top of stack
1543     std::unique_ptr<SmNode> xNode(DoTerm(false));
1544
1545     if (m_aCurToken.nGroup == TG::Power)
1546         return DoSubSup(TG::Power, std::move(xNode));
1547     return xNode;
1548 }
1549
1550 std::unique_ptr<SmBlankNode> SmParser5::DoBlank()
1551 {
1552     DepthProtect aDepthGuard(m_nParseDepth);
1553
1554     assert(TokenInGroup(TG::Blank));
1555     std::unique_ptr<SmBlankNode> pBlankNode(new SmBlankNode(m_aCurToken));
1556     pBlankNode->SetSelection(m_aCurESelection);
1557
1558     do
1559     {
1560         pBlankNode->IncreaseBy(m_aCurToken);
1561         NextToken();
1562     } while (TokenInGroup(TG::Blank));
1563
1564     // Ignore trailing spaces, if corresponding option is set
1565     if (m_aCurToken.eType == TNEWLINE
1566         || (m_aCurToken.eType == TEND && !utl::ConfigManager::IsFuzzing()
1567             && SM_MOD()->GetConfig()->IsIgnoreSpacesRight()))
1568     {
1569         pBlankNode->Clear();
1570     }
1571     return pBlankNode;
1572 }
1573
1574 std::unique_ptr<SmNode> SmParser5::DoTerm(bool bGroupNumberIdent)
1575 {
1576     DepthProtect aDepthGuard(m_nParseDepth);
1577
1578     switch (m_aCurToken.eType)
1579     {
1580         case TESCAPE:
1581             return DoEscape();
1582
1583         case TNOSPACE:
1584         case TLGROUP:
1585         {
1586             bool bNoSpace = m_aCurToken.eType == TNOSPACE;
1587             if (bNoSpace)
1588                 NextToken();
1589             if (m_aCurToken.eType != TLGROUP)
1590                 return DoTerm(false); // nospace is no longer concerned
1591
1592             NextToken();
1593
1594             // allow for empty group
1595             if (m_aCurToken.eType == TRGROUP)
1596             {
1597                 std::unique_ptr<SmStructureNode> xSNode(new SmExpressionNode(m_aCurToken));
1598                 xSNode->SetSelection(m_aCurESelection);
1599                 xSNode->SetSubNodes(nullptr, nullptr);
1600
1601                 NextToken();
1602                 return std::unique_ptr<SmNode>(xSNode.release());
1603             }
1604
1605             auto pNode = DoAlign(!bNoSpace);
1606             if (m_aCurToken.eType == TRGROUP)
1607             {
1608                 NextToken();
1609                 return pNode;
1610             }
1611             auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
1612             xSNode->SetSelection(m_aCurESelection);
1613             std::unique_ptr<SmNode> xError(DoError(SmParseError::RgroupExpected));
1614             xSNode->SetSubNodes(std::move(pNode), std::move(xError));
1615             return std::unique_ptr<SmNode>(xSNode.release());
1616         }
1617
1618         case TLEFT:
1619             return DoBrace();
1620         case TEVALUATE:
1621             return DoEvaluate();
1622
1623         case TBLANK:
1624         case TSBLANK:
1625             return DoBlank();
1626
1627         case TTEXT:
1628         {
1629             auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_TEXT);
1630             pNode->SetSelection(m_aCurESelection);
1631             NextToken();
1632             return std::unique_ptr<SmNode>(pNode.release());
1633         }
1634         case TCHARACTER:
1635         {
1636             auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_VARIABLE);
1637             pNode->SetSelection(m_aCurESelection);
1638             NextToken();
1639             return std::unique_ptr<SmNode>(pNode.release());
1640         }
1641         case TIDENT:
1642         case TNUMBER:
1643         {
1644             auto pTextNode = std::make_unique<SmTextNode>(
1645                 m_aCurToken, m_aCurToken.eType == TNUMBER ? FNT_NUMBER : FNT_VARIABLE);
1646             pTextNode->SetSelection(m_aCurESelection);
1647             if (!bGroupNumberIdent)
1648             {
1649                 NextToken();
1650                 return std::unique_ptr<SmNode>(pTextNode.release());
1651             }
1652             std::vector<std::unique_ptr<SmNode>> aNodes;
1653             // Some people want to be able to write "x_2n" for "x_{2n}"
1654             // although e.g. LaTeX or AsciiMath interpret that as "x_2 n".
1655             // The tokenizer skips whitespaces so we need some additional
1656             // work to distinguish from "x_2 n".
1657             // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and
1658             // https://bugs.libreoffice.org/show_bug.cgi?id=55853
1659             sal_Int32 nBufLen = m_aBufferString.getLength();
1660
1661             // We need to be careful to call NextToken() only after having
1662             // tested for a whitespace separator (otherwise it will be
1663             // skipped!)
1664             bool moveToNextToken = true;
1665             while (m_nBufferIndex < nBufLen
1666                    && m_pSysCC->getType(m_aBufferString, m_nBufferIndex)
1667                           != UnicodeType::SPACE_SEPARATOR)
1668             {
1669                 NextToken();
1670                 if (m_aCurToken.eType != TNUMBER && m_aCurToken.eType != TIDENT)
1671                 {
1672                     // Neither a number nor an identifier. We just moved to
1673                     // the next token, so no need to do that again.
1674                     moveToNextToken = false;
1675                     break;
1676                 }
1677                 aNodes.emplace_back(std::unique_ptr<SmNode>(new SmTextNode(
1678                     m_aCurToken, m_aCurToken.eType == TNUMBER ? FNT_NUMBER : FNT_VARIABLE)));
1679             }
1680             if (moveToNextToken)
1681                 NextToken();
1682             if (aNodes.empty())
1683                 return std::unique_ptr<SmNode>(pTextNode.release());
1684             // We have several concatenated identifiers and numbers.
1685             // Let's group them into one SmExpressionNode.
1686             aNodes.insert(aNodes.begin(), std::move(pTextNode));
1687             std::unique_ptr<SmExpressionNode> xNode(new SmExpressionNode(SmToken()));
1688             xNode->SetSubNodes(buildNodeArray(aNodes));
1689             return std::unique_ptr<SmNode>(xNode.release());
1690         }
1691         case TLEFTARROW:
1692         case TRIGHTARROW:
1693         case TUPARROW:
1694         case TDOWNARROW:
1695         case TCIRC:
1696         case TDRARROW:
1697         case TDLARROW:
1698         case TDLRARROW:
1699         case TEXISTS:
1700         case TNOTEXISTS:
1701         case TFORALL:
1702         case TPARTIAL:
1703         case TNABLA:
1704         case TLAPLACE:
1705         case TFOURIER:
1706         case TTOWARD:
1707         case TDOTSAXIS:
1708         case TDOTSDIAG:
1709         case TDOTSDOWN:
1710         case TDOTSLOW:
1711         case TDOTSUP:
1712         case TDOTSVERT:
1713         {
1714             auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1715             pNode->SetSelection(m_aCurESelection);
1716             NextToken();
1717             return std::unique_ptr<SmNode>(pNode.release());
1718         }
1719
1720         case TSETN:
1721         case TSETZ:
1722         case TSETQ:
1723         case TSETR:
1724         case TSETC:
1725         case THBAR:
1726         case TLAMBDABAR:
1727         case TBACKEPSILON:
1728         case TALEPH:
1729         case TIM:
1730         case TRE:
1731         case TWP:
1732         case TEMPTYSET:
1733         case TINFINITY:
1734         {
1735             auto pNode = std::make_unique<SmMathIdentifierNode>(m_aCurToken);
1736             pNode->SetSelection(m_aCurESelection);
1737             NextToken();
1738             return std::unique_ptr<SmNode>(pNode.release());
1739         }
1740
1741         case TPLACE:
1742         {
1743             auto pNode = std::make_unique<SmPlaceNode>(m_aCurToken);
1744             pNode->SetSelection(m_aCurESelection);
1745             NextToken();
1746             return std::unique_ptr<SmNode>(pNode.release());
1747         }
1748
1749         case TSPECIAL:
1750             return DoSpecial();
1751
1752         case TBINOM:
1753             return DoBinom();
1754
1755         case TFRAC:
1756             return DoFrac();
1757
1758         case TSTACK:
1759             return DoStack();
1760
1761         case TMATRIX:
1762             return DoMatrix();
1763
1764         case THEX:
1765             NextTokenFontSize();
1766             if (m_aCurToken.eType == THEX)
1767             {
1768                 auto pTextNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_NUMBER);
1769                 pTextNode->SetSelection(m_aCurESelection);
1770                 NextToken();
1771                 return pTextNode;
1772             }
1773             else
1774                 return DoError(SmParseError::NumberExpected);
1775         default:
1776             if (TokenInGroup(TG::LBrace))
1777                 return DoBrace();
1778             if (TokenInGroup(TG::Oper))
1779                 return DoOperator();
1780             if (TokenInGroup(TG::UnOper))
1781                 return DoUnOper();
1782             if (TokenInGroup(TG::Attribute) || TokenInGroup(TG::FontAttr))
1783             {
1784                 std::stack<std::unique_ptr<SmStructureNode>,
1785                            std::vector<std::unique_ptr<SmStructureNode>>>
1786                     aStack;
1787                 bool bIsAttr;
1788                 for (;;)
1789                 {
1790                     bIsAttr = TokenInGroup(TG::Attribute);
1791                     if (!bIsAttr && !TokenInGroup(TG::FontAttr))
1792                         break;
1793                     aStack.push(bIsAttr ? DoAttribute() : DoFontAttribute());
1794                 }
1795
1796                 auto xFirstNode = DoPower();
1797                 while (!aStack.empty())
1798                 {
1799                     std::unique_ptr<SmStructureNode> xNode = std::move(aStack.top());
1800                     aStack.pop();
1801                     xNode->SetSubNodes(nullptr, std::move(xFirstNode));
1802                     xFirstNode = std::move(xNode);
1803                 }
1804                 return xFirstNode;
1805             }
1806             if (TokenInGroup(TG::Function))
1807                 return DoFunction();
1808             return DoError(SmParseError::UnexpectedChar);
1809     }
1810 }
1811
1812 std::unique_ptr<SmNode> SmParser5::DoEscape()
1813 {
1814     DepthProtect aDepthGuard(m_nParseDepth);
1815
1816     NextToken();
1817
1818     switch (m_aCurToken.eType)
1819     {
1820         case TLPARENT:
1821         case TRPARENT:
1822         case TLBRACKET:
1823         case TRBRACKET:
1824         case TLDBRACKET:
1825         case TRDBRACKET:
1826         case TLBRACE:
1827         case TLGROUP:
1828         case TRBRACE:
1829         case TRGROUP:
1830         case TLANGLE:
1831         case TRANGLE:
1832         case TLCEIL:
1833         case TRCEIL:
1834         case TLFLOOR:
1835         case TRFLOOR:
1836         case TLLINE:
1837         case TRLINE:
1838         case TLDLINE:
1839         case TRDLINE:
1840         {
1841             auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1842             pNode->SetSelection(m_aCurESelection);
1843             NextToken();
1844             return std::unique_ptr<SmNode>(pNode.release());
1845         }
1846         default:
1847             return DoError(SmParseError::UnexpectedToken);
1848     }
1849 }
1850
1851 std::unique_ptr<SmOperNode> SmParser5::DoOperator()
1852 {
1853     DepthProtect aDepthGuard(m_nParseDepth);
1854
1855     assert(TokenInGroup(TG::Oper));
1856
1857     auto xSNode = std::make_unique<SmOperNode>(m_aCurToken);
1858     xSNode->SetSelection(m_aCurESelection);
1859
1860     // get operator
1861     auto xOperator = DoOper();
1862
1863     if (m_aCurToken.nGroup == TG::Limit || m_aCurToken.nGroup == TG::Power)
1864         xOperator = DoSubSup(m_aCurToken.nGroup, std::move(xOperator));
1865
1866     // get argument
1867     auto xArg = DoPower();
1868
1869     xSNode->SetSubNodes(std::move(xOperator), std::move(xArg));
1870     return xSNode;
1871 }
1872
1873 std::unique_ptr<SmNode> SmParser5::DoOper()
1874 {
1875     DepthProtect aDepthGuard(m_nParseDepth);
1876
1877     SmTokenType eType(m_aCurToken.eType);
1878     std::unique_ptr<SmNode> pNode;
1879
1880     switch (eType)
1881     {
1882         case TSUM:
1883         case TPROD:
1884         case TCOPROD:
1885         case TINT:
1886         case TINTD:
1887         case TIINT:
1888         case TIIINT:
1889         case TLINT:
1890         case TLLINT:
1891         case TLLLINT:
1892             pNode.reset(new SmMathSymbolNode(m_aCurToken));
1893             pNode->SetSelection(m_aCurESelection);
1894             break;
1895
1896         case TLIM:
1897         case TLIMSUP:
1898         case TLIMINF:
1899             m_aCurToken.aText
1900                 = eType == TLIMSUP ? u"lim sup" : eType == TLIMINF ? u"lim inf" : u"lim";
1901             pNode.reset(new SmTextNode(m_aCurToken, FNT_TEXT));
1902             pNode->SetSelection(m_aCurESelection);
1903             break;
1904
1905         case TOPER:
1906             NextToken();
1907             OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token");
1908             m_aCurToken.eType = TOPER;
1909             pNode.reset(new SmGlyphSpecialNode(m_aCurToken));
1910             pNode->SetSelection(m_aCurESelection);
1911             break;
1912
1913         default:
1914             assert(false && "unknown case");
1915     }
1916
1917     NextToken();
1918     return pNode;
1919 }
1920
1921 std::unique_ptr<SmStructureNode> SmParser5::DoUnOper()
1922 {
1923     DepthProtect aDepthGuard(m_nParseDepth);
1924
1925     assert(TokenInGroup(TG::UnOper));
1926
1927     SmToken aNodeToken = m_aCurToken;
1928     ESelection aESelection = m_aCurESelection;
1929     SmTokenType eType = m_aCurToken.eType;
1930     bool bIsPostfix = eType == TFACT;
1931
1932     std::unique_ptr<SmStructureNode> xSNode;
1933     std::unique_ptr<SmNode> xOper;
1934     std::unique_ptr<SmNode> xExtra;
1935     std::unique_ptr<SmNode> xArg;
1936
1937     switch (eType)
1938     {
1939         case TABS:
1940         case TSQRT:
1941             NextToken();
1942             break;
1943
1944         case TNROOT:
1945             NextToken();
1946             xExtra = DoPower();
1947             break;
1948
1949         case TUOPER:
1950             NextToken();
1951             //Let the glyph know what it is...
1952             m_aCurToken.eType = TUOPER;
1953             m_aCurToken.nGroup = TG::UnOper;
1954             xOper = DoGlyphSpecial();
1955             break;
1956
1957         case TPLUS:
1958         case TMINUS:
1959         case TPLUSMINUS:
1960         case TMINUSPLUS:
1961         case TNEG:
1962         case TFACT:
1963             xOper = DoOpSubSup();
1964             break;
1965
1966         default:
1967             assert(false);
1968     }
1969
1970     // get argument
1971     xArg = DoPower();
1972
1973     if (eType == TABS)
1974     {
1975         xSNode.reset(new SmBraceNode(aNodeToken));
1976         xSNode->SetSelection(aESelection);
1977         xSNode->SetScaleMode(SmScaleMode::Height);
1978
1979         // build nodes for left & right lines
1980         // (text, group, level of the used token are of no interest here)
1981         // we'll use row & column of the keyword for abs
1982         aNodeToken.eType = TABS;
1983
1984         aNodeToken.setChar(MS_VERTLINE);
1985         std::unique_ptr<SmNode> xLeft(new SmMathSymbolNode(aNodeToken));
1986         xLeft->SetSelection(aESelection);
1987         std::unique_ptr<SmNode> xRight(new SmMathSymbolNode(aNodeToken));
1988         xRight->SetSelection(aESelection);
1989
1990         xSNode->SetSubNodes(std::move(xLeft), std::move(xArg), std::move(xRight));
1991     }
1992     else if (eType == TSQRT || eType == TNROOT)
1993     {
1994         xSNode.reset(new SmRootNode(aNodeToken));
1995         xSNode->SetSelection(aESelection);
1996         xOper.reset(new SmRootSymbolNode(aNodeToken));
1997         xOper->SetSelection(aESelection);
1998         xSNode->SetSubNodes(std::move(xExtra), std::move(xOper), std::move(xArg));
1999     }
2000     else
2001     {
2002         xSNode.reset(new SmUnHorNode(aNodeToken));
2003         xSNode->SetSelection(aESelection);
2004         if (bIsPostfix)
2005             xSNode->SetSubNodes(std::move(xArg), std::move(xOper));
2006         else
2007         {
2008             // prefix operator
2009             xSNode->SetSubNodes(std::move(xOper), std::move(xArg));
2010         }
2011     }
2012     return xSNode;
2013 }
2014
2015 std::unique_ptr<SmStructureNode> SmParser5::DoAttribute()
2016 {
2017     DepthProtect aDepthGuard(m_nParseDepth);
2018
2019     assert(TokenInGroup(TG::Attribute));
2020
2021     auto xSNode = std::make_unique<SmAttributeNode>(m_aCurToken);
2022     xSNode->SetSelection(m_aCurESelection);
2023     std::unique_ptr<SmNode> xAttr;
2024     SmScaleMode eScaleMode = SmScaleMode::None;
2025
2026     // get appropriate node for the attribute itself
2027     switch (m_aCurToken.eType)
2028     {
2029         case TUNDERLINE:
2030         case TOVERLINE:
2031         case TOVERSTRIKE:
2032             xAttr.reset(new SmRectangleNode(m_aCurToken));
2033             xAttr->SetSelection(m_aCurESelection);
2034             eScaleMode = SmScaleMode::Width;
2035             break;
2036
2037         case TWIDEVEC:
2038         case TWIDEHARPOON:
2039         case TWIDEHAT:
2040         case TWIDETILDE:
2041             xAttr.reset(new SmMathSymbolNode(m_aCurToken));
2042             xAttr->SetSelection(m_aCurESelection);
2043             eScaleMode = SmScaleMode::Width;
2044             break;
2045
2046         default:
2047             xAttr.reset(new SmMathSymbolNode(m_aCurToken));
2048             xAttr->SetSelection(m_aCurESelection);
2049     }
2050
2051     NextToken();
2052
2053     xSNode->SetSubNodes(std::move(xAttr), nullptr); // the body will be filled later
2054     xSNode->SetScaleMode(eScaleMode);
2055     return xSNode;
2056 }
2057
2058 std::unique_ptr<SmStructureNode> SmParser5::DoFontAttribute()
2059 {
2060     DepthProtect aDepthGuard(m_nParseDepth);
2061
2062     assert(TokenInGroup(TG::FontAttr));
2063
2064     switch (m_aCurToken.eType)
2065     {
2066         case TITALIC:
2067         case TNITALIC:
2068         case TBOLD:
2069         case TNBOLD:
2070         case TPHANTOM:
2071         {
2072             auto pNode = std::make_unique<SmFontNode>(m_aCurToken);
2073             pNode->SetSelection(m_aCurESelection);
2074             NextToken();
2075             return pNode;
2076         }
2077
2078         case TSIZE:
2079             return DoFontSize();
2080
2081         case TFONT:
2082             return DoFont();
2083
2084         case TCOLOR:
2085             return DoColor();
2086
2087         default:
2088             assert(false);
2089             return {};
2090     }
2091 }
2092
2093 std::unique_ptr<SmStructureNode> SmParser5::DoColor()
2094 {
2095     DepthProtect aDepthGuard(m_nParseDepth);
2096
2097     assert(m_aCurToken.eType == TCOLOR);
2098     sal_Int32 nBufferIndex = m_nBufferIndex;
2099     NextTokenColor(TCOLOR);
2100     SmToken aToken;
2101     ESelection aESelection;
2102
2103     if (m_aCurToken.eType == TDVIPSNAMESCOL)
2104         NextTokenColor(TDVIPSNAMESCOL);
2105     if (m_aCurToken.eType == TERROR)
2106         return DoError(SmParseError::ColorExpected);
2107     if (TokenInGroup(TG::Color))
2108     {
2109         aToken = m_aCurToken;
2110         aESelection = m_aCurESelection;
2111         if (m_aCurToken.eType == TRGB) //loads r, g and b
2112         {
2113             sal_uInt32 nr, ng, nb, nc;
2114             NextTokenFontSize();
2115             if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2116                 return DoError(SmParseError::ColorExpected);
2117             nr = m_aCurToken.aText.toUInt32();
2118             if (nr > 255)
2119                 return DoError(SmParseError::ColorExpected);
2120             NextTokenFontSize();
2121             if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2122                 return DoError(SmParseError::ColorExpected);
2123             ng = m_aCurToken.aText.toUInt32();
2124             if (ng > 255)
2125                 return DoError(SmParseError::ColorExpected);
2126             NextTokenFontSize();
2127             if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2128                 return DoError(SmParseError::ColorExpected);
2129             nb = m_aCurToken.aText.toUInt32();
2130             if (nb > 255)
2131                 return DoError(SmParseError::ColorExpected);
2132             nc = nb | ng << 8 | nr << 16 | sal_uInt32(0) << 24;
2133             aToken.cMathChar = OUString::number(nc, 16);
2134         }
2135         else if (m_aCurToken.eType == TRGBA) //loads r, g and b
2136         {
2137             sal_uInt32 nr, na, ng, nb, nc;
2138             NextTokenFontSize();
2139             if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2140                 return DoError(SmParseError::ColorExpected);
2141             nr = m_aCurToken.aText.toUInt32();
2142             if (nr > 255)
2143                 return DoError(SmParseError::ColorExpected);
2144             NextTokenFontSize();
2145             if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2146                 return DoError(SmParseError::ColorExpected);
2147             ng = m_aCurToken.aText.toUInt32();
2148             if (ng > 255)
2149                 return DoError(SmParseError::ColorExpected);
2150             NextTokenFontSize();
2151             if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2152                 return DoError(SmParseError::ColorExpected);
2153             nb = m_aCurToken.aText.toUInt32();
2154             if (nb > 255)
2155                 return DoError(SmParseError::ColorExpected);
2156             NextTokenFontSize();
2157             if (lcl_IsNotWholeNumber(m_aCurToken.aText))
2158                 return DoError(SmParseError::ColorExpected);
2159             na = m_aCurToken.aText.toUInt32();
2160             if (na > 255)
2161                 return DoError(SmParseError::ColorExpected);
2162             nc = nb | ng << 8 | nr << 16 | na << 24;
2163             aToken.cMathChar = OUString::number(nc, 16);
2164         }
2165         else if (m_aCurToken.eType == THEX) //loads hex code
2166         {
2167             sal_uInt32 nc;
2168             NextTokenFontSize();
2169             if (lcl_IsNotWholeNumber16(m_aCurToken.aText))
2170                 return DoError(SmParseError::ColorExpected);
2171             nc = m_aCurToken.aText.toUInt32(16);
2172             aToken.cMathChar = OUString::number(nc, 16);
2173         }
2174         aToken.aText = m_aBufferString.subView(nBufferIndex, m_nBufferIndex - nBufferIndex);
2175         NextToken();
2176     }
2177     else
2178         return DoError(SmParseError::ColorExpected);
2179
2180     std::unique_ptr<SmStructureNode> xNode;
2181     xNode.reset(new SmFontNode(aToken));
2182     xNode->SetSelection(aESelection);
2183     return xNode;
2184 }
2185
2186 std::unique_ptr<SmStructureNode> SmParser5::DoFont()
2187 {
2188     DepthProtect aDepthGuard(m_nParseDepth);
2189
2190     assert(m_aCurToken.eType == TFONT);
2191
2192     std::unique_ptr<SmStructureNode> xNode;
2193     // last font rules, get that one
2194     SmToken aToken;
2195     ESelection aESelection = m_aCurESelection;
2196     do
2197     {
2198         NextToken();
2199
2200         if (TokenInGroup(TG::Font))
2201         {
2202             aToken = m_aCurToken;
2203             NextToken();
2204         }
2205         else
2206         {
2207             return DoError(SmParseError::FontExpected);
2208         }
2209     } while (m_aCurToken.eType == TFONT);
2210
2211     xNode.reset(new SmFontNode(aToken));
2212     xNode->SetSelection(aESelection);
2213     return xNode;
2214 }
2215
2216 std::unique_ptr<SmStructureNode> SmParser5::DoFontSize()
2217 {
2218     DepthProtect aDepthGuard(m_nParseDepth);
2219     std::unique_ptr<SmFontNode> pFontNode(new SmFontNode(m_aCurToken));
2220     pFontNode->SetSelection(m_aCurESelection);
2221     NextTokenFontSize();
2222     FontSizeType Type;
2223
2224     switch (m_aCurToken.eType)
2225     {
2226         case THEX:
2227             Type = FontSizeType::ABSOLUT;
2228             break;
2229         case TPLUS:
2230             Type = FontSizeType::PLUS;
2231             break;
2232         case TMINUS:
2233             Type = FontSizeType::MINUS;
2234             break;
2235         case TMULTIPLY:
2236             Type = FontSizeType::MULTIPLY;
2237             break;
2238         case TDIVIDEBY:
2239             Type = FontSizeType::DIVIDE;
2240             break;
2241
2242         default:
2243             return DoError(SmParseError::SizeExpected);
2244     }
2245
2246     if (Type != FontSizeType::ABSOLUT)
2247     {
2248         NextTokenFontSize();
2249         if (m_aCurToken.eType != THEX)
2250             return DoError(SmParseError::SizeExpected);
2251     }
2252
2253     // get number argument
2254     Fraction aValue(1);
2255     if (lcl_IsNumber(m_aCurToken.aText))
2256     {
2257         aValue = m_aCurToken.aText.toDouble();
2258         //!! Reduce values in order to avoid numerical errors
2259         if (aValue.GetDenominator() > 1000)
2260         {
2261             tools::Long nNum = aValue.GetNumerator();
2262             tools::Long nDenom = aValue.GetDenominator();
2263             while (nDenom > 1000) //remove big denominator
2264             {
2265                 nNum /= 10;
2266                 nDenom /= 10;
2267             }
2268             aValue = Fraction(nNum, nDenom);
2269         }
2270     }
2271     else
2272         return DoError(SmParseError::SizeExpected);
2273
2274     pFontNode->SetSizeParameter(aValue, Type);
2275     NextToken();
2276     return pFontNode;
2277 }
2278
2279 std::unique_ptr<SmStructureNode> SmParser5::DoBrace()
2280 {
2281     DepthProtect aDepthGuard(m_nParseDepth);
2282
2283     assert(m_aCurToken.eType == TLEFT || TokenInGroup(TG::LBrace));
2284
2285     std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
2286     xSNode->SetSelection(m_aCurESelection);
2287     std::unique_ptr<SmNode> pBody, pLeft, pRight;
2288     SmScaleMode eScaleMode = SmScaleMode::None;
2289     SmParseError eError = SmParseError::None;
2290
2291     if (m_aCurToken.eType == TLEFT)
2292     {
2293         NextToken();
2294
2295         eScaleMode = SmScaleMode::Height;
2296
2297         // check for left bracket
2298         if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
2299         {
2300             pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2301             pLeft->SetSelection(m_aCurESelection);
2302
2303             NextToken();
2304             pBody = DoBracebody(true);
2305
2306             if (m_aCurToken.eType == TRIGHT)
2307             {
2308                 NextToken();
2309
2310                 // check for right bracket
2311                 if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
2312                 {
2313                     pRight.reset(new SmMathSymbolNode(m_aCurToken));
2314                     pRight->SetSelection(m_aCurESelection);
2315                     NextToken();
2316                 }
2317                 else
2318                     eError = SmParseError::RbraceExpected;
2319             }
2320             else
2321                 eError = SmParseError::RightExpected;
2322         }
2323         else
2324             eError = SmParseError::LbraceExpected;
2325     }
2326     else
2327     {
2328         assert(TokenInGroup(TG::LBrace));
2329
2330         pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2331         pLeft->SetSelection(m_aCurESelection);
2332
2333         NextToken();
2334         pBody = DoBracebody(false);
2335
2336         SmTokenType eExpectedType = TUNKNOWN;
2337         switch (pLeft->GetToken().eType)
2338         {
2339             case TLPARENT:
2340                 eExpectedType = TRPARENT;
2341                 break;
2342             case TLBRACKET:
2343                 eExpectedType = TRBRACKET;
2344                 break;
2345             case TLBRACE:
2346                 eExpectedType = TRBRACE;
2347                 break;
2348             case TLDBRACKET:
2349                 eExpectedType = TRDBRACKET;
2350                 break;
2351             case TLLINE:
2352                 eExpectedType = TRLINE;
2353                 break;
2354             case TLDLINE:
2355                 eExpectedType = TRDLINE;
2356                 break;
2357             case TLANGLE:
2358                 eExpectedType = TRANGLE;
2359                 break;
2360             case TLFLOOR:
2361                 eExpectedType = TRFLOOR;
2362                 break;
2363             case TLCEIL:
2364                 eExpectedType = TRCEIL;
2365                 break;
2366             case TLRLINE:
2367                 eExpectedType = TLRLINE;
2368                 break;
2369             case TLRDLINE:
2370                 eExpectedType = TLRDLINE;
2371                 break;
2372             default:
2373                 SAL_WARN("starmath", "unknown case");
2374         }
2375
2376         if (m_aCurToken.eType == eExpectedType)
2377         {
2378             pRight.reset(new SmMathSymbolNode(m_aCurToken));
2379             pRight->SetSelection(m_aCurESelection);
2380             NextToken();
2381         }
2382         else
2383             eError = SmParseError::ParentMismatch;
2384     }
2385
2386     if (eError == SmParseError::None)
2387     {
2388         assert(pLeft);
2389         assert(pRight);
2390         xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
2391         xSNode->SetScaleMode(eScaleMode);
2392         return xSNode;
2393     }
2394     return DoError(eError);
2395 }
2396
2397 std::unique_ptr<SmBracebodyNode> SmParser5::DoBracebody(bool bIsLeftRight)
2398 {
2399     DepthProtect aDepthGuard(m_nParseDepth);
2400
2401     auto pBody = std::make_unique<SmBracebodyNode>(m_aCurToken);
2402     pBody->SetSelection(m_aCurESelection);
2403
2404     std::vector<std::unique_ptr<SmNode>> aNodes;
2405     // get body if any
2406     if (bIsLeftRight)
2407     {
2408         do
2409         {
2410             if (m_aCurToken.eType == TMLINE)
2411             {
2412                 SmMathSymbolNode* pTempNode = new SmMathSymbolNode(m_aCurToken);
2413                 pTempNode->SetSelection(m_aCurESelection);
2414                 aNodes.emplace_back(std::unique_ptr<SmMathSymbolNode>(pTempNode));
2415                 NextToken();
2416             }
2417             else if (m_aCurToken.eType != TRIGHT)
2418             {
2419                 aNodes.push_back(DoAlign());
2420                 if (m_aCurToken.eType != TMLINE && m_aCurToken.eType != TRIGHT)
2421                     aNodes.emplace_back(DoError(SmParseError::RightExpected));
2422             }
2423         } while (m_aCurToken.eType != TEND && m_aCurToken.eType != TRIGHT);
2424     }
2425     else
2426     {
2427         do
2428         {
2429             if (m_aCurToken.eType == TMLINE)
2430             {
2431                 SmMathSymbolNode* pTempNode = new SmMathSymbolNode(m_aCurToken);
2432                 pTempNode->SetSelection(m_aCurESelection);
2433                 aNodes.emplace_back(std::unique_ptr<SmMathSymbolNode>(pTempNode));
2434                 NextToken();
2435             }
2436             else if (!TokenInGroup(TG::RBrace))
2437             {
2438                 aNodes.push_back(DoAlign());
2439                 if (m_aCurToken.eType != TMLINE && !TokenInGroup(TG::RBrace))
2440                     aNodes.emplace_back(DoError(SmParseError::RbraceExpected));
2441             }
2442         } while (m_aCurToken.eType != TEND && !TokenInGroup(TG::RBrace));
2443     }
2444
2445     pBody->SetSubNodes(buildNodeArray(aNodes));
2446     pBody->SetScaleMode(bIsLeftRight ? SmScaleMode::Height : SmScaleMode::None);
2447     return pBody;
2448 }
2449
2450 std::unique_ptr<SmNode> SmParser5::DoEvaluate()
2451 {
2452     DepthProtect aDepthGuard(m_nParseDepth);
2453
2454     // Create node
2455     std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
2456     xSNode->SetSelection(m_aCurESelection);
2457     SmToken aToken(TRLINE, MS_VERTLINE, "evaluate", TG::RBrace, 5);
2458
2459     // Parse body && left none
2460     NextToken();
2461     std::unique_ptr<SmNode> pBody = DoPower();
2462     SmToken bToken(TNONE, '\0', "", TG::LBrace, 5);
2463     std::unique_ptr<SmNode> pLeft;
2464     pLeft.reset(new SmMathSymbolNode(bToken));
2465
2466     // Mount nodes
2467     std::unique_ptr<SmNode> pRight;
2468     pRight.reset(new SmMathSymbolNode(aToken));
2469     xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
2470     xSNode->SetScaleMode(SmScaleMode::Height); // scalable line
2471
2472     // Parse from to
2473     if (m_aCurToken.nGroup == TG::Limit)
2474     {
2475         std::unique_ptr<SmNode> rSNode;
2476         rSNode = DoSubSupEvaluate(std::move(xSNode));
2477         rSNode->GetToken().eType = TEVALUATE;
2478         return rSNode;
2479     }
2480
2481     return xSNode;
2482 }
2483
2484 std::unique_ptr<SmTextNode> SmParser5::DoFunction()
2485 {
2486     DepthProtect aDepthGuard(m_nParseDepth);
2487
2488     if (m_aCurToken.eType == TFUNC)
2489     {
2490         NextToken(); // skip "FUNC"-statement
2491         m_aCurToken.eType = TFUNC;
2492         m_aCurToken.nGroup = TG::Function;
2493     }
2494     auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_FUNCTION);
2495     pNode->SetSelection(m_aCurESelection);
2496     NextToken();
2497     return pNode;
2498 }
2499
2500 std::unique_ptr<SmTableNode> SmParser5::DoBinom()
2501 {
2502     DepthProtect aDepthGuard(m_nParseDepth);
2503
2504     auto xSNode = std::make_unique<SmTableNode>(m_aCurToken);
2505     xSNode->SetSelection(m_aCurESelection);
2506
2507     NextToken();
2508
2509     auto xFirst = DoSum();
2510     auto xSecond = DoSum();
2511     xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond));
2512     return xSNode;
2513 }
2514
2515 std::unique_ptr<SmBinVerNode> SmParser5::DoFrac()
2516 {
2517     DepthProtect aDepthGuard(m_nParseDepth);
2518
2519     std::unique_ptr<SmBinVerNode> xSNode = std::make_unique<SmBinVerNode>(m_aCurToken);
2520     xSNode->SetSelection(m_aCurESelection);
2521     std::unique_ptr<SmNode> xOper = std::make_unique<SmRectangleNode>(m_aCurToken);
2522     xOper->SetSelection(m_aCurESelection);
2523
2524     NextToken();
2525
2526     auto xFirst = DoSum();
2527     auto xSecond = DoSum();
2528     xSNode->SetSubNodes(std::move(xFirst), std::move(xOper), std::move(xSecond));
2529     return xSNode;
2530 }
2531
2532 std::unique_ptr<SmStructureNode> SmParser5::DoStack()
2533 {
2534     DepthProtect aDepthGuard(m_nParseDepth);
2535
2536     std::unique_ptr<SmStructureNode> xSNode(new SmTableNode(m_aCurToken));
2537     xSNode->SetSelection(m_aCurESelection);
2538     NextToken();
2539     if (m_aCurToken.eType != TLGROUP)
2540         return DoError(SmParseError::LgroupExpected);
2541     std::vector<std::unique_ptr<SmNode>> aExprArr;
2542     do
2543     {
2544         NextToken();
2545         aExprArr.push_back(DoAlign());
2546     } while (m_aCurToken.eType == TPOUND);
2547
2548     if (m_aCurToken.eType == TRGROUP)
2549         NextToken();
2550     else
2551         aExprArr.emplace_back(DoError(SmParseError::RgroupExpected));
2552
2553     xSNode->SetSubNodes(buildNodeArray(aExprArr));
2554     return xSNode;
2555 }
2556
2557 std::unique_ptr<SmStructureNode> SmParser5::DoMatrix()
2558 {
2559     DepthProtect aDepthGuard(m_nParseDepth);
2560
2561     std::unique_ptr<SmMatrixNode> xMNode(new SmMatrixNode(m_aCurToken));
2562     xMNode->SetSelection(m_aCurESelection);
2563     NextToken();
2564     if (m_aCurToken.eType != TLGROUP)
2565         return DoError(SmParseError::LgroupExpected);
2566
2567     std::vector<std::unique_ptr<SmNode>> aExprArr;
2568     do
2569     {
2570         NextToken();
2571         aExprArr.push_back(DoAlign());
2572     } while (m_aCurToken.eType == TPOUND);
2573
2574     size_t nCol = aExprArr.size();
2575     size_t nRow = 1;
2576     while (m_aCurToken.eType == TDPOUND)
2577     {
2578         NextToken();
2579         for (size_t i = 0; i < nCol; i++)
2580         {
2581             auto xNode = DoAlign();
2582             if (i < (nCol - 1))
2583             {
2584                 if (m_aCurToken.eType == TPOUND)
2585                     NextToken();
2586                 else
2587                     xNode = DoError(SmParseError::PoundExpected);
2588             }
2589             aExprArr.emplace_back(std::move(xNode));
2590         }
2591         ++nRow;
2592     }
2593
2594     if (m_aCurToken.eType == TRGROUP)
2595         NextToken();
2596     else
2597     {
2598         std::unique_ptr<SmNode> xENode(DoError(SmParseError::RgroupExpected));
2599         if (aExprArr.empty())
2600             nRow = nCol = 1;
2601         else
2602             aExprArr.pop_back();
2603         aExprArr.emplace_back(std::move(xENode));
2604     }
2605
2606     xMNode->SetSubNodes(buildNodeArray(aExprArr));
2607     xMNode->SetRowCol(static_cast<sal_uInt16>(nRow), static_cast<sal_uInt16>(nCol));
2608     return std::unique_ptr<SmStructureNode>(xMNode.release());
2609 }
2610
2611 std::unique_ptr<SmSpecialNode> SmParser5::DoSpecial()
2612 {
2613     DepthProtect aDepthGuard(m_nParseDepth);
2614
2615     bool bReplace = false;
2616     OUString& rName = m_aCurToken.aText;
2617     OUString aNewName;
2618
2619     // conversion of symbol names for 6.0 (XML) file format
2620     // (name change on import / export.
2621     // UI uses localized names XML file format does not.)
2622     if (rName.startsWith("%"))
2623     {
2624         if (IsImportSymbolNames())
2625         {
2626             aNewName = SmLocalizedSymbolData::GetUiSymbolName(rName.subView(1));
2627             bReplace = true;
2628         }
2629         else if (IsExportSymbolNames())
2630         {
2631             aNewName = SmLocalizedSymbolData::GetExportSymbolName(rName.subView(1));
2632             bReplace = true;
2633         }
2634     }
2635     if (!aNewName.isEmpty())
2636         aNewName = "%" + aNewName;
2637
2638     if (bReplace && !aNewName.isEmpty() && rName != aNewName)
2639     {
2640         Replace(GetTokenIndex(), rName.getLength(), aNewName);
2641         rName = aNewName;
2642     }
2643
2644     // add symbol name to list of used symbols
2645     const OUString aSymbolName(m_aCurToken.aText.copy(1));
2646     if (!aSymbolName.isEmpty())
2647         m_aUsedSymbols.insert(aSymbolName);
2648
2649     auto pNode = std::make_unique<SmSpecialNode>(m_aCurToken);
2650     pNode->SetSelection(m_aCurESelection);
2651     NextToken();
2652     return pNode;
2653 }
2654
2655 std::unique_ptr<SmGlyphSpecialNode> SmParser5::DoGlyphSpecial()
2656 {
2657     DepthProtect aDepthGuard(m_nParseDepth);
2658
2659     auto pNode = std::make_unique<SmGlyphSpecialNode>(m_aCurToken);
2660     NextToken();
2661     return pNode;
2662 }
2663
2664 std::unique_ptr<SmExpressionNode> SmParser5::DoError(SmParseError eError)
2665 {
2666     DepthProtect aDepthGuard(m_nParseDepth);
2667
2668     // Identify error message
2669     OUString sStrBuf(SmResId(RID_ERR_IDENT) + starmathdatabase::getParseErrorDesc(eError));
2670
2671     // Generate error node
2672     m_aCurToken.eType = TERROR;
2673     m_aCurToken.cMathChar = sStrBuf;
2674     auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
2675     SmErrorNode* pErr(new SmErrorNode(m_aCurToken));
2676     pErr->SetSelection(m_aCurESelection);
2677     xSNode->SetSubNode(0, pErr);
2678
2679     // Append error to the error list
2680     SmErrorDesc aErrDesc(eError, xSNode.get(), m_aCurToken.cMathChar);
2681     m_aErrDescList.push_back(aErrDesc);
2682
2683     NextToken();
2684
2685     return xSNode;
2686 }
2687
2688 // end grammar
2689
2690 SmParser5::SmParser5()
2691     : m_nCurError(0)
2692     , m_nBufferIndex(0)
2693     , m_nTokenIndex(0)
2694     , m_nRow(0)
2695     , m_nColOff(0)
2696     , m_bImportSymNames(false)
2697     , m_bExportSymNames(false)
2698     , m_nParseDepth(0)
2699     , m_aNumCC(LanguageTag(LANGUAGE_ENGLISH_US))
2700     , m_pSysCC(&SM_MOD()->GetSysLocale().GetCharClass())
2701 {
2702 }
2703
2704 SmParser5::~SmParser5() {}
2705
2706 std::unique_ptr<SmTableNode> SmParser5::Parse(const OUString& rBuffer)
2707 {
2708     m_aUsedSymbols.clear();
2709
2710     m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2711     m_nBufferIndex = 0;
2712     m_nTokenIndex = 0;
2713     m_nRow = 0;
2714     m_nColOff = 0;
2715     m_nCurError = -1;
2716
2717     m_aErrDescList.clear();
2718
2719     NextToken();
2720     return DoTable();
2721 }
2722
2723 std::unique_ptr<SmNode> SmParser5::ParseExpression(const OUString& rBuffer)
2724 {
2725     m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2726     m_nBufferIndex = 0;
2727     m_nTokenIndex = 0;
2728     m_nRow = 0;
2729     m_nColOff = 0;
2730     m_nCurError = -1;
2731
2732     m_aErrDescList.clear();
2733
2734     NextToken();
2735     return DoExpression();
2736 }
2737
2738 const SmErrorDesc* SmParser5::NextError()
2739 {
2740     if (!m_aErrDescList.empty())
2741         if (m_nCurError > 0)
2742             return &m_aErrDescList[--m_nCurError];
2743         else
2744         {
2745             m_nCurError = 0;
2746             return &m_aErrDescList[m_nCurError];
2747         }
2748     else
2749         return nullptr;
2750 }
2751
2752 const SmErrorDesc* SmParser5::PrevError()
2753 {
2754     if (!m_aErrDescList.empty())
2755         if (m_nCurError < static_cast<int>(m_aErrDescList.size() - 1))
2756             return &m_aErrDescList[++m_nCurError];
2757         else
2758         {
2759             m_nCurError = static_cast<int>(m_aErrDescList.size() - 1);
2760             return &m_aErrDescList[m_nCurError];
2761         }
2762     else
2763         return nullptr;
2764 }
2765
2766 const SmErrorDesc* SmParser5::GetError() const
2767 {
2768     if (m_aErrDescList.empty())
2769         return nullptr;
2770     return &m_aErrDescList.front();
2771 }
2772
2773 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */