starmath/source/parse.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <memory>
  21 #include <com/sun/star/i18n/UnicodeType.hpp>
  22 #include <com/sun/star/i18n/KParseTokens.hpp>
  23 #include <com/sun/star/i18n/KParseType.hpp>
  24 #include <i18nlangtag/lang.h>
  25 #include <tools/lineend.hxx>
  26 #include <unotools/configmgr.hxx>
  27 #include <unotools/syslocale.hxx>
  28 #include <sal/log.hxx>
  29 #include <osl/diagnose.h>
  30 #include <rtl/character.hxx>
  31 #include <node.hxx>
  32 #include <parse.hxx>
  33 #include <strings.hrc>
  34 #include <smmod.hxx>
  35 #include "cfgitem.hxx"
  36 #include <cassert>
  37 #include <stack>
  38 #include <starmathdatabase.hxx>
  39
  40 using namespace ::com::sun::star::i18n;
  41
  42 //Definition of math keywords
  43 const SmTokenTableEntry aTokenTable[] =
  44 {
  45     { "abs", TABS, '\0', TG::UnOper, 13 },
  46     { "acute", TACUTE, MS_ACUTE, TG::Attribute, 5 },
  47     { "aleph" , TALEPH, MS_ALEPH, TG::Standalone, 5 },
  48     { "alignb", TALIGNC, '\0', TG::Align, 0},
  49     { "alignc", TALIGNC, '\0', TG::Align, 0},
  50     { "alignl", TALIGNL, '\0', TG::Align, 0},
  51     { "alignm", TALIGNC, '\0', TG::Align, 0},
  52     { "alignr", TALIGNR, '\0', TG::Align, 0},
  53     { "alignt", TALIGNC, '\0', TG::Align, 0},
  54     { "and", TAND, MS_AND, TG::Product, 0},
  55     { "approx", TAPPROX, MS_APPROX, TG::Relation, 0},
  56     { "arccos", TACOS, '\0', TG::Function, 5},
  57     { "arccot", TACOT, '\0', TG::Function, 5},
  58     { "arcosh", TACOSH, '\0', TG::Function, 5 },
  59     { "arcoth", TACOTH, '\0', TG::Function, 5 },
  60     { "arcsin", TASIN, '\0', TG::Function, 5},
  61     { "arctan", TATAN, '\0', TG::Function, 5},
  62     { "arsinh", TASINH, '\0', TG::Function, 5},
  63     { "artanh", TATANH, '\0', TG::Function, 5},
  64     { "backepsilon" , TBACKEPSILON, MS_BACKEPSILON, TG::Standalone, 5},
  65     { "bar", TBAR, MS_BAR, TG::Attribute, 5},
  66     { "binom", TBINOM, '\0', TG::NONE, 5 },
  67     { "bold", TBOLD, '\0', TG::FontAttr, 5},
  68     { "boper", TBOPER, '\0', TG::Product, 0},
  69     { "breve", TBREVE, MS_BREVE, TG::Attribute, 5},
  70     { "bslash", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
  71     { "cdot", TCDOT, MS_CDOT, TG::Product, 0},
  72     { "check", TCHECK, MS_CHECK, TG::Attribute, 5},
  73     { "circ" , TCIRC, MS_CIRC, TG::Standalone, 5},
  74     { "circle", TCIRCLE, MS_CIRCLE, TG::Attribute, 5},
  75     { "color", TCOLOR, '\0', TG::FontAttr, 5},
  76     { "coprod", TCOPROD, MS_COPROD, TG::Oper, 5},
  77     { "cos", TCOS, '\0', TG::Function, 5},
  78     { "cosh", TCOSH, '\0', TG::Function, 5},
  79     { "cot", TCOT, '\0', TG::Function, 5},
  80     { "coth", TCOTH, '\0', TG::Function, 5},
  81     { "csub", TCSUB, '\0', TG::Power, 0},
  82     { "csup", TCSUP, '\0', TG::Power, 0},
  83     { "dddot", TDDDOT, MS_DDDOT, TG::Attribute, 5},
  84     { "ddot", TDDOT, MS_DDOT, TG::Attribute, 5},
  85     { "def", TDEF, MS_DEF, TG::Relation, 0},
  86     { "div", TDIV, MS_DIV, TG::Product, 0},
  87     { "divides", TDIVIDES, MS_LINE, TG::Relation, 0},
  88     { "dlarrow" , TDLARROW, MS_DLARROW, TG::Standalone, 5},
  89     { "dlrarrow" , TDLRARROW, MS_DLRARROW, TG::Standalone, 5},
  90     { "dot", TDOT, MS_DOT, TG::Attribute, 5},
  91     { "dotsaxis", TDOTSAXIS, MS_DOTSAXIS, TG::Standalone, 5}, // 5 to continue expression
  92     { "dotsdiag", TDOTSDIAG, MS_DOTSUP, TG::Standalone, 5},
  93     { "dotsdown", TDOTSDOWN, MS_DOTSDOWN, TG::Standalone, 5},
  94     { "dotslow", TDOTSLOW, MS_DOTSLOW, TG::Standalone, 5},
  95     { "dotsup", TDOTSUP, MS_DOTSUP, TG::Standalone, 5},
  96     { "dotsvert", TDOTSVERT, MS_DOTSVERT, TG::Standalone, 5},
  97     { "downarrow" , TDOWNARROW, MS_DOWNARROW, TG::Standalone, 5},
  98     { "drarrow" , TDRARROW, MS_DRARROW, TG::Standalone, 5},
  99     { "emptyset" , TEMPTYSET, MS_EMPTYSET, TG::Standalone, 5},
 100     { "equiv", TEQUIV, MS_EQUIV, TG::Relation, 0},
 101     { "evaluate", TEVALUATE, '\0', TG::NONE, 0},
 102     { "exists", TEXISTS, MS_EXISTS, TG::Standalone, 5},
 103     { "exp", TEXP, '\0', TG::Function, 5},
 104     { "fact", TFACT, MS_FACT, TG::UnOper, 5},
 105     { "fixed", TFIXED, '\0', TG::Font, 0},
 106     { "font", TFONT, '\0', TG::FontAttr, 5},
 107     { "forall", TFORALL, MS_FORALL, TG::Standalone, 5},
 108     { "fourier", TFOURIER, MS_FOURIER, TG::Standalone, 5},
 109     { "frac", TFRAC, '\0', TG::NONE, 5},
 110     { "from", TFROM, '\0', TG::Limit, 0},
 111     { "func", TFUNC, '\0', TG::Function, 5},
 112     { "ge", TGE, MS_GE, TG::Relation, 0},
 113     { "geslant", TGESLANT, MS_GESLANT, TG::Relation, 0 },
 114     { "gg", TGG, MS_GG, TG::Relation, 0},
 115     { "grave", TGRAVE, MS_GRAVE, TG::Attribute, 5},
 116     { "gt", TGT, MS_GT, TG::Relation, 0},
 117     { "harpoon", THARPOON, MS_HARPOON, TG::Attribute, 5},
 118     { "hat", THAT, MS_HAT, TG::Attribute, 5},
 119     { "hbar" , THBAR, MS_HBAR, TG::Standalone, 5},
 120     { "iiint", TIIINT, MS_IIINT, TG::Oper, 5},
 121     { "iint", TIINT, MS_IINT, TG::Oper, 5},
 122     { "im" , TIM, MS_IM, TG::Standalone, 5 },
 123     { "in", TIN, MS_IN, TG::Relation, 0},
 124     { "infinity" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
 125     { "infty" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
 126     { "int", TINT, MS_INT, TG::Oper, 5},
 127     { "intd", TINTD, MS_INT, TG::Oper, 5},
 128     { "intersection", TINTERSECT, MS_INTERSECT, TG::Product, 0},
 129     { "it", TIT, '\0', TG::Product, 0},
 130     { "ital", TITALIC, '\0', TG::FontAttr, 5},
 131     { "italic", TITALIC, '\0', TG::FontAttr, 5},
 132     { "lambdabar" , TLAMBDABAR, MS_LAMBDABAR, TG::Standalone, 5},
 133     { "langle", TLANGLE, MS_LMATHANGLE, TG::LBrace, 5},
 134     { "laplace", TLAPLACE, MS_LAPLACE, TG::Standalone, 5},
 135     { "lbrace", TLBRACE, MS_LBRACE, TG::LBrace, 5},
 136     { "lceil", TLCEIL, MS_LCEIL, TG::LBrace, 5},
 137     { "ldbracket", TLDBRACKET, MS_LDBRACKET, TG::LBrace, 5},
 138     { "ldline", TLDLINE, MS_DVERTLINE, TG::LBrace, 5},
 139     { "le", TLE, MS_LE, TG::Relation, 0},
 140     { "left", TLEFT, '\0', TG::NONE, 5},
 141     { "leftarrow" , TLEFTARROW, MS_LEFTARROW, TG::Standalone, 5},
 142     { "leslant", TLESLANT, MS_LESLANT, TG::Relation, 0 },
 143     { "lfloor", TLFLOOR, MS_LFLOOR, TG::LBrace, 5},
 144     { "lim", TLIM, '\0', TG::Oper, 5},
 145     { "liminf", TLIMINF, '\0', TG::Oper, 5},
 146     { "limsup", TLIMSUP, '\0', TG::Oper, 5},
 147     { "lint", TLINT, MS_LINT, TG::Oper, 5},
 148     { "ll", TLL, MS_LL, TG::Relation, 0},
 149     { "lline", TLLINE, MS_VERTLINE, TG::LBrace, 5},
 150     { "llint", TLLINT, MS_LLINT, TG::Oper, 5},
 151     { "lllint", TLLLINT, MS_LLLINT, TG::Oper, 5},
 152     { "ln", TLN, '\0', TG::Function, 5},
 153     { "log", TLOG, '\0', TG::Function, 5},
 154     { "lrline", TLRLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5},
 155     { "lrdline", TLRDLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5},
 156     { "lsub", TLSUB, '\0', TG::Power, 0},
 157     { "lsup", TLSUP, '\0', TG::Power, 0},
 158     { "lt", TLT, MS_LT, TG::Relation, 0},
 159     { "matrix", TMATRIX, '\0', TG::NONE, 5},
 160     { "minusplus", TMINUSPLUS, MS_MINUSPLUS, TG::UnOper | TG::Sum, 5},
 161     { "mline", TMLINE, MS_VERTLINE, TG::NONE, 0},      //! not in TG::RBrace, Level 0
 162     { "nabla", TNABLA, MS_NABLA, TG::Standalone, 5},
 163     { "nbold", TNBOLD, '\0', TG::FontAttr, 5},
 164     { "ndivides", TNDIVIDES, MS_NDIVIDES, TG::Relation, 0},
 165     { "neg", TNEG, MS_NEG, TG::UnOper, 5 },
 166     { "neq", TNEQ, MS_NEQ, TG::Relation, 0},
 167     { "newline", TNEWLINE, '\0', TG::NONE, 0},
 168     { "ni", TNI, MS_NI, TG::Relation, 0},
 169     { "nitalic", TNITALIC, '\0', TG::FontAttr, 5},
 170     { "none", TNONE, '\0', TG::LBrace | TG::RBrace, 0},
 171     { "nospace", TNOSPACE, '\0', TG::Standalone, 5},
 172     { "notexists", TNOTEXISTS, MS_NOTEXISTS, TG::Standalone, 5},
 173     { "notin", TNOTIN, MS_NOTIN, TG::Relation, 0},
 174     { "nprec", TNOTPRECEDES, MS_NOTPRECEDES, TG::Relation, 0 },
 175     { "nroot", TNROOT, MS_SQRT, TG::UnOper, 5},
 176     { "nsubset", TNSUBSET, MS_NSUBSET, TG::Relation, 0 },
 177     { "nsubseteq", TNSUBSETEQ, MS_NSUBSETEQ, TG::Relation, 0 },
 178     { "nsucc", TNOTSUCCEEDS, MS_NOTSUCCEEDS, TG::Relation, 0 },
 179     { "nsupset", TNSUPSET, MS_NSUPSET, TG::Relation, 0 },
 180     { "nsupseteq", TNSUPSETEQ, MS_NSUPSETEQ, TG::Relation, 0 },
 181     { "odivide", TODIVIDE, MS_ODIVIDE, TG::Product, 0},
 182     { "odot", TODOT, MS_ODOT, TG::Product, 0},
 183     { "ominus", TOMINUS, MS_OMINUS, TG::Sum, 0},
 184     { "oper", TOPER, '\0', TG::Oper, 5},
 185     { "oplus", TOPLUS, MS_OPLUS, TG::Sum, 0},
 186     { "or", TOR, MS_OR, TG::Sum, 0},
 187     { "ortho", TORTHO, MS_ORTHO, TG::Relation, 0},
 188     { "otimes", TOTIMES, MS_OTIMES, TG::Product, 0},
 189     { "over", TOVER, '\0', TG::Product, 0},
 190     { "overbrace", TOVERBRACE, MS_OVERBRACE, TG::Product, 5},
 191     { "overline", TOVERLINE, '\0', TG::Attribute, 5},
 192     { "overstrike", TOVERSTRIKE, '\0', TG::Attribute, 5},
 193     { "owns", TNI, MS_NI, TG::Relation, 0},
 194     { "parallel", TPARALLEL, MS_DLINE, TG::Relation, 0},
 195     { "partial", TPARTIAL, MS_PARTIAL, TG::Standalone, 5 },
 196     { "phantom", TPHANTOM, '\0', TG::FontAttr, 5},
 197     { "plusminus", TPLUSMINUS, MS_PLUSMINUS, TG::UnOper | TG::Sum, 5},
 198     { "prec", TPRECEDES, MS_PRECEDES, TG::Relation, 0 },
 199     { "preccurlyeq", TPRECEDESEQUAL, MS_PRECEDESEQUAL, TG::Relation, 0 },
 200     { "precsim", TPRECEDESEQUIV, MS_PRECEDESEQUIV, TG::Relation, 0 },
 201     { "prod", TPROD, MS_PROD, TG::Oper, 5},
 202     { "prop", TPROP, MS_PROP, TG::Relation, 0},
 203     { "rangle", TRANGLE, MS_RMATHANGLE, TG::RBrace, 0},  //! 0 to terminate expression
 204     { "rbrace", TRBRACE, MS_RBRACE, TG::RBrace, 0},
 205     { "rceil", TRCEIL, MS_RCEIL, TG::RBrace, 0},
 206     { "rdbracket", TRDBRACKET, MS_RDBRACKET, TG::RBrace, 0},
 207     { "rdline", TRDLINE, MS_DVERTLINE, TG::RBrace, 0},
 208     { "re" , TRE, MS_RE, TG::Standalone, 5 },
 209     { "rfloor", TRFLOOR, MS_RFLOOR, TG::RBrace, 0},  //! 0 to terminate expression
 210     { "right", TRIGHT, '\0', TG::NONE, 0},
 211     { "rightarrow" , TRIGHTARROW, MS_RIGHTARROW, TG::Standalone, 5},
 212     { "rline", TRLINE, MS_VERTLINE, TG::RBrace, 0},  //! 0 to terminate expression
 213     { "rsub", TRSUB, '\0', TG::Power, 0},
 214     { "rsup", TRSUP, '\0', TG::Power, 0},
 215     { "sans", TSANS, '\0', TG::Font, 0},
 216     { "serif", TSERIF, '\0', TG::Font, 0},
 217     { "setC" , TSETC, MS_SETC, TG::Standalone, 5},
 218     { "setminus", TSETMINUS, MS_BACKSLASH, TG::Product, 0 },
 219     { "setN" , TSETN, MS_SETN, TG::Standalone, 5},
 220     { "setQ" , TSETQ, MS_SETQ, TG::Standalone, 5},
 221     { "setquotient", TSETQUOTIENT, MS_SLASH, TG::Product, 0 },
 222     { "setR" , TSETR, MS_SETR, TG::Standalone, 5},
 223     { "setZ" , TSETZ, MS_SETZ, TG::Standalone, 5},
 224     { "sim", TSIM, MS_SIM, TG::Relation, 0},
 225     { "simeq", TSIMEQ, MS_SIMEQ, TG::Relation, 0},
 226     { "sin", TSIN, '\0', TG::Function, 5},
 227     { "sinh", TSINH, '\0', TG::Function, 5},
 228     { "size", TSIZE, '\0', TG::FontAttr, 5},
 229     { "slash", TSLASH, MS_SLASH, TG::Product, 0 },
 230     { "sqrt", TSQRT, MS_SQRT, TG::UnOper, 5},
 231     { "stack", TSTACK, '\0', TG::NONE, 5},
 232     { "sub", TRSUB, '\0', TG::Power, 0},
 233     { "subset", TSUBSET, MS_SUBSET, TG::Relation, 0},
 234     { "subseteq", TSUBSETEQ, MS_SUBSETEQ, TG::Relation, 0},
 235     { "succ", TSUCCEEDS, MS_SUCCEEDS, TG::Relation, 0 },
 236     { "succcurlyeq", TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TG::Relation, 0 },
 237     { "succsim", TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TG::Relation, 0 },
 238     { "sum", TSUM, MS_SUM, TG::Oper, 5},
 239     { "sup", TRSUP, '\0', TG::Power, 0},
 240     { "supset", TSUPSET, MS_SUPSET, TG::Relation, 0},
 241     { "supseteq", TSUPSETEQ, MS_SUPSETEQ, TG::Relation, 0},
 242     { "tan", TTAN, '\0', TG::Function, 5},
 243     { "tanh", TTANH, '\0', TG::Function, 5},
 244     { "tilde", TTILDE, MS_TILDE, TG::Attribute, 5},
 245     { "times", TTIMES, MS_TIMES, TG::Product, 0},
 246     { "to", TTO, '\0', TG::Limit, 0},
 247     { "toward", TTOWARD, MS_RIGHTARROW, TG::Relation, 0},
 248     { "transl", TTRANSL, MS_TRANSL, TG::Relation, 0},
 249     { "transr", TTRANSR, MS_TRANSR, TG::Relation, 0},
 250     { "underbrace", TUNDERBRACE, MS_UNDERBRACE, TG::Product, 5},
 251     { "underline", TUNDERLINE, '\0', TG::Attribute, 5},
 252     { "union", TUNION, MS_UNION, TG::Sum, 0},
 253     { "uoper", TUOPER, '\0', TG::UnOper, 5},
 254     { "uparrow" , TUPARROW, MS_UPARROW, TG::Standalone, 5},
 255     { "vec", TVEC, MS_VEC, TG::Attribute, 5},
 256     { "widebslash", TWIDEBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
 257     { "wideharpoon", TWIDEHARPOON, MS_HARPOON, TG::Attribute, 5},
 258     { "widehat", TWIDEHAT, MS_HAT, TG::Attribute, 5},
 259     { "wideslash", TWIDESLASH, MS_SLASH, TG::Product, 0 },
 260     { "widetilde", TWIDETILDE, MS_TILDE, TG::Attribute, 5},
 261     { "widevec", TWIDEVEC, MS_VEC, TG::Attribute, 5},
 262     { "wp" , TWP, MS_WP, TG::Standalone, 5}
 263 };
 264
 265 // First character may be any alphabetic
 266 const sal_Int32 coStartFlags = KParseTokens::ANY_LETTER | KParseTokens::IGNORE_LEADING_WS;
 267
 268 // Continuing characters may be any alphabetic
 269 const sal_Int32 coContFlags = (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
 270                               | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
 271 // First character for numbers, may be any numeric or dot
 272 const sal_Int32 coNumStartFlags = KParseTokens::ASC_DIGIT | KParseTokens::ASC_DOT
 273                                   | KParseTokens::IGNORE_LEADING_WS;
 274 // Continuing characters for numbers, may be any numeric or dot or comma.
 275 // tdf#127873: additionally accept ',' comma group separator as too many
 276 // existing documents unwittingly may have used that as decimal separator
 277 // in such locales (though it never was as this is always the en-US locale
 278 // and the group separator is only parsed away).
 279 const sal_Int32 coNumContFlags = (coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
 280                                  | KParseTokens::GROUP_SEPARATOR_IN_NUMBER;
 281 // First character for numbers hexadecimal
 282 const sal_Int32 coNum16StartFlags = KParseTokens::ASC_DIGIT | KParseTokens::ASC_UPALPHA
 283                                     | KParseTokens::IGNORE_LEADING_WS;
 284
 285 // Continuing characters for numbers hexadecimal
 286 const sal_Int32 coNum16ContFlags = (coNum16StartFlags & ~KParseTokens::IGNORE_LEADING_WS);
 287 // user-defined char continuing characters may be any alphanumeric or dot.
 288 const sal_Int32 coUserDefinedCharContFlags = KParseTokens::ANY_LETTER_OR_NUMBER
 289                                              | KParseTokens::ASC_DOT
 290                                              | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
 291
 292 //Checks if keyword is in the list.
 293 static inline bool findCompare(const SmTokenTableEntry & lhs, const OUString & s)
 294 {
 295     return s.compareToIgnoreAsciiCaseAscii(lhs.pIdent) > 0;
 296 }
 297
 298 //Returns the SmTokenTableEntry for a keyword
 299 static const SmTokenTableEntry * GetTokenTableEntry( const OUString &rName )
 300 {
 301     if (rName.isEmpty())return nullptr; //avoid null pointer exceptions
 302     //Looks for the first keyword after or equal to rName in alphabetical order.
 303     auto findIter = std::lower_bound( std::begin(aTokenTable),
 304                                       std::end(aTokenTable), rName, findCompare );
 305     if ( findIter != std::end(aTokenTable) && rName.equalsIgnoreAsciiCaseAscii( findIter->pIdent ))
 306         return &*findIter; //check is equal
 307     return nullptr; //not found
 308 }
 309
 310 static bool IsDelimiter( const OUString &rTxt, sal_Int32 nPos )
 311 {   // returns 'true' iff cChar is '\0' or a delimiter
 312
 313     assert(nPos <= rTxt.getLength()); //index out of range
 314     if (nPos == rTxt.getLength())return true; //This is EOF
 315     sal_Unicode cChar = rTxt[nPos];
 316
 317     // check if 'cChar' is in the delimiter table
 318     static const sal_Unicode aDelimiterTable[] =
 319     {
 320         ' ', '{',  '}',  '(',  ')', '\t', '\n', '\r', '+',  '-',
 321         '*',  '/',  '=',  '[',  ']',  '^',  '_',  '#',
 322         '%',  '>',  '<',  '&',  '|', '\\', '"',  '~',  '`'
 323     };//reordered by usage (by eye) for nanoseconds saving.
 324
 325     //checks the array
 326     for (auto const &cDelimiter : aDelimiterTable)
 327     {
 328         if (cDelimiter == cChar)return true;
 329     }
 330
 331     //special chars support
 332     sal_Int16 nTypJp = SM_MOD()->GetSysLocale().GetCharClass().getType( rTxt, nPos );
 333     return ( nTypJp == css::i18n::UnicodeType::SPACE_SEPARATOR ||
 334              nTypJp == css::i18n::UnicodeType::CONTROL);
 335 }
 336
 337 // checks number used as arguments in Math formulas (e.g. 'size' command)
 338 // Format: no negative numbers, must start with a digit, no exponent notation, ...
 339 static bool lcl_IsNumber(const OUString& rText)
 340 {
 341     bool bPoint = false;
 342     const sal_Unicode* pBuffer = rText.getStr();
 343     for(sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
 344     {
 345         const sal_Unicode cChar = *pBuffer;
 346         if(cChar == '.')
 347         {
 348             if(bPoint) return false;
 349             else bPoint = true;
 350         }
 351         else if ( !rtl::isAsciiDigit( cChar ) ) return false;
 352     }
 353     return true;
 354 }
 355 // checks number used as arguments in Math formulas (e.g. 'size' command)
 356 // Format: no negative numbers, must start with a digit, no exponent notation, ...
 357 static bool lcl_IsNotWholeNumber(const OUString& rText)
 358 {
 359     const sal_Unicode* pBuffer = rText.getStr();
 360     for(sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
 361         if ( !rtl::isAsciiDigit( *pBuffer ) ) return true;
 362     return false;
 363 }
 364 // checks hex number used as arguments in Math formulas (e.g. 'hex' command)
 365 // Format: no negative numbers, must start with a digit, no exponent notation, ...
 366 static bool lcl_IsNotWholeNumber16(const OUString& rText)
 367 {
 368     const sal_Unicode* pBuffer = rText.getStr();
 369     for(sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
 370         if ( !rtl::isAsciiCanonicHexDigit( *pBuffer ) ) return true;
 371     return false;
 372 }
 373
 374 //Text replace onto m_aBufferString
 375 void SmParser::Replace( sal_Int32 nPos, sal_Int32 nLen, const OUString &rText )
 376 {
 377     assert( nPos + nLen <= m_aBufferString.getLength() ); //checks if length allows text replace
 378
 379     m_aBufferString = m_aBufferString.replaceAt( nPos, nLen, rText ); //replace and reindex
 380     sal_Int32 nChg = rText.getLength() - nLen;
 381     m_nBufferIndex = m_nBufferIndex + nChg;
 382     m_nTokenIndex = m_nTokenIndex + nChg;
 383 }
 384
 385 void SmParser::NextToken() //Central part of the parser
 386 {
 387
 388     sal_Int32   nBufLen = m_aBufferString.getLength();
 389     ParseResult aRes;
 390     sal_Int32   nRealStart;
 391     bool        bCont;
 392     do
 393     {
 394         // skip white spaces
 395         while (UnicodeType::SPACE_SEPARATOR ==
 396                         m_pSysCC->getType( m_aBufferString, m_nBufferIndex ))
 397            ++m_nBufferIndex;
 398
 399         // Try to parse a number in a locale-independent manner using
 400         // '.' as decimal separator.
 401         // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
 402         aRes = m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER,
 403                                         m_aBufferString, m_nBufferIndex,
 404                                         coNumStartFlags, "",
 405                                         coNumContFlags, "");
 406
 407         if (aRes.TokenType == 0)
 408         {
 409             // Try again with the default token parsing.
 410             aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
 411                                      coStartFlags, "",
 412                                      coContFlags, "");
 413         }
 414
 415         nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
 416         m_nBufferIndex = nRealStart;
 417
 418         bCont = false;
 419         if ( aRes.TokenType == 0  &&
 420                 nRealStart < nBufLen &&
 421                 '\n' == m_aBufferString[ nRealStart ] )
 422         {
 423             // keep data needed for tokens row and col entry up to date
 424             ++m_nRow;
 425             m_nBufferIndex = m_nColOff = nRealStart + 1;
 426             bCont = true;
 427         }
 428         else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
 429         {
 430             if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
 431             {
 432                 //SkipComment
 433                 m_nBufferIndex = nRealStart + 2;
 434                 while (m_nBufferIndex < nBufLen  &&
 435                     '\n' != m_aBufferString[ m_nBufferIndex ])
 436                     ++m_nBufferIndex;
 437                 bCont = true;
 438             }
 439         }
 440
 441     } while (bCont);
 442
 443     // set index of current token
 444     m_nTokenIndex = m_nBufferIndex;
 445
 446     m_aCurToken.nRow   = m_nRow;
 447     m_aCurToken.nCol   = nRealStart - m_nColOff + 1;
 448
 449     bool bHandled = true;
 450     if (nRealStart >= nBufLen)
 451     {
 452         m_aCurToken.eType    = TEND;
 453         m_aCurToken.cMathChar = '\0';
 454         m_aCurToken.nGroup       = TG::NONE;
 455         m_aCurToken.nLevel       = 0;
 456         m_aCurToken.aText.clear();
 457     }
 458     else if (aRes.TokenType & KParseType::ANY_NUMBER)
 459     {
 460         assert(aRes.EndPos > 0);
 461         if ( m_aBufferString[aRes.EndPos-1] == ',' &&
 462              aRes.EndPos < nBufLen &&
 463              m_pSysCC->getType( m_aBufferString, aRes.EndPos ) != UnicodeType::SPACE_SEPARATOR )
 464         {
 465             // Comma followed by a non-space char is unlikely for decimal/thousands separator.
 466             --aRes.EndPos;
 467         }
 468         sal_Int32 n = aRes.EndPos - nRealStart;
 469         assert(n >= 0);
 470         m_aCurToken.eType      = TNUMBER;
 471         m_aCurToken.cMathChar  = '\0';
 472         m_aCurToken.nGroup     = TG::NONE;
 473         m_aCurToken.nLevel     = 5;
 474         m_aCurToken.aText      = m_aBufferString.copy( nRealStart, n );
 475
 476         SAL_WARN_IF( !IsDelimiter( m_aBufferString, aRes.EndPos ), "starmath", "identifier really finished? (compatibility!)" );
 477     }
 478     else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING)
 479     {
 480         m_aCurToken.eType      = TTEXT;
 481         m_aCurToken.cMathChar  = '\0';
 482         m_aCurToken.nGroup     = TG::NONE;
 483         m_aCurToken.nLevel     = 5;
 484         m_aCurToken.aText     = aRes.DequotedNameOrString;
 485         m_aCurToken.nRow       = m_nRow;
 486         m_aCurToken.nCol       = nRealStart - m_nColOff + 2;
 487     }
 488     else if (aRes.TokenType & KParseType::IDENTNAME)
 489     {
 490         sal_Int32 n = aRes.EndPos - nRealStart;
 491         assert(n >= 0);
 492         OUString aName( m_aBufferString.copy( nRealStart, n ) );
 493         const SmTokenTableEntry *pEntry = GetTokenTableEntry( aName );
 494
 495         if (pEntry)
 496         {
 497             m_aCurToken.eType      = pEntry->eType;
 498             m_aCurToken.cMathChar  = pEntry->cMathChar;
 499             m_aCurToken.nGroup     = pEntry->nGroup;
 500             m_aCurToken.nLevel     = pEntry->nLevel;
 501             m_aCurToken.aText      = OUString::createFromAscii( pEntry->pIdent );
 502         }
 503         else
 504         {
 505             m_aCurToken.eType      = TIDENT;
 506             m_aCurToken.cMathChar  = '\0';
 507             m_aCurToken.nGroup     = TG::NONE;
 508             m_aCurToken.nLevel     = 5;
 509             m_aCurToken.aText      = aName;
 510
 511             SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos),"starmath", "identifier really finished? (compatibility!)");
 512         }
 513     }
 514     else if (aRes.TokenType == 0  &&  '_' == m_aBufferString[ nRealStart ])
 515     {
 516         m_aCurToken.eType    = TRSUB;
 517         m_aCurToken.cMathChar = '\0';
 518         m_aCurToken.nGroup       = TG::Power;
 519         m_aCurToken.nLevel       = 0;
 520         m_aCurToken.aText = "_";
 521
 522         aRes.EndPos = nRealStart + 1;
 523     }
 524     else if (aRes.TokenType & KParseType::BOOLEAN)
 525     {
 526         sal_Int32   &rnEndPos = aRes.EndPos;
 527         if (rnEndPos - nRealStart <= 2)
 528         {
 529             sal_Unicode ch = m_aBufferString[ nRealStart ];
 530             switch (ch)
 531             {
 532                 case '<':
 533                     {
 534                         if (m_aBufferString.match("<<", nRealStart))
 535                         {
 536                             m_aCurToken.eType    = TLL;
 537                             m_aCurToken.cMathChar = MS_LL;
 538                             m_aCurToken.nGroup       = TG::Relation;
 539                             m_aCurToken.nLevel       = 0;
 540                             m_aCurToken.aText = "<<";
 541
 542                             rnEndPos = nRealStart + 2;
 543                         }
 544                         else if (m_aBufferString.match("<=", nRealStart))
 545                         {
 546                             m_aCurToken.eType    = TLE;
 547                             m_aCurToken.cMathChar = MS_LE;
 548                             m_aCurToken.nGroup       = TG::Relation;
 549                             m_aCurToken.nLevel       = 0;
 550                             m_aCurToken.aText = "<=";
 551
 552                             rnEndPos = nRealStart + 2;
 553                         }
 554                         else if (m_aBufferString.match("<-", nRealStart))
 555                         {
 556                             m_aCurToken.eType    = TLEFTARROW;
 557                             m_aCurToken.cMathChar = MS_LEFTARROW;
 558                             m_aCurToken.nGroup       = TG::Standalone;
 559                             m_aCurToken.nLevel       = 5;
 560                             m_aCurToken.aText = "<-";
 561
 562                             rnEndPos = nRealStart + 2;
 563                         }
 564                         else if (m_aBufferString.match("<>", nRealStart))
 565                         {
 566                             m_aCurToken.eType    = TNEQ;
 567                             m_aCurToken.cMathChar = MS_NEQ;
 568                             m_aCurToken.nGroup       = TG::Relation;
 569                             m_aCurToken.nLevel       = 0;
 570                             m_aCurToken.aText = "<>";
 571
 572                             rnEndPos = nRealStart + 2;
 573                         }
 574                         else if (m_aBufferString.match("<?>", nRealStart))
 575                         {
 576                             m_aCurToken.eType    = TPLACE;
 577                             m_aCurToken.cMathChar = MS_PLACE;
 578                             m_aCurToken.nGroup       = TG::NONE;
 579                             m_aCurToken.nLevel       = 5;
 580                             m_aCurToken.aText = "<?>";
 581
 582                             rnEndPos = nRealStart + 3;
 583                         }
 584                         else
 585                         {
 586                             m_aCurToken.eType    = TLT;
 587                             m_aCurToken.cMathChar = MS_LT;
 588                             m_aCurToken.nGroup       = TG::Relation;
 589                             m_aCurToken.nLevel       = 0;
 590                             m_aCurToken.aText = "<";
 591                         }
 592                     }
 593                     break;
 594                 case '>':
 595                     {
 596                         if (m_aBufferString.match(">=", nRealStart))
 597                         {
 598                             m_aCurToken.eType    = TGE;
 599                             m_aCurToken.cMathChar = MS_GE;
 600                             m_aCurToken.nGroup       = TG::Relation;
 601                             m_aCurToken.nLevel       = 0;
 602                             m_aCurToken.aText = ">=";
 603
 604                             rnEndPos = nRealStart + 2;
 605                         }
 606                         else if (m_aBufferString.match(">>", nRealStart))
 607                         {
 608                             m_aCurToken.eType    = TGG;
 609                             m_aCurToken.cMathChar = MS_GG;
 610                             m_aCurToken.nGroup       = TG::Relation;
 611                             m_aCurToken.nLevel       = 0;
 612                             m_aCurToken.aText = ">>";
 613
 614                             rnEndPos = nRealStart + 2;
 615                         }
 616                         else
 617                         {
 618                             m_aCurToken.eType    = TGT;
 619                             m_aCurToken.cMathChar = MS_GT;
 620                             m_aCurToken.nGroup       = TG::Relation;
 621                             m_aCurToken.nLevel       = 0;
 622                             m_aCurToken.aText = ">";
 623                         }
 624                     }
 625                     break;
 626                 default:
 627                     bHandled = false;
 628             }
 629         }
 630     }
 631     else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
 632     {
 633         sal_Int32   &rnEndPos = aRes.EndPos;
 634         if (rnEndPos - nRealStart == 1)
 635         {
 636             sal_Unicode ch = m_aBufferString[ nRealStart ];
 637             switch (ch)
 638             {
 639                 case '%':
 640                     {
 641                         //! modifies aRes.EndPos
 642
 643                         OSL_ENSURE( rnEndPos >= nBufLen  ||
 644                                     '%' != m_aBufferString[ rnEndPos ],
 645                                 "unexpected comment start" );
 646
 647                         // get identifier of user-defined character
 648                         ParseResult aTmpRes = m_pSysCC->parseAnyToken(
 649                                 m_aBufferString, rnEndPos,
 650                                 KParseTokens::ANY_LETTER,
 651                                 "",
 652                                 coUserDefinedCharContFlags,
 653                                 "" );
 654
 655                         sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace;
 656
 657                         // default setting for the case that no identifier
 658                         // i.e. a valid symbol-name is following the '%'
 659                         // character
 660                         m_aCurToken.eType      = TTEXT;
 661                         m_aCurToken.cMathChar  = '\0';
 662                         m_aCurToken.nGroup     = TG::NONE;
 663                         m_aCurToken.nLevel     = 5;
 664                         m_aCurToken.aText      ="%";
 665                         m_aCurToken.nRow       = m_nRow;
 666                         m_aCurToken.nCol       = nTmpStart - m_nColOff;
 667
 668                         if (aTmpRes.TokenType & KParseType::IDENTNAME)
 669                         {
 670
 671                             sal_Int32 n = aTmpRes.EndPos - nTmpStart;
 672                             m_aCurToken.eType      = TSPECIAL;
 673                             m_aCurToken.aText      = m_aBufferString.copy( nTmpStart-1, n+1 );
 674
 675                             OSL_ENSURE( aTmpRes.EndPos > rnEndPos,
 676                                     "empty identifier" );
 677                             if (aTmpRes.EndPos > rnEndPos)
 678                                 rnEndPos = aTmpRes.EndPos;
 679                             else
 680                                 ++rnEndPos;
 681                         }
 682
 683                         // if no symbol-name was found we start-over with
 684                         // finding the next token right after the '%' sign.
 685                         // I.e. we leave rnEndPos unmodified.
 686                     }
 687                     break;
 688                 case '[':
 689                     {
 690                         m_aCurToken.eType    = TLBRACKET;
 691                         m_aCurToken.cMathChar = MS_LBRACKET;
 692                         m_aCurToken.nGroup       = TG::LBrace;
 693                         m_aCurToken.nLevel       = 5;
 694                         m_aCurToken.aText = "[";
 695                     }
 696                     break;
 697                 case '\\':
 698                     {
 699                         m_aCurToken.eType    = TESCAPE;
 700                         m_aCurToken.cMathChar = '\0';
 701                         m_aCurToken.nGroup       = TG::NONE;
 702                         m_aCurToken.nLevel       = 5;
 703                         m_aCurToken.aText = "\\";
 704                     }
 705                     break;
 706                 case ']':
 707                     {
 708                         m_aCurToken.eType    = TRBRACKET;
 709                         m_aCurToken.cMathChar = MS_RBRACKET;
 710                         m_aCurToken.nGroup       = TG::RBrace;
 711                         m_aCurToken.nLevel       = 0;
 712                         m_aCurToken.aText = "]";
 713                     }
 714                     break;
 715                 case '^':
 716                     {
 717                         m_aCurToken.eType    = TRSUP;
 718                         m_aCurToken.cMathChar = '\0';
 719                         m_aCurToken.nGroup       = TG::Power;
 720                         m_aCurToken.nLevel       = 0;
 721                         m_aCurToken.aText = "^";
 722                     }
 723                     break;
 724                 case '`':
 725                     {
 726                         m_aCurToken.eType    = TSBLANK;
 727                         m_aCurToken.cMathChar = '\0';
 728                         m_aCurToken.nGroup       = TG::Blank;
 729                         m_aCurToken.nLevel       = 5;
 730                         m_aCurToken.aText = "`";
 731                     }
 732                     break;
 733                 case '{':
 734                     {
 735                         m_aCurToken.eType    = TLGROUP;
 736                         m_aCurToken.cMathChar = MS_LBRACE;
 737                         m_aCurToken.nGroup       = TG::NONE;
 738                         m_aCurToken.nLevel       = 5;
 739                         m_aCurToken.aText = "{";
 740                     }
 741                     break;
 742                 case '|':
 743                     {
 744                         m_aCurToken.eType    = TOR;
 745                         m_aCurToken.cMathChar = MS_OR;
 746                         m_aCurToken.nGroup       = TG::Sum;
 747                         m_aCurToken.nLevel       = 0;
 748                         m_aCurToken.aText = "|";
 749                     }
 750                     break;
 751                 case '}':
 752                     {
 753                         m_aCurToken.eType    = TRGROUP;
 754                         m_aCurToken.cMathChar = MS_RBRACE;
 755                         m_aCurToken.nGroup       = TG::NONE;
 756                         m_aCurToken.nLevel       = 0;
 757                         m_aCurToken.aText = "}";
 758                     }
 759                     break;
 760                 case '~':
 761                     {
 762                         m_aCurToken.eType    = TBLANK;
 763                         m_aCurToken.cMathChar = '\0';
 764                         m_aCurToken.nGroup       = TG::Blank;
 765                         m_aCurToken.nLevel       = 5;
 766                         m_aCurToken.aText = "~";
 767                     }
 768                     break;
 769                 case '#':
 770                     {
 771                         if (m_aBufferString.match("##", nRealStart))
 772                         {
 773                             m_aCurToken.eType    = TDPOUND;
 774                             m_aCurToken.cMathChar = '\0';
 775                             m_aCurToken.nGroup       = TG::NONE;
 776                             m_aCurToken.nLevel       = 0;
 777                             m_aCurToken.aText = "##";
 778
 779                             rnEndPos = nRealStart + 2;
 780                         }
 781                         else
 782                         {
 783                             m_aCurToken.eType    = TPOUND;
 784                             m_aCurToken.cMathChar = '\0';
 785                             m_aCurToken.nGroup       = TG::NONE;
 786                             m_aCurToken.nLevel       = 0;
 787                             m_aCurToken.aText = "#";
 788                         }
 789                     }
 790                     break;
 791                 case '&':
 792                     {
 793                         m_aCurToken.eType    = TAND;
 794                         m_aCurToken.cMathChar = MS_AND;
 795                         m_aCurToken.nGroup       = TG::Product;
 796                         m_aCurToken.nLevel       = 0;
 797                         m_aCurToken.aText = "&";
 798                     }
 799                     break;
 800                 case '(':
 801                     {
 802                         m_aCurToken.eType    = TLPARENT;
 803                         m_aCurToken.cMathChar = MS_LPARENT;
 804                         m_aCurToken.nGroup       = TG::LBrace;
 805                         m_aCurToken.nLevel       = 5;     //! 0 to continue expression
 806                         m_aCurToken.aText = "(";
 807                     }
 808                     break;
 809                 case ')':
 810                     {
 811                         m_aCurToken.eType    = TRPARENT;
 812                         m_aCurToken.cMathChar = MS_RPARENT;
 813                         m_aCurToken.nGroup       = TG::RBrace;
 814                         m_aCurToken.nLevel       = 0;     //! 0 to terminate expression
 815                         m_aCurToken.aText = ")";
 816                     }
 817                     break;
 818                 case '*':
 819                     {
 820                         m_aCurToken.eType    = TMULTIPLY;
 821                         m_aCurToken.cMathChar = MS_MULTIPLY;
 822                         m_aCurToken.nGroup       = TG::Product;
 823                         m_aCurToken.nLevel       = 0;
 824                         m_aCurToken.aText = "*";
 825                     }
 826                     break;
 827                 case '+':
 828                     {
 829                         if (m_aBufferString.match("+-", nRealStart))
 830                         {
 831                             m_aCurToken.eType    = TPLUSMINUS;
 832                             m_aCurToken.cMathChar = MS_PLUSMINUS;
 833                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
 834                             m_aCurToken.nLevel       = 5;
 835                             m_aCurToken.aText = "+-";
 836
 837                             rnEndPos = nRealStart + 2;
 838                         }
 839                         else
 840                         {
 841                             m_aCurToken.eType    = TPLUS;
 842                             m_aCurToken.cMathChar = MS_PLUS;
 843                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
 844                             m_aCurToken.nLevel       = 5;
 845                             m_aCurToken.aText = "+";
 846                         }
 847                     }
 848                     break;
 849                 case '-':
 850                     {
 851                         if (m_aBufferString.match("-+", nRealStart))
 852                         {
 853                             m_aCurToken.eType    = TMINUSPLUS;
 854                             m_aCurToken.cMathChar = MS_MINUSPLUS;
 855                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
 856                             m_aCurToken.nLevel       = 5;
 857                             m_aCurToken.aText = "-+";
 858
 859                             rnEndPos = nRealStart + 2;
 860                         }
 861                         else if (m_aBufferString.match("->", nRealStart))
 862                         {
 863                             m_aCurToken.eType    = TRIGHTARROW;
 864                             m_aCurToken.cMathChar = MS_RIGHTARROW;
 865                             m_aCurToken.nGroup       = TG::Standalone;
 866                             m_aCurToken.nLevel       = 5;
 867                             m_aCurToken.aText = "->";
 868
 869                             rnEndPos = nRealStart + 2;
 870                         }
 871                         else
 872                         {
 873                             m_aCurToken.eType    = TMINUS;
 874                             m_aCurToken.cMathChar = MS_MINUS;
 875                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
 876                             m_aCurToken.nLevel       = 5;
 877                             m_aCurToken.aText = "-";
 878                         }
 879                     }
 880                     break;
 881                 case '.':
 882                     {
 883                         // Only one character? Then it can't be a number.
 884                         if (m_nBufferIndex < m_aBufferString.getLength() - 1)
 885                         {
 886                             // for compatibility with SO5.2
 887                             // texts like .34 ...56 ... h ...78..90
 888                             // will be treated as numbers
 889                             m_aCurToken.eType     = TNUMBER;
 890                             m_aCurToken.cMathChar = '\0';
 891                             m_aCurToken.nGroup    = TG::NONE;
 892                             m_aCurToken.nLevel    = 5;
 893
 894                             sal_Int32 nTxtStart = m_nBufferIndex;
 895                             sal_Unicode cChar;
 896                             // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only
 897                             do
 898                             {
 899                                 cChar = m_aBufferString[ ++m_nBufferIndex ];
 900                             }
 901                             while ( (cChar == '.' || rtl::isAsciiDigit( cChar )) &&
 902                                      ( m_nBufferIndex < m_aBufferString.getLength() - 1 ) );
 903
 904                             m_aCurToken.aText = m_aBufferString.copy( nTxtStart, m_nBufferIndex - nTxtStart );
 905                             aRes.EndPos = m_nBufferIndex;
 906                         }
 907                         else
 908                             bHandled = false;
 909                     }
 910                     break;
 911                 case '/':
 912                     {
 913                         m_aCurToken.eType    = TDIVIDEBY;
 914                         m_aCurToken.cMathChar = MS_SLASH;
 915                         m_aCurToken.nGroup       = TG::Product;
 916                         m_aCurToken.nLevel       = 0;
 917                         m_aCurToken.aText = "/";
 918                     }
 919                     break;
 920                 case '=':
 921                     {
 922                         m_aCurToken.eType    = TASSIGN;
 923                         m_aCurToken.cMathChar = MS_ASSIGN;
 924                         m_aCurToken.nGroup       = TG::Relation;
 925                         m_aCurToken.nLevel       = 0;
 926                         m_aCurToken.aText = "=";
 927                     }
 928                     break;
 929                 default:
 930                     bHandled = false;
 931             }
 932         }
 933     }
 934     else
 935         bHandled = false;
 936
 937     if (!bHandled)
 938     {
 939         m_aCurToken.eType      = TCHARACTER;
 940         m_aCurToken.cMathChar  = '\0';
 941         m_aCurToken.nGroup     = TG::NONE;
 942         m_aCurToken.nLevel     = 5;
 943
 944         // tdf#129372: we may have to deal with surrogate pairs
 945         // (see https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates)
 946         // in this case, we must read 2 sal_Unicode instead of 1
 947         int nOffset(rtl::isSurrogate(m_aBufferString[nRealStart])? 2 : 1);
 948         m_aCurToken.aText      = m_aBufferString.copy( nRealStart, nOffset );
 949
 950         aRes.EndPos = nRealStart + nOffset;
 951     }
 952
 953     if (TEND != m_aCurToken.eType)
 954         m_nBufferIndex = aRes.EndPos;
 955 }
 956
 957 void SmParser::NextTokenColor(bool dvipload)
 958 {
 959
 960     sal_Int32   nBufLen = m_aBufferString.getLength();
 961     ParseResult aRes;
 962     sal_Int32   nRealStart;
 963     bool        bCont;
 964
 965     do
 966     {
 967         // skip white spaces
 968         while (UnicodeType::SPACE_SEPARATOR ==
 969                         m_pSysCC->getType( m_aBufferString, m_nBufferIndex ))
 970            ++m_nBufferIndex;
 971         //parse, there are few options, so less strict.
 972         aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
 973                                        coStartFlags, "", coContFlags, "");
 974         nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
 975         m_nBufferIndex = nRealStart;
 976         bCont = false;
 977         if ( aRes.TokenType == 0  &&
 978                 nRealStart < nBufLen &&
 979                 '\n' == m_aBufferString[ nRealStart ] )
 980         {
 981             // keep data needed for tokens row and col entry up to date
 982             ++m_nRow;
 983             m_nBufferIndex = m_nColOff = nRealStart + 1;
 984             bCont = true;
 985         }
 986         else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
 987         {
 988             if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
 989             {
 990                 //SkipComment
 991                 m_nBufferIndex = nRealStart + 2;
 992                 while (m_nBufferIndex < nBufLen  &&
 993                     '\n' != m_aBufferString[ m_nBufferIndex ])
 994                     ++m_nBufferIndex;
 995                 bCont = true;
 996             }
 997         }
 998     } while (bCont);
 999
1000     // set index of current token
1001     m_nTokenIndex    = m_nBufferIndex;
1002     m_aCurToken.nRow = m_nRow;
1003     m_aCurToken.nCol = nRealStart - m_nColOff + 1;
1004     if (nRealStart >= nBufLen) m_aCurToken.eType = TEND;
1005     else if (aRes.TokenType & KParseType::IDENTNAME)
1006     {
1007         sal_Int32 n = aRes.EndPos - nRealStart;
1008         assert(n >= 0);
1009         OUString aName( m_aBufferString.copy( nRealStart, n ) );
1010         std::unique_ptr<SmColorTokenTableEntry> aSmColorTokenTableEntry;
1011         if(dvipload) aSmColorTokenTableEntry = starmathdatabase::Identify_ColorName_DVIPSNAMES( aName );
1012         else aSmColorTokenTableEntry = starmathdatabase::Identify_ColorName_Parser( aName );
1013         m_aCurToken = aSmColorTokenTableEntry;
1014     }
1015     else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1016     {
1017         if( m_aBufferString[ nRealStart ] == '#' && !m_aBufferString.match("##", nRealStart) )
1018         {
1019             m_aCurToken.eType    = THEX;
1020             m_aCurToken.cMathChar = '\0';
1021             m_aCurToken.nGroup       = TG::Color;
1022             m_aCurToken.nLevel       = 0;
1023             m_aCurToken.aText = "hex";
1024         }
1025     }
1026     else m_aCurToken.eType         = TNONE;
1027     if (TEND != m_aCurToken.eType) m_nBufferIndex = aRes.EndPos;
1028 }
1029
1030 void SmParser::NextTokenFontSize()
1031 {
1032
1033     sal_Int32   nBufLen = m_aBufferString.getLength();
1034     ParseResult aRes;
1035     sal_Int32   nRealStart;
1036     bool        bCont;
1037     bool        hex = false;
1038
1039     do
1040     {
1041         // skip white spaces
1042         while (UnicodeType::SPACE_SEPARATOR ==
1043                     m_pSysCC->getType( m_aBufferString, m_nBufferIndex ))
1044            ++m_nBufferIndex;
1045         //hexadecimal parser
1046         aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
1047                                        coNum16StartFlags, ".", coNum16ContFlags, ".,");
1048         if (aRes.TokenType == 0)
1049         {
1050             // Try again with the default token parsing.
1051             aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
1052                                      coStartFlags, "", coContFlags, "");
1053         }
1054         else hex = true;
1055         nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
1056         m_nBufferIndex = nRealStart;
1057         bCont = false;
1058         if ( aRes.TokenType == 0  &&
1059                 nRealStart < nBufLen &&
1060                 '\n' == m_aBufferString[ nRealStart ] )
1061         {
1062             // keep data needed for tokens row and col entry up to date
1063             ++m_nRow;
1064             m_nBufferIndex = m_nColOff = nRealStart + 1;
1065             bCont = true;
1066         }
1067         else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1068         {
1069             if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
1070             {
1071                 //SkipComment
1072                 m_nBufferIndex = nRealStart + 2;
1073                 while (m_nBufferIndex < nBufLen  &&
1074                     '\n' != m_aBufferString[ m_nBufferIndex ])
1075                     ++m_nBufferIndex;
1076                 bCont = true;
1077             }
1078         }
1079     } while (bCont);
1080
1081     // set index of current token
1082     m_nTokenIndex      = m_nBufferIndex;
1083     m_aCurToken.nRow   = m_nRow;
1084     m_aCurToken.nCol   = nRealStart - m_nColOff + 1;
1085     if (nRealStart >= nBufLen) m_aCurToken.eType    = TEND;
1086     else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
1087     {
1088         if ( aRes.EndPos - nRealStart == 1 )
1089         {
1090             switch ( m_aBufferString[ nRealStart ] )
1091             {
1092                 case '*':
1093                     m_aCurToken.eType     = TMULTIPLY;
1094                     m_aCurToken.cMathChar = MS_MULTIPLY;
1095                     m_aCurToken.nGroup    = TG::Product;
1096                     m_aCurToken.nLevel    = 0;
1097                     m_aCurToken.aText     = "*";
1098                     break;
1099                 case '+':
1100                     m_aCurToken.eType     = TPLUS;
1101                     m_aCurToken.cMathChar = MS_PLUS;
1102                     m_aCurToken.nGroup    = TG::UnOper | TG::Sum;
1103                     m_aCurToken.nLevel    = 5;
1104                     m_aCurToken.aText     = "+";
1105                     break;
1106                 case '-':
1107                     m_aCurToken.eType     = TMINUS;
1108                     m_aCurToken.cMathChar = MS_MINUS;
1109                     m_aCurToken.nGroup    = TG::UnOper | TG::Sum;
1110                     m_aCurToken.nLevel    = 5;
1111                     m_aCurToken.aText     = "-";
1112                     break;
1113                 case '/':
1114                     m_aCurToken.eType     = TDIVIDEBY;
1115                     m_aCurToken.cMathChar = MS_SLASH;
1116                     m_aCurToken.nGroup    = TG::Product;
1117                     m_aCurToken.nLevel    = 0;
1118                     m_aCurToken.aText     = "/";
1119                     break;
1120                 default:
1121                     m_aCurToken.eType     = TNONE;
1122                     break;
1123             }
1124         }
1125         else m_aCurToken.eType = TNONE;
1126     }
1127     else if(hex)
1128     {
1129         assert(aRes.EndPos > 0);
1130         sal_Int32 n = aRes.EndPos - nRealStart;
1131         assert(n >= 0);
1132         m_aCurToken.eType      = THEX;
1133         m_aCurToken.cMathChar  = '\0';
1134         m_aCurToken.nGroup     = TG::NONE;
1135         m_aCurToken.nLevel     = 5;
1136         m_aCurToken.aText      = m_aBufferString.copy( nRealStart, n );
1137     }
1138     else m_aCurToken.eType     = TNONE;
1139     if (TEND != m_aCurToken.eType) m_nBufferIndex = aRes.EndPos;
1140 }
1141
1142 namespace
1143 {
1144     SmNodeArray buildNodeArray(std::vector<std::unique_ptr<SmNode>>& rSubNodes)
1145     {
1146         SmNodeArray aSubArray(rSubNodes.size());
1147         for (size_t i = 0; i < rSubNodes.size(); ++i)
1148             aSubArray[i] = rSubNodes[i].release();
1149         return aSubArray;
1150     }
1151 } //end namespace
1152
1153 // grammar
1154 /*************************************************************************************************/
1155
1156 std::unique_ptr<SmTableNode> SmParser::DoTable()
1157 {
1158     DepthProtect aDepthGuard(m_nParseDepth);
1159     if (aDepthGuard.TooDeep())
1160         throw std::range_error("parser depth limit");
1161
1162     std::vector<std::unique_ptr<SmNode>> aLineArray;
1163     aLineArray.push_back(DoLine());
1164     while (m_aCurToken.eType == TNEWLINE)
1165     {
1166         NextToken();
1167         aLineArray.push_back(DoLine());
1168     }
1169     assert(m_aCurToken.eType == TEND);
1170     std::unique_ptr<SmTableNode> xSNode(new SmTableNode(m_aCurToken));
1171     xSNode->SetSubNodes(buildNodeArray(aLineArray));
1172     return xSNode;
1173 }
1174
1175 std::unique_ptr<SmNode> SmParser::DoAlign(bool bUseExtraSpaces)
1176     // parse alignment info (if any), then go on with rest of expression
1177 {
1178     DepthProtect aDepthGuard(m_nParseDepth);
1179     if (aDepthGuard.TooDeep())
1180         throw std::range_error("parser depth limit");
1181
1182     std::unique_ptr<SmStructureNode> xSNode;
1183
1184     if (TokenInGroup(TG::Align))
1185     {
1186         xSNode.reset(new SmAlignNode(m_aCurToken));
1187
1188         NextToken();
1189
1190         // allow for just one align statement in 5.0
1191         if (TokenInGroup(TG::Align))
1192             return DoError(SmParseError::DoubleAlign);
1193     }
1194
1195     auto pNode = DoExpression(bUseExtraSpaces);
1196
1197     if (xSNode)
1198     {
1199         xSNode->SetSubNode(0, pNode.release());
1200         return xSNode;
1201     }
1202     return pNode;
1203 }
1204
1205 // Postcondition: m_aCurToken.eType == TEND || m_aCurToken.eType == TNEWLINE
1206 std::unique_ptr<SmNode> SmParser::DoLine()
1207 {
1208     DepthProtect aDepthGuard(m_nParseDepth);
1209     if (aDepthGuard.TooDeep())
1210         throw std::range_error("parser depth limit");
1211
1212     std::vector<std::unique_ptr<SmNode>> ExpressionArray;
1213
1214     // start with single expression that may have an alignment statement
1215     // (and go on with expressions that must not have alignment
1216     // statements in 'while' loop below. See also 'Expression()'.)
1217     if (m_aCurToken.eType != TEND  &&  m_aCurToken.eType != TNEWLINE)
1218         ExpressionArray.push_back(DoAlign());
1219
1220     while (m_aCurToken.eType != TEND  &&  m_aCurToken.eType != TNEWLINE)
1221         ExpressionArray.push_back(DoExpression());
1222
1223     //If there's no expression, add an empty one.
1224     //this is to avoid a formula tree without any caret
1225     //positions, in visual formula editor.
1226     if(ExpressionArray.empty())
1227     {
1228         SmToken aTok;
1229         aTok.eType = TNEWLINE;
1230         ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok)));
1231     }
1232
1233     auto xSNode = std::make_unique<SmLineNode>(m_aCurToken);
1234     xSNode->SetSubNodes(buildNodeArray(ExpressionArray));
1235     return xSNode;
1236 }
1237
1238 std::unique_ptr<SmNode> SmParser::DoExpression(bool bUseExtraSpaces)
1239 {
1240     DepthProtect aDepthGuard(m_nParseDepth);
1241     if (aDepthGuard.TooDeep())
1242         throw std::range_error("parser depth limit");
1243
1244     std::vector<std::unique_ptr<SmNode>> RelationArray;
1245     RelationArray.push_back(DoRelation());
1246     while (m_aCurToken.nLevel >= 4)
1247         RelationArray.push_back(DoRelation());
1248
1249     if (RelationArray.size() > 1)
1250     {
1251         std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken));
1252         xSNode->SetSubNodes(buildNodeArray(RelationArray));
1253         xSNode->SetUseExtraSpaces(bUseExtraSpaces);
1254         return xSNode;
1255     }
1256     else
1257     {
1258         // This expression has only one node so just push this node.
1259         return std::move(RelationArray[0]);
1260     }
1261 }
1262
1263 std::unique_ptr<SmNode> SmParser::DoRelation()
1264 {
1265     DepthProtect aDepthGuard(m_nParseDepth);
1266     if (aDepthGuard.TooDeep())
1267         throw std::range_error("parser depth limit");
1268
1269     int nDepthLimit = m_nParseDepth;
1270
1271     auto xFirst = DoSum();
1272     while (TokenInGroup(TG::Relation))
1273     {
1274         std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1275         auto xSecond = DoOpSubSup();
1276         auto xThird = DoSum();
1277         xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
1278         xFirst = std::move(xSNode);
1279
1280         ++m_nParseDepth;
1281         if (aDepthGuard.TooDeep())
1282             throw std::range_error("parser depth limit");
1283     }
1284
1285     m_nParseDepth = nDepthLimit;
1286
1287     return xFirst;
1288 }
1289
1290 std::unique_ptr<SmNode> SmParser::DoSum()
1291 {
1292     DepthProtect aDepthGuard(m_nParseDepth);
1293     if (aDepthGuard.TooDeep())
1294         throw std::range_error("parser depth limit");
1295
1296     auto xFirst = DoProduct();
1297     while (TokenInGroup(TG::Sum))
1298     {
1299         std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1300         auto xSecond = DoOpSubSup();
1301         auto xThird = DoProduct();
1302         xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
1303         xFirst = std::move(xSNode);
1304     }
1305     return xFirst;
1306 }
1307
1308 std::unique_ptr<SmNode> SmParser::DoProduct()
1309 {
1310     DepthProtect aDepthGuard(m_nParseDepth);
1311     if (aDepthGuard.TooDeep())
1312         throw std::range_error("parser depth limit");
1313
1314     auto xFirst = DoPower();
1315
1316     int nDepthLimit = 0;
1317
1318     while (TokenInGroup(TG::Product))
1319     {
1320         //this linear loop builds a recursive structure, if it gets
1321         //too deep then later processing, e.g. releasing the tree,
1322         //can exhaust stack
1323         if (nDepthLimit > DEPTH_LIMIT)
1324             throw std::range_error("parser depth limit");
1325
1326         std::unique_ptr<SmStructureNode> xSNode;
1327         std::unique_ptr<SmNode> xOper;
1328         bool bSwitchArgs = false;
1329
1330         SmTokenType eType = m_aCurToken.eType;
1331         switch (eType)
1332         {
1333             case TOVER:
1334                 xSNode.reset(new SmBinVerNode(m_aCurToken));
1335                 xOper.reset(new SmRectangleNode(m_aCurToken));
1336                 NextToken();
1337                 break;
1338
1339             case TBOPER:
1340                 xSNode.reset(new SmBinHorNode(m_aCurToken));
1341
1342                 NextToken();
1343
1344                 //Let the glyph node know it's a binary operation
1345                 m_aCurToken.eType = TBOPER;
1346                 m_aCurToken.nGroup = TG::Product;
1347                 xOper = DoGlyphSpecial();
1348                 break;
1349
1350             case TOVERBRACE :
1351             case TUNDERBRACE :
1352                 xSNode.reset(new SmVerticalBraceNode(m_aCurToken));
1353                 xOper.reset(new SmMathSymbolNode(m_aCurToken));
1354
1355                 NextToken();
1356                 break;
1357
1358             case TWIDEBACKSLASH:
1359             case TWIDESLASH:
1360             {
1361                 SmBinDiagonalNode *pSTmp = new SmBinDiagonalNode(m_aCurToken);
1362                 pSTmp->SetAscending(eType == TWIDESLASH);
1363                 xSNode.reset(pSTmp);
1364
1365                 xOper.reset(new SmPolyLineNode(m_aCurToken));
1366                 NextToken();
1367
1368                 bSwitchArgs = true;
1369                 break;
1370             }
1371
1372             default:
1373                 xSNode.reset(new SmBinHorNode(m_aCurToken));
1374
1375                 xOper = DoOpSubSup();
1376         }
1377
1378         auto xArg = DoPower();
1379
1380         if (bSwitchArgs)
1381         {
1382             //! vgl siehe SmBinDiagonalNode::Arrange
1383             xSNode->SetSubNodes(std::move(xFirst), std::move(xArg), std::move(xOper));
1384         }
1385         else
1386         {
1387             xSNode->SetSubNodes(std::move(xFirst), std::move(xOper), std::move(xArg));
1388         }
1389         xFirst = std::move(xSNode);
1390         ++nDepthLimit;
1391     }
1392     return xFirst;
1393 }
1394
1395 std::unique_ptr<SmNode> SmParser::DoSubSup(TG nActiveGroup, SmNode *pGivenNode)
1396 {
1397     std::unique_ptr<SmNode> xGivenNode(pGivenNode);
1398     DepthProtect aDepthGuard(m_nParseDepth);
1399     if (aDepthGuard.TooDeep())
1400         throw std::range_error("parser depth limit");
1401
1402     assert(nActiveGroup == TG::Power || nActiveGroup == TG::Limit);
1403     assert(m_aCurToken.nGroup == nActiveGroup);
1404
1405     std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
1406     //! Of course 'm_aCurToken' is just the first sub-/supscript token.
1407     //! It should be of no further interest. The positions of the
1408     //! sub-/supscripts will be identified by the corresponding subnodes
1409     //! index in the 'aSubNodes' array (enum value from 'SmSubSup').
1410
1411     pNode->SetUseLimits(nActiveGroup == TG::Limit);
1412
1413     // initialize subnodes array
1414     std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
1415     aSubNodes[0] = std::move(xGivenNode);
1416
1417     // process all sub-/supscripts
1418     int  nIndex = 0;
1419     while (TokenInGroup(nActiveGroup))
1420     {
1421         SmTokenType  eType (m_aCurToken.eType);
1422
1423         switch (eType)
1424         {
1425             case TRSUB :    nIndex = static_cast<int>(RSUB);    break;
1426             case TRSUP :    nIndex = static_cast<int>(RSUP);    break;
1427             case TFROM :
1428             case TCSUB :    nIndex = static_cast<int>(CSUB);    break;
1429             case TTO :
1430             case TCSUP :    nIndex = static_cast<int>(CSUP);    break;
1431             case TLSUB :    nIndex = static_cast<int>(LSUB);    break;
1432             case TLSUP :    nIndex = static_cast<int>(LSUP);    break;
1433             default :
1434                 SAL_WARN( "starmath", "unknown case");
1435         }
1436         nIndex++;
1437         assert(1 <= nIndex  &&  nIndex <= SUBSUP_NUM_ENTRIES);
1438
1439         std::unique_ptr<SmNode> xENode;
1440         if (aSubNodes[nIndex]) // if already occupied at earlier iteration
1441         {
1442             // forget the earlier one, remember an error instead
1443             aSubNodes[nIndex].reset();
1444             xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
1445         }
1446         else
1447         {
1448             // skip sub-/supscript token
1449             NextToken();
1450         }
1451
1452         // get sub-/supscript node
1453         // (even when we saw a double-sub/supscript error in the above
1454         // in order to minimize mess and continue parsing.)
1455         std::unique_ptr<SmNode> xSNode;
1456         if (eType == TFROM  ||  eType == TTO)
1457         {
1458             // parse limits in old 4.0 and 5.0 style
1459             xSNode = DoRelation();
1460         }
1461         else
1462             xSNode = DoTerm(true);
1463
1464         aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
1465     }
1466
1467     pNode->SetSubNodes(buildNodeArray(aSubNodes));
1468     return pNode;
1469 }
1470
1471 std::unique_ptr<SmNode> SmParser::DoSubSupEvaluate(SmNode *pGivenNode)
1472 {
1473     std::unique_ptr<SmNode> xGivenNode(pGivenNode);
1474     DepthProtect aDepthGuard(m_nParseDepth);
1475     if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
1476
1477     std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
1478     pNode->SetUseLimits(true);
1479
1480     // initialize subnodes array
1481     std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
1482     aSubNodes[0] = std::move(xGivenNode);
1483
1484     // process all sub-/supscripts
1485     int  nIndex = 0;
1486     while (TokenInGroup(TG::Limit))
1487     {
1488         SmTokenType  eType (m_aCurToken.eType);
1489
1490         switch (eType)
1491         {
1492             case TFROM :    nIndex = static_cast<int>(RSUB);    break;
1493             case TTO   :    nIndex = static_cast<int>(RSUP);    break;
1494             default :
1495                 SAL_WARN( "starmath", "unknown case");
1496         }
1497         nIndex++;
1498         assert(1 <= nIndex  &&  nIndex <= SUBSUP_NUM_ENTRIES);
1499
1500         std::unique_ptr<SmNode> xENode;
1501         if (aSubNodes[nIndex]) // if already occupied at earlier iteration
1502         {
1503             // forget the earlier one, remember an error instead
1504             aSubNodes[nIndex].reset();
1505             xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
1506         }
1507         else NextToken(); // skip sub-/supscript token
1508
1509         // get sub-/supscript node
1510         std::unique_ptr<SmNode> xSNode;
1511         xSNode = DoTerm(true);
1512
1513         aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
1514     }
1515
1516     pNode->SetSubNodes(buildNodeArray(aSubNodes));
1517     return pNode;
1518 }
1519
1520 std::unique_ptr<SmNode> SmParser::DoOpSubSup()
1521 {
1522     DepthProtect aDepthGuard(m_nParseDepth);
1523     if (aDepthGuard.TooDeep())
1524         throw std::range_error("parser depth limit");
1525
1526     // get operator symbol
1527     auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1528     // skip operator token
1529     NextToken();
1530     // get sub- supscripts if any
1531     if (m_aCurToken.nGroup == TG::Power)
1532         return DoSubSup(TG::Power, pNode.release());
1533     return pNode;
1534 }
1535
1536 std::unique_ptr<SmNode> SmParser::DoPower()
1537 {
1538     DepthProtect aDepthGuard(m_nParseDepth);
1539     if (aDepthGuard.TooDeep())
1540         throw std::range_error("parser depth limit");
1541
1542     // get body for sub- supscripts on top of stack
1543     std::unique_ptr<SmNode> xNode(DoTerm(false));
1544
1545     if (m_aCurToken.nGroup == TG::Power)
1546         return DoSubSup(TG::Power, xNode.release());
1547     return xNode;
1548 }
1549
1550 std::unique_ptr<SmBlankNode> SmParser::DoBlank()
1551 {
1552     DepthProtect aDepthGuard(m_nParseDepth);
1553     if (aDepthGuard.TooDeep())
1554         throw std::range_error("parser depth limit");
1555
1556     assert(TokenInGroup(TG::Blank));
1557     std::unique_ptr<SmBlankNode> pBlankNode(new SmBlankNode(m_aCurToken));
1558
1559     do
1560     {
1561         pBlankNode->IncreaseBy(m_aCurToken);
1562         NextToken();
1563     }
1564     while (TokenInGroup(TG::Blank));
1565
1566     // Ignore trailing spaces, if corresponding option is set
1567     if ( m_aCurToken.eType == TNEWLINE ||
1568              (m_aCurToken.eType == TEND && !utl::ConfigManager::IsFuzzing() && SM_MOD()->GetConfig()->IsIgnoreSpacesRight()) )
1569     {
1570         pBlankNode->Clear();
1571     }
1572     return pBlankNode;
1573 }
1574
1575 std::unique_ptr<SmNode> SmParser::DoTerm(bool bGroupNumberIdent)
1576 {
1577     DepthProtect aDepthGuard(m_nParseDepth);
1578     if (aDepthGuard.TooDeep())
1579         throw std::range_error("parser depth limit");
1580
1581     switch (m_aCurToken.eType)
1582     {
1583         case TESCAPE :
1584             return DoEscape();
1585
1586         case TNOSPACE :
1587         case TLGROUP :
1588         {
1589             bool bNoSpace = m_aCurToken.eType == TNOSPACE;
1590             if (bNoSpace)
1591                 NextToken();
1592             if (m_aCurToken.eType != TLGROUP)
1593                 return DoTerm(false); // nospace is no longer concerned
1594
1595             NextToken();
1596
1597             // allow for empty group
1598             if (m_aCurToken.eType == TRGROUP)
1599             {
1600                 std::unique_ptr<SmStructureNode> xSNode(new SmExpressionNode(m_aCurToken));
1601                 xSNode->SetSubNodes(nullptr, nullptr);
1602
1603                 NextToken();
1604                 return std::unique_ptr<SmNode>(xSNode.release());
1605             }
1606
1607             auto pNode = DoAlign(!bNoSpace);
1608             if (m_aCurToken.eType == TRGROUP) {
1609                 NextToken();
1610                 return pNode;
1611             }
1612             auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
1613             std::unique_ptr<SmNode> xError(DoError(SmParseError::RgroupExpected));
1614             xSNode->SetSubNodes(std::move(pNode), std::move(xError));
1615             return std::unique_ptr<SmNode>(xSNode.release());
1616         }
1617
1618         case TLEFT :
1619             return DoBrace();
1620                    case TEVALUATE:
1621             return DoEvaluate();
1622
1623         case TBLANK :
1624         case TSBLANK :
1625             return DoBlank();
1626
1627         case TTEXT :
1628             {
1629                 auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_TEXT);
1630                 NextToken();
1631                 return std::unique_ptr<SmNode>(pNode.release());
1632             }
1633         case TCHARACTER :
1634             {
1635                 auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_VARIABLE);
1636                 NextToken();
1637                 return std::unique_ptr<SmNode>(pNode.release());
1638             }
1639         case TIDENT :
1640         case TNUMBER :
1641         {
1642             auto pTextNode = std::make_unique<SmTextNode>(m_aCurToken,
1643                                              m_aCurToken.eType == TNUMBER ?
1644                                              FNT_NUMBER :
1645                                              FNT_VARIABLE);
1646             if (!bGroupNumberIdent)
1647             {
1648                 NextToken();
1649                 return std::unique_ptr<SmNode>(pTextNode.release());
1650             }
1651             std::vector<std::unique_ptr<SmNode>> aNodes;
1652             // Some people want to be able to write "x_2n" for "x_{2n}"
1653             // although e.g. LaTeX or AsciiMath interpret that as "x_2 n".
1654             // The tokenizer skips whitespaces so we need some additional
1655             // work to distinguish from "x_2 n".
1656             // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and
1657             // https://bugs.libreoffice.org/show_bug.cgi?id=55853
1658             sal_Int32 nBufLen = m_aBufferString.getLength();
1659
1660             // We need to be careful to call NextToken() only after having
1661             // tested for a whitespace separator (otherwise it will be
1662             // skipped!)
1663             bool moveToNextToken = true;
1664             while (m_nBufferIndex < nBufLen &&
1665                    m_pSysCC->getType(m_aBufferString, m_nBufferIndex) !=
1666                    UnicodeType::SPACE_SEPARATOR)
1667             {
1668                 NextToken();
1669                 if (m_aCurToken.eType != TNUMBER &&
1670                     m_aCurToken.eType != TIDENT)
1671                 {
1672                     // Neither a number nor an identifier. We just moved to
1673                     // the next token, so no need to do that again.
1674                     moveToNextToken = false;
1675                     break;
1676                 }
1677                 aNodes.emplace_back(std::unique_ptr<SmNode>(new SmTextNode(m_aCurToken,
1678                                                 m_aCurToken.eType ==
1679                                                 TNUMBER ?
1680                                                 FNT_NUMBER :
1681                                                 FNT_VARIABLE)));
1682             }
1683             if (moveToNextToken)
1684                 NextToken();
1685             if (aNodes.empty())
1686                 return std::unique_ptr<SmNode>(pTextNode.release());
1687             // We have several concatenated identifiers and numbers.
1688             // Let's group them into one SmExpressionNode.
1689             aNodes.insert(aNodes.begin(), std::move(pTextNode));
1690             std::unique_ptr<SmExpressionNode> xNode(new SmExpressionNode(SmToken()));
1691             xNode->SetSubNodes(buildNodeArray(aNodes));
1692             return std::unique_ptr<SmNode>(xNode.release());
1693         }
1694         case TLEFTARROW :
1695         case TRIGHTARROW :
1696         case TUPARROW :
1697         case TDOWNARROW :
1698         case TCIRC :
1699         case TDRARROW :
1700         case TDLARROW :
1701         case TDLRARROW :
1702         case TEXISTS :
1703         case TNOTEXISTS :
1704         case TFORALL :
1705         case TPARTIAL :
1706         case TNABLA :
1707         case TLAPLACE :
1708         case TFOURIER :
1709         case TTOWARD :
1710         case TDOTSAXIS :
1711         case TDOTSDIAG :
1712         case TDOTSDOWN :
1713         case TDOTSLOW :
1714         case TDOTSUP :
1715         case TDOTSVERT :
1716             {
1717                 auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1718                 NextToken();
1719                 return std::unique_ptr<SmNode>(pNode.release());
1720             }
1721
1722         case TSETN :
1723         case TSETZ :
1724         case TSETQ :
1725         case TSETR :
1726         case TSETC :
1727         case THBAR :
1728         case TLAMBDABAR :
1729         case TBACKEPSILON :
1730         case TALEPH :
1731         case TIM :
1732         case TRE :
1733         case TWP :
1734         case TEMPTYSET :
1735         case TINFINITY :
1736             {
1737                 auto pNode = std::make_unique<SmMathIdentifierNode>(m_aCurToken);
1738                 NextToken();
1739                 return std::unique_ptr<SmNode>(pNode.release());
1740             }
1741
1742         case TPLACE:
1743             {
1744                 auto pNode = std::make_unique<SmPlaceNode>(m_aCurToken);
1745                 NextToken();
1746                 return std::unique_ptr<SmNode>(pNode.release());
1747             }
1748
1749         case TSPECIAL:
1750             return DoSpecial();
1751
1752         case TBINOM:
1753             return DoBinom();
1754
1755         case TFRAC:
1756             return DoFrac();
1757
1758         case TSTACK:
1759             return DoStack();
1760
1761         case TMATRIX:
1762             return DoMatrix();
1763
1764         case THEX:
1765             NextTokenFontSize();
1766             if( m_aCurToken.eType == THEX )
1767             {
1768                 auto pTextNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_NUMBER );
1769                 NextToken();
1770                 return pTextNode;
1771             }
1772             else return DoError(SmParseError::NumberExpected);
1773         default:
1774             if (TokenInGroup(TG::LBrace)) return DoBrace();
1775             if (TokenInGroup(TG::Oper)) return DoOperator();
1776             if (TokenInGroup(TG::UnOper)) return DoUnOper();
1777             if ( TokenInGroup(TG::Attribute) || TokenInGroup(TG::FontAttr) )
1778             {
1779                 std::stack<std::unique_ptr<SmStructureNode>> aStack;
1780                 bool    bIsAttr;
1781                 for (;;)
1782                 {
1783                     bIsAttr = TokenInGroup(TG::Attribute);
1784                     if (!bIsAttr && !TokenInGroup(TG::FontAttr))
1785                         break;
1786                     aStack.push(bIsAttr ? DoAttribut() : DoFontAttribut());
1787                 }
1788
1789                 auto xFirstNode = DoPower();
1790                 while (!aStack.empty())
1791                 {
1792                     std::unique_ptr<SmStructureNode> xNode = std::move(aStack.top());
1793                     aStack.pop();
1794                     xNode->SetSubNodes(nullptr, std::move(xFirstNode));
1795                     xFirstNode = std::move(xNode);
1796                 }
1797                 return xFirstNode;
1798             }
1799             if (TokenInGroup(TG::Function))
1800                 return DoFunction();
1801             return DoError(SmParseError::UnexpectedChar);
1802     }
1803 }
1804
1805 std::unique_ptr<SmNode> SmParser::DoEscape()
1806 {
1807     DepthProtect aDepthGuard(m_nParseDepth);
1808     if (aDepthGuard.TooDeep())
1809         throw std::range_error("parser depth limit");
1810
1811     NextToken();
1812
1813     switch (m_aCurToken.eType)
1814     {
1815         case TLPARENT :
1816         case TRPARENT :
1817         case TLBRACKET :
1818         case TRBRACKET :
1819         case TLDBRACKET :
1820         case TRDBRACKET :
1821         case TLBRACE :
1822         case TLGROUP :
1823         case TRBRACE :
1824         case TRGROUP :
1825         case TLANGLE :
1826         case TRANGLE :
1827         case TLCEIL :
1828         case TRCEIL :
1829         case TLFLOOR :
1830         case TRFLOOR :
1831         case TLLINE :
1832         case TRLINE :
1833         case TLDLINE :
1834         case TRDLINE :
1835             {
1836                 auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1837                 NextToken();
1838                 return std::unique_ptr<SmNode>(pNode.release());
1839             }
1840         default:
1841             return DoError(SmParseError::UnexpectedToken);
1842     }
1843 }
1844
1845 std::unique_ptr<SmOperNode> SmParser::DoOperator()
1846 {
1847     DepthProtect aDepthGuard(m_nParseDepth);
1848     if (aDepthGuard.TooDeep())
1849         throw std::range_error("parser depth limit");
1850
1851     assert(TokenInGroup(TG::Oper));
1852
1853     auto xSNode = std::make_unique<SmOperNode>(m_aCurToken);
1854
1855     // get operator
1856     auto xOperator = DoOper();
1857
1858     if (m_aCurToken.nGroup == TG::Limit || m_aCurToken.nGroup == TG::Power)
1859         xOperator = DoSubSup(m_aCurToken.nGroup, xOperator.release());
1860
1861     // get argument
1862     auto xArg = DoPower();
1863
1864     xSNode->SetSubNodes(std::move(xOperator), std::move(xArg));
1865     return xSNode;
1866 }
1867
1868 std::unique_ptr<SmNode> SmParser::DoOper()
1869 {
1870     DepthProtect aDepthGuard(m_nParseDepth);
1871     if (aDepthGuard.TooDeep())
1872         throw std::range_error("parser depth limit");
1873
1874     SmTokenType  eType (m_aCurToken.eType);
1875     std::unique_ptr<SmNode> pNode;
1876
1877     switch (eType)
1878     {
1879         case TSUM :
1880         case TPROD :
1881         case TCOPROD :
1882         case TINT :
1883         case TINTD :
1884         case TIINT :
1885         case TIIINT :
1886         case TLINT :
1887         case TLLINT :
1888         case TLLLINT :
1889             pNode.reset(new SmMathSymbolNode(m_aCurToken));
1890             break;
1891
1892         case TLIM :
1893         case TLIMSUP :
1894         case TLIMINF :
1895             {
1896                 const char* pLim = nullptr;
1897                 switch (eType)
1898                 {
1899                     case TLIM :     pLim = "lim";       break;
1900                     case TLIMSUP :  pLim = "lim sup";   break;
1901                     case TLIMINF :  pLim = "lim inf";   break;
1902                     default:
1903                         break;
1904                 }
1905                 if( pLim )
1906                     m_aCurToken.aText = OUString::createFromAscii(pLim);
1907                 pNode.reset(new SmTextNode(m_aCurToken, FNT_TEXT));
1908             }
1909             break;
1910
1911         case TOPER :
1912             NextToken();
1913             OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token");
1914             m_aCurToken.eType = TOPER;
1915             pNode.reset(new SmGlyphSpecialNode(m_aCurToken));
1916             break;
1917
1918         default :
1919             assert(false && "unknown case");
1920     }
1921
1922     NextToken();
1923     return pNode;
1924 }
1925
1926 std::unique_ptr<SmStructureNode> SmParser::DoUnOper()
1927 {
1928     DepthProtect aDepthGuard(m_nParseDepth);
1929     if (aDepthGuard.TooDeep())
1930         throw std::range_error("parser depth limit");
1931
1932     assert(TokenInGroup(TG::UnOper));
1933
1934     SmToken      aNodeToken = m_aCurToken;
1935     SmTokenType  eType      = m_aCurToken.eType;
1936     bool         bIsPostfix = eType == TFACT;
1937
1938     std::unique_ptr<SmStructureNode> xSNode;
1939     std::unique_ptr<SmNode> xOper;
1940     std::unique_ptr<SmNode> xExtra;
1941     std::unique_ptr<SmNode> xArg;
1942
1943     switch (eType)
1944     {
1945         case TABS :
1946         case TSQRT :
1947             NextToken();
1948             break;
1949
1950         case TNROOT :
1951             NextToken();
1952             xExtra = DoPower();
1953             break;
1954
1955         case TUOPER :
1956             NextToken();
1957             //Let the glyph know what it is...
1958             m_aCurToken.eType = TUOPER;
1959             m_aCurToken.nGroup = TG::UnOper;
1960             xOper = DoGlyphSpecial();
1961             break;
1962
1963         case TPLUS :
1964         case TMINUS :
1965         case TPLUSMINUS :
1966         case TMINUSPLUS :
1967         case TNEG :
1968         case TFACT :
1969             xOper = DoOpSubSup();
1970             break;
1971
1972         default :
1973             assert(false);
1974     }
1975
1976     // get argument
1977     xArg = DoPower();
1978
1979     if (eType == TABS)
1980     {
1981         xSNode.reset(new SmBraceNode(aNodeToken));
1982         xSNode->SetScaleMode(SmScaleMode::Height);
1983
1984         // build nodes for left & right lines
1985         // (text, group, level of the used token are of no interest here)
1986         // we'll use row & column of the keyword for abs
1987         aNodeToken.eType = TABS;
1988
1989         aNodeToken.cMathChar = MS_VERTLINE;
1990         std::unique_ptr<SmNode> xLeft(new SmMathSymbolNode(aNodeToken));
1991         std::unique_ptr<SmNode> xRight(new SmMathSymbolNode(aNodeToken));
1992
1993         xSNode->SetSubNodes(std::move(xLeft), std::move(xArg), std::move(xRight));
1994     }
1995     else if (eType == TSQRT  ||  eType == TNROOT)
1996     {
1997         xSNode.reset(new SmRootNode(aNodeToken));
1998         xOper.reset(new SmRootSymbolNode(aNodeToken));
1999         xSNode->SetSubNodes(std::move(xExtra), std::move(xOper), std::move(xArg));
2000     }
2001     else
2002     {
2003         xSNode.reset(new SmUnHorNode(aNodeToken));
2004         if (bIsPostfix)
2005             xSNode->SetSubNodes(std::move(xArg), std::move(xOper));
2006         else
2007         {
2008             // prefix operator
2009             xSNode->SetSubNodes(std::move(xOper), std::move(xArg));
2010         }
2011     }
2012     return xSNode;
2013 }
2014
2015 std::unique_ptr<SmStructureNode> SmParser::DoAttribut()
2016 {
2017     DepthProtect aDepthGuard(m_nParseDepth);
2018     if (aDepthGuard.TooDeep())
2019         throw std::range_error("parser depth limit");
2020
2021     assert(TokenInGroup(TG::Attribute));
2022
2023     auto xSNode = std::make_unique<SmAttributNode>(m_aCurToken);
2024     std::unique_ptr<SmNode> xAttr;
2025     SmScaleMode  eScaleMode = SmScaleMode::None;
2026
2027     // get appropriate node for the attribute itself
2028     switch (m_aCurToken.eType)
2029     {   case TUNDERLINE :
2030         case TOVERLINE :
2031         case TOVERSTRIKE :
2032             xAttr.reset(new SmRectangleNode(m_aCurToken));
2033             eScaleMode = SmScaleMode::Width;
2034             break;
2035
2036         case TWIDEVEC :
2037         case TWIDEHARPOON :
2038         case TWIDEHAT :
2039         case TWIDETILDE :
2040             xAttr.reset(new SmMathSymbolNode(m_aCurToken));
2041             eScaleMode = SmScaleMode::Width;
2042             break;
2043
2044         default :
2045             xAttr.reset(new SmMathSymbolNode(m_aCurToken));
2046     }
2047
2048     NextToken();
2049
2050     xSNode->SetSubNodes(std::move(xAttr), nullptr); // the body will be filled later
2051     xSNode->SetScaleMode(eScaleMode);
2052     return xSNode;
2053 }
2054
2055 std::unique_ptr<SmStructureNode> SmParser::DoFontAttribut()
2056 {
2057     DepthProtect aDepthGuard(m_nParseDepth);
2058     if (aDepthGuard.TooDeep())
2059         throw std::range_error("parser depth limit");
2060
2061     assert(TokenInGroup(TG::FontAttr));
2062
2063     switch (m_aCurToken.eType)
2064     {
2065         case TITALIC :
2066         case TNITALIC :
2067         case TBOLD :
2068         case TNBOLD :
2069         case TPHANTOM :
2070             {
2071                 auto pNode = std::make_unique<SmFontNode>(m_aCurToken);
2072                 NextToken();
2073                 return pNode;
2074             }
2075
2076         case TSIZE :
2077             return DoFontSize();
2078
2079         case TFONT :
2080             return DoFont();
2081
2082         case TCOLOR :
2083             return DoColor();
2084
2085         default :
2086             assert(false);
2087             return {};
2088     }
2089 }
2090
2091 std::unique_ptr<SmStructureNode> SmParser::DoColor()
2092 {
2093     DepthProtect aDepthGuard(m_nParseDepth);
2094     if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
2095
2096     assert(m_aCurToken.eType == TCOLOR);
2097     NextTokenColor(false);
2098     SmToken  aToken;
2099
2100     if( m_aCurToken.eType == TDVIPSNAMESCOL ) NextTokenColor(true);
2101     if( m_aCurToken.eType == TERROR ) return DoError(SmParseError::ColorExpected);
2102     if (TokenInGroup(TG::Color))
2103     {
2104         aToken = m_aCurToken;
2105         if( m_aCurToken.eType == TRGB ) //loads r, g and b
2106         {
2107             sal_uInt32 nr, ng, nb, nc;
2108             NextTokenFontSize();
2109             if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2110                 return DoError(SmParseError::ColorExpected);
2111             nr = m_aCurToken.aText.toUInt32();
2112             if( nr > 255 )return DoError(SmParseError::ColorExpected);
2113             NextTokenFontSize();
2114             if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2115                 return DoError(SmParseError::ColorExpected);
2116             ng = m_aCurToken.aText.toUInt32();
2117             if( ng > 255 )return DoError(SmParseError::ColorExpected);
2118             NextTokenFontSize();
2119             if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2120                 return DoError(SmParseError::ColorExpected);
2121             nb = m_aCurToken.aText.toUInt32();
2122             if( nb > 255 )return DoError(SmParseError::ColorExpected);
2123             nc = nb | ng << 8 | nr << 16 | sal_uInt32(0) << 24;
2124             aToken.aText = OUString::number(nc, 16);
2125         }
2126         else if( m_aCurToken.eType == TRGBA ) //loads r, g and b
2127         {
2128             sal_uInt32 nr, na, ng, nb, nc;
2129             NextTokenFontSize();
2130             if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2131                 return DoError(SmParseError::ColorExpected);
2132             nr = m_aCurToken.aText.toUInt32();
2133             if( nr > 255 )return DoError(SmParseError::ColorExpected);
2134             NextTokenFontSize();
2135             if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2136                 return DoError(SmParseError::ColorExpected);
2137             ng = m_aCurToken.aText.toUInt32();
2138             if( ng > 255 )return DoError(SmParseError::ColorExpected);
2139             NextTokenFontSize();
2140             if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2141                 return DoError(SmParseError::ColorExpected);
2142             nb = m_aCurToken.aText.toUInt32();
2143             if( nb > 255 )return DoError(SmParseError::ColorExpected);
2144             NextTokenFontSize();
2145             if( lcl_IsNotWholeNumber(m_aCurToken.aText) )
2146                 return DoError(SmParseError::ColorExpected);
2147             na = m_aCurToken.aText.toUInt32();
2148             if( na > 255 )return DoError(SmParseError::ColorExpected);
2149             nc = nb | ng << 8 | nr << 16 | na << 24;
2150             aToken.aText = OUString::number(nc, 16);
2151         }
2152         else if( m_aCurToken.eType == THEX ) //loads hex code
2153         {
2154             sal_uInt32 nc;
2155             NextTokenFontSize();
2156             if( lcl_IsNotWholeNumber16(m_aCurToken.aText) )
2157                 return DoError(SmParseError::ColorExpected);
2158             nc = m_aCurToken.aText.toUInt32(16);
2159             aToken.aText = OUString::number(nc, 16);
2160         }
2161         NextToken();
2162     }
2163     else return DoError(SmParseError::ColorExpected);
2164
2165     std::unique_ptr<SmStructureNode> xNode;
2166     xNode.reset(new SmFontNode(aToken));
2167     return xNode;
2168 }
2169
2170 std::unique_ptr<SmStructureNode> SmParser::DoFont()
2171 {
2172     DepthProtect aDepthGuard(m_nParseDepth);
2173     if (aDepthGuard.TooDeep())
2174         throw std::range_error("parser depth limit");
2175
2176     assert(m_aCurToken.eType == TFONT);
2177
2178     std::unique_ptr<SmStructureNode> xNode;
2179     // last font rules, get that one
2180     SmToken  aToken;
2181     do
2182     {   NextToken();
2183
2184         if (TokenInGroup(TG::Font))
2185         {   aToken = m_aCurToken;
2186             NextToken();
2187         }
2188         else
2189         {
2190             return DoError(SmParseError::FontExpected);
2191         }
2192     } while (m_aCurToken.eType == TFONT);
2193
2194     xNode.reset(new SmFontNode(aToken));
2195     return xNode;
2196 }
2197
2198 std::unique_ptr<SmStructureNode> SmParser::DoFontSize()
2199 {
2200     DepthProtect aDepthGuard(m_nParseDepth);
2201     if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
2202     std::unique_ptr<SmFontNode> pFontNode(new SmFontNode(m_aCurToken));
2203     NextTokenFontSize();
2204     FontSizeType Type;
2205
2206     switch (m_aCurToken.eType)
2207     {
2208         case THEX:      Type = FontSizeType::ABSOLUT;  break;
2209         case TPLUS:     Type = FontSizeType::PLUS;     break;
2210         case TMINUS:    Type = FontSizeType::MINUS;    break;
2211         case TMULTIPLY: Type = FontSizeType::MULTIPLY; break;
2212         case TDIVIDEBY: Type = FontSizeType::DIVIDE;   break;
2213
2214         default:
2215             return DoError(SmParseError::SizeExpected);
2216     }
2217
2218     if (Type != FontSizeType::ABSOLUT)
2219     {
2220         NextTokenFontSize();
2221         if (m_aCurToken.eType != THEX) return DoError(SmParseError::SizeExpected);
2222     }
2223
2224     // get number argument
2225     Fraction  aValue( 1 );
2226     if (lcl_IsNumber( m_aCurToken.aText ))
2227     {
2228         aValue = m_aCurToken.aText.toDouble();
2229         //!! Reduce values in order to avoid numerical errors
2230         if (aValue.GetDenominator() > 1000)
2231         {
2232             tools::Long nNum   = aValue.GetNumerator();
2233             tools::Long nDenom = aValue.GetDenominator();
2234             while ( nDenom > 1000 ) //remove big denominator
2235             {
2236                 nNum    /= 10;
2237                 nDenom  /= 10;
2238             }
2239             aValue = Fraction( nNum, nDenom );
2240         }
2241     }
2242     else return DoError(SmParseError::SizeExpected);
2243
2244     pFontNode->SetSizeParameter(aValue, Type);
2245     NextToken();
2246     return pFontNode;
2247 }
2248
2249 std::unique_ptr<SmStructureNode> SmParser::DoBrace()
2250 {
2251     DepthProtect aDepthGuard(m_nParseDepth);
2252     if (aDepthGuard.TooDeep())
2253         throw std::range_error("parser depth limit");
2254
2255     assert(m_aCurToken.eType == TLEFT  ||  TokenInGroup(TG::LBrace));
2256
2257     std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
2258     std::unique_ptr<SmNode> pBody, pLeft, pRight;
2259     SmScaleMode   eScaleMode = SmScaleMode::None;
2260     SmParseError  eError     = SmParseError::None;
2261
2262     if (m_aCurToken.eType == TLEFT)
2263     {   NextToken();
2264
2265         eScaleMode = SmScaleMode::Height;
2266
2267         // check for left bracket
2268         if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
2269         {
2270             pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2271
2272             NextToken();
2273             pBody = DoBracebody(true);
2274
2275             if (m_aCurToken.eType == TRIGHT)
2276             {   NextToken();
2277
2278                 // check for right bracket
2279                 if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
2280                 {
2281                     pRight.reset(new SmMathSymbolNode(m_aCurToken));
2282                     NextToken();
2283                 }
2284                 else
2285                     eError = SmParseError::RbraceExpected;
2286             }
2287             else
2288                 eError = SmParseError::RightExpected;
2289         }
2290         else
2291             eError = SmParseError::LbraceExpected;
2292     }
2293     else
2294     {
2295         assert(TokenInGroup(TG::LBrace));
2296
2297         pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2298
2299         NextToken();
2300         pBody = DoBracebody(false);
2301
2302         SmTokenType  eExpectedType = TUNKNOWN;
2303         switch (pLeft->GetToken().eType)
2304         {   case TLPARENT :     eExpectedType = TRPARENT;   break;
2305             case TLBRACKET :    eExpectedType = TRBRACKET;  break;
2306             case TLBRACE :      eExpectedType = TRBRACE;    break;
2307             case TLDBRACKET :   eExpectedType = TRDBRACKET; break;
2308             case TLLINE :       eExpectedType = TRLINE;     break;
2309             case TLDLINE :      eExpectedType = TRDLINE;    break;
2310             case TLANGLE :      eExpectedType = TRANGLE;    break;
2311             case TLFLOOR :      eExpectedType = TRFLOOR;    break;
2312             case TLCEIL :       eExpectedType = TRCEIL;     break;
2313             case TLRLINE :      eExpectedType = TLRLINE;    break;
2314             case TLRDLINE :     eExpectedType = TLRDLINE;   break;
2315             default :
2316                 SAL_WARN("starmath", "unknown case");
2317             }
2318
2319         if (m_aCurToken.eType == eExpectedType)
2320         {
2321             pRight.reset(new SmMathSymbolNode(m_aCurToken));
2322             NextToken();
2323         }
2324         else
2325             eError = SmParseError::ParentMismatch;
2326     }
2327
2328     if (eError == SmParseError::None)
2329     {
2330         assert(pLeft);
2331         assert(pRight);
2332         xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
2333         xSNode->SetScaleMode(eScaleMode);
2334         return xSNode;
2335     }
2336     return DoError(eError);
2337 }
2338
2339 std::unique_ptr<SmBracebodyNode> SmParser::DoBracebody(bool bIsLeftRight)
2340 {
2341     DepthProtect aDepthGuard(m_nParseDepth);
2342     if (aDepthGuard.TooDeep())
2343         throw std::range_error("parser depth limit");
2344
2345     auto pBody = std::make_unique<SmBracebodyNode>(m_aCurToken);
2346
2347     std::vector<std::unique_ptr<SmNode>> aNodes;
2348     // get body if any
2349     if (bIsLeftRight)
2350     {
2351         do
2352         {
2353             if (m_aCurToken.eType == TMLINE)
2354             {
2355                 aNodes.emplace_back(std::make_unique<SmMathSymbolNode>(m_aCurToken));
2356                 NextToken();
2357             }
2358             else if (m_aCurToken.eType != TRIGHT)
2359             {
2360                 aNodes.push_back(DoAlign());
2361                 if (m_aCurToken.eType != TMLINE  &&  m_aCurToken.eType != TRIGHT)
2362                     aNodes.emplace_back(DoError(SmParseError::RightExpected));
2363             }
2364         } while (m_aCurToken.eType != TEND  &&  m_aCurToken.eType != TRIGHT);
2365     }
2366     else
2367     {
2368         do
2369         {
2370             if (m_aCurToken.eType == TMLINE)
2371             {
2372                 aNodes.emplace_back(std::make_unique<SmMathSymbolNode>(m_aCurToken));
2373                 NextToken();
2374             }
2375             else if (!TokenInGroup(TG::RBrace))
2376             {
2377                 aNodes.push_back(DoAlign());
2378                 if (m_aCurToken.eType != TMLINE  &&  !TokenInGroup(TG::RBrace))
2379                     aNodes.emplace_back(DoError(SmParseError::RbraceExpected));
2380             }
2381         } while (m_aCurToken.eType != TEND  &&  !TokenInGroup(TG::RBrace));
2382     }
2383
2384     pBody->SetSubNodes(buildNodeArray(aNodes));
2385     pBody->SetScaleMode(bIsLeftRight ? SmScaleMode::Height : SmScaleMode::None);
2386     return pBody;
2387 }
2388
2389 std::unique_ptr<SmNode> SmParser::DoEvaluate()
2390 {
2391
2392     // Checkout depth and create node
2393     DepthProtect aDepthGuard(m_nParseDepth);
2394     if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
2395     std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
2396     SmToken aToken( TRLINE, MS_VERTLINE, "evaluate", TG::RBrace, 5);
2397     aToken.nRow = m_aCurToken.nRow;
2398     aToken.nCol = m_aCurToken.nCol;
2399
2400     // Parse body && left none
2401     NextToken();
2402     std::unique_ptr<SmNode> pBody = DoPower();
2403     SmToken bToken( TNONE, '\0', "", TG::LBrace, 5);
2404     std::unique_ptr<SmNode> pLeft;
2405     pLeft.reset(new SmMathSymbolNode(bToken));
2406
2407     // Mount nodes
2408     std::unique_ptr<SmNode> pRight;
2409     pRight.reset(new SmMathSymbolNode(aToken));
2410     xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
2411     xSNode->SetScaleMode(SmScaleMode::Height); // scalable line
2412
2413     // Parse from to
2414     if ( m_aCurToken.nGroup == TG::Limit )
2415     {
2416         std::unique_ptr<SmNode> rSNode;
2417         rSNode = DoSubSupEvaluate(xSNode.release());
2418         rSNode->GetToken().eType = TEVALUATE;
2419         return rSNode;
2420     }
2421
2422     return xSNode;
2423
2424 }
2425
2426 std::unique_ptr<SmTextNode> SmParser::DoFunction()
2427 {
2428     DepthProtect aDepthGuard(m_nParseDepth);
2429     if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
2430     if( m_aCurToken.eType == TFUNC )
2431     {
2432         NextToken();    // skip "FUNC"-statement
2433         m_aCurToken.eType = TFUNC;
2434         m_aCurToken.nGroup = TG::Function;
2435     }
2436     auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_FUNCTION);
2437     NextToken();
2438     return pNode;
2439 }
2440
2441 std::unique_ptr<SmTableNode> SmParser::DoBinom()
2442 {
2443     DepthProtect aDepthGuard(m_nParseDepth);
2444     if (aDepthGuard.TooDeep())
2445         throw std::range_error("parser depth limit");
2446
2447     auto xSNode = std::make_unique<SmTableNode>(m_aCurToken);
2448
2449     NextToken();
2450
2451     auto xFirst = DoSum();
2452     auto xSecond = DoSum();
2453     xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond));
2454     return xSNode;
2455 }
2456
2457 std::unique_ptr<SmBinVerNode> SmParser::DoFrac()
2458 {
2459     DepthProtect aDepthGuard(m_nParseDepth);
2460     if (aDepthGuard.TooDeep()) throw std::range_error("parser depth limit");
2461
2462     std::unique_ptr<SmBinVerNode> xSNode = std::make_unique<SmBinVerNode>(m_aCurToken);
2463     std::unique_ptr<SmNode> xOper = std::make_unique<SmRectangleNode>(m_aCurToken);
2464
2465     NextToken();
2466
2467     auto xFirst = DoSum();
2468     auto xSecond = DoSum();
2469     xSNode->SetSubNodes(std::move(xFirst), std::move(xOper), std::move(xSecond));
2470     return xSNode;
2471 }
2472
2473 std::unique_ptr<SmStructureNode> SmParser::DoStack()
2474 {
2475     DepthProtect aDepthGuard(m_nParseDepth);
2476     if (aDepthGuard.TooDeep())
2477         throw std::range_error("parser depth limit");
2478
2479     std::unique_ptr<SmStructureNode> xSNode(new SmTableNode(m_aCurToken));
2480     NextToken();
2481     if (m_aCurToken.eType != TLGROUP)
2482         return DoError(SmParseError::LgroupExpected);
2483     std::vector<std::unique_ptr<SmNode>> aExprArr;
2484     do
2485     {
2486         NextToken();
2487         aExprArr.push_back(DoAlign());
2488     }
2489     while (m_aCurToken.eType == TPOUND);
2490
2491     if (m_aCurToken.eType == TRGROUP)
2492         NextToken();
2493     else
2494         aExprArr.emplace_back(DoError(SmParseError::RgroupExpected));
2495
2496     xSNode->SetSubNodes(buildNodeArray(aExprArr));
2497     return xSNode;
2498 }
2499
2500 std::unique_ptr<SmStructureNode> SmParser::DoMatrix()
2501 {
2502     DepthProtect aDepthGuard(m_nParseDepth);
2503     if (aDepthGuard.TooDeep())
2504         throw std::range_error("parser depth limit");
2505
2506     std::unique_ptr<SmMatrixNode> xMNode(new SmMatrixNode(m_aCurToken));
2507     NextToken();
2508     if (m_aCurToken.eType != TLGROUP)
2509         return DoError(SmParseError::LgroupExpected);
2510
2511     std::vector<std::unique_ptr<SmNode>> aExprArr;
2512     do
2513     {
2514         NextToken();
2515         aExprArr.push_back(DoAlign());
2516     }
2517     while (m_aCurToken.eType == TPOUND);
2518
2519     size_t nCol = aExprArr.size();
2520     size_t nRow = 1;
2521     while (m_aCurToken.eType == TDPOUND)
2522     {
2523         NextToken();
2524         for (size_t i = 0; i < nCol; i++)
2525         {
2526             auto xNode = DoAlign();
2527             if (i < (nCol - 1))
2528             {
2529                 if (m_aCurToken.eType == TPOUND)
2530                     NextToken();
2531                 else
2532                     xNode = DoError(SmParseError::PoundExpected);
2533             }
2534             aExprArr.emplace_back(std::move(xNode));
2535         }
2536         ++nRow;
2537     }
2538
2539     if (m_aCurToken.eType == TRGROUP)
2540         NextToken();
2541     else
2542     {
2543         std::unique_ptr<SmNode> xENode(DoError(SmParseError::RgroupExpected));
2544         if (aExprArr.empty())
2545             nRow = nCol = 1;
2546         else
2547             aExprArr.pop_back();
2548         aExprArr.emplace_back(std::move(xENode));
2549     }
2550
2551     xMNode->SetSubNodes(buildNodeArray(aExprArr));
2552     xMNode->SetRowCol(static_cast<sal_uInt16>(nRow),
2553                       static_cast<sal_uInt16>(nCol));
2554     return std::unique_ptr<SmStructureNode>(xMNode.release());
2555 }
2556
2557 std::unique_ptr<SmSpecialNode> SmParser::DoSpecial()
2558 {
2559     DepthProtect aDepthGuard(m_nParseDepth);
2560     if (aDepthGuard.TooDeep())
2561         throw std::range_error("parser depth limit");
2562
2563     bool bReplace = false;
2564     OUString &rName = m_aCurToken.aText;
2565     OUString aNewName;
2566
2567     // conversion of symbol names for 6.0 (XML) file format
2568     // (name change on import / export.
2569     // UI uses localized names XML file format does not.)
2570     if( rName.startsWith("%") )
2571     {
2572         if (IsImportSymbolNames())
2573         {
2574             aNewName = SmLocalizedSymbolData::GetUiSymbolName(rName.copy(1));
2575             bReplace = true;
2576         }
2577         else if (IsExportSymbolNames())
2578         {
2579             aNewName = SmLocalizedSymbolData::GetExportSymbolName(rName.copy(1));
2580             bReplace = true;
2581         }
2582     }
2583     if (!aNewName.isEmpty())
2584         aNewName = "%" + aNewName;
2585
2586
2587     if (bReplace && !aNewName.isEmpty() && rName != aNewName)
2588     {
2589         Replace(GetTokenIndex(), rName.getLength(), aNewName);
2590         rName = aNewName;
2591     }
2592
2593     // add symbol name to list of used symbols
2594     const OUString aSymbolName(m_aCurToken.aText.copy(1));
2595     if (!aSymbolName.isEmpty())
2596         m_aUsedSymbols.insert( aSymbolName );
2597
2598     auto pNode = std::make_unique<SmSpecialNode>(m_aCurToken);
2599     NextToken();
2600     return pNode;
2601 }
2602
2603 std::unique_ptr<SmGlyphSpecialNode> SmParser::DoGlyphSpecial()
2604 {
2605     DepthProtect aDepthGuard(m_nParseDepth);
2606     if (aDepthGuard.TooDeep())
2607         throw std::range_error("parser depth limit");
2608
2609     auto pNode = std::make_unique<SmGlyphSpecialNode>(m_aCurToken);
2610     NextToken();
2611     return pNode;
2612 }
2613
2614 std::unique_ptr<SmExpressionNode> SmParser::DoError(SmParseError eError)
2615 {
2616     DepthProtect aDepthGuard(m_nParseDepth);
2617     if (aDepthGuard.TooDeep())
2618         throw std::range_error("parser depth limit");
2619
2620     auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
2621     std::unique_ptr<SmErrorNode> pErr(new SmErrorNode(m_aCurToken));
2622     xSNode->SetSubNodes(std::move(pErr), nullptr);
2623
2624     AddError(eError, xSNode.get());
2625
2626     NextToken();
2627
2628     return xSNode;
2629 }
2630
2631 // end grammar
2632
2633
2634 SmParser::SmParser()
2635     : m_nCurError( 0 )
2636     , m_nBufferIndex( 0 )
2637     , m_nTokenIndex( 0 )
2638     , m_nRow( 0 )
2639     , m_nColOff( 0 )
2640     , m_bImportSymNames( false )
2641     , m_bExportSymNames( false )
2642     , m_nParseDepth(0)
2643     , m_aNumCC( LanguageTag( LANGUAGE_ENGLISH_US ) )
2644     , m_pSysCC( SM_MOD()->GetSysLocale().GetCharClassPtr() )
2645 {
2646 }
2647
2648 std::unique_ptr<SmTableNode> SmParser::Parse(const OUString &rBuffer)
2649 {
2650     m_aUsedSymbols.clear();
2651
2652     m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2653     m_nBufferIndex  = 0;
2654     m_nTokenIndex   = 0;
2655     m_nRow          = 1;
2656     m_nColOff       = 0;
2657     m_nCurError     = -1;
2658
2659     m_aErrDescList.clear();
2660
2661     NextToken();
2662     return DoTable();
2663 }
2664
2665 std::unique_ptr<SmNode> SmParser::ParseExpression(const OUString &rBuffer)
2666 {
2667     m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2668     m_nBufferIndex  = 0;
2669     m_nTokenIndex   = 0;
2670     m_nRow          = 1;
2671     m_nColOff       = 0;
2672     m_nCurError     = -1;
2673
2674     m_aErrDescList.clear();
2675
2676     NextToken();
2677     return DoExpression();
2678 }
2679
2680
2681 void SmParser::AddError(SmParseError Type, SmNode *pNode)
2682 {
2683     std::unique_ptr<SmErrorDesc> pErrDesc(new SmErrorDesc);
2684
2685     pErrDesc->m_eType = Type;
2686     pErrDesc->m_pNode = pNode;
2687     pErrDesc->m_aText = SmResId(RID_ERR_IDENT);
2688
2689     const char* pRID;
2690     switch (Type)
2691     {
2692         case SmParseError::UnexpectedChar:     pRID = RID_ERR_UNEXPECTEDCHARACTER; break;
2693         case SmParseError::UnexpectedToken:    pRID = RID_ERR_UNEXPECTEDTOKEN;     break;
2694         case SmParseError::PoundExpected:      pRID = RID_ERR_POUNDEXPECTED;       break;
2695         case SmParseError::ColorExpected:      pRID = RID_ERR_COLOREXPECTED;       break;
2696         case SmParseError::LgroupExpected:     pRID = RID_ERR_LGROUPEXPECTED;      break;
2697         case SmParseError::RgroupExpected:     pRID = RID_ERR_RGROUPEXPECTED;      break;
2698         case SmParseError::LbraceExpected:     pRID = RID_ERR_LBRACEEXPECTED;      break;
2699         case SmParseError::RbraceExpected:     pRID = RID_ERR_RBRACEEXPECTED;      break;
2700         case SmParseError::ParentMismatch:     pRID = RID_ERR_PARENTMISMATCH;      break;
2701         case SmParseError::RightExpected:      pRID = RID_ERR_RIGHTEXPECTED;       break;
2702         case SmParseError::FontExpected:       pRID = RID_ERR_FONTEXPECTED;        break;
2703         case SmParseError::SizeExpected:       pRID = RID_ERR_SIZEEXPECTED;        break;
2704         case SmParseError::DoubleAlign:        pRID = RID_ERR_DOUBLEALIGN;         break;
2705         case SmParseError::DoubleSubsupscript: pRID = RID_ERR_DOUBLESUBSUPSCRIPT;  break;
2706         case SmParseError::NumberExpected:     pRID = RID_ERR_NUMBEREXPECTED;      break;
2707         default:
2708             assert(false);
2709             return;
2710     }
2711     pErrDesc->m_aText += SmResId(pRID);
2712
2713     m_aErrDescList.push_back(std::move(pErrDesc));
2714 }
2715
2716
2717 const SmErrorDesc *SmParser::NextError()
2718 {
2719     if ( !m_aErrDescList.empty() )
2720         if (m_nCurError > 0) return m_aErrDescList[ --m_nCurError ].get();
2721         else
2722         {
2723             m_nCurError = 0;
2724             return m_aErrDescList[ m_nCurError ].get();
2725         }
2726     else return nullptr;
2727 }
2728
2729
2730 const SmErrorDesc *SmParser::PrevError()
2731 {
2732     if ( !m_aErrDescList.empty() )
2733         if (m_nCurError < static_cast<int>(m_aErrDescList.size() - 1)) return m_aErrDescList[ ++m_nCurError ].get();
2734         else
2735         {
2736             m_nCurError = static_cast<int>(m_aErrDescList.size() - 1);
2737             return m_aErrDescList[ m_nCurError ].get();
2738         }
2739     else return nullptr;
2740 }
2741
2742
2743 const SmErrorDesc *SmParser::GetError()
2744 {
2745     if ( !m_aErrDescList.empty() )
2746         return m_aErrDescList.front().get();
2747     return nullptr;
2748 }
2749
2750 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */