starmath/source/parse.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <memory>
  21 #include <com/sun/star/i18n/UnicodeType.hpp>
  22 #include <i18nlangtag/lang.h>
  23 #include <tools/lineend.hxx>
  24 #include <unotools/configmgr.hxx>
  25 #include <unotools/syslocale.hxx>
  26 #include <o3tl/make_unique.hxx>
  27 #include <parse.hxx>
  28 #include <strings.hrc>
  29 #include <smmod.hxx>
  30 #include "cfgitem.hxx"
  31 #include <cassert>
  32 #include <stack>
  33
  34 using namespace ::com::sun::star::i18n;
  35
  36
  37 SmToken::SmToken()
  38     : eType(TUNKNOWN)
  39     , cMathChar('\0')
  40     , nGroup(TG::NONE)
  41     , nLevel(0)
  42     , nRow(0)
  43     , nCol(0)
  44 {
  45 }
  46
  47 SmToken::SmToken(SmTokenType eTokenType,
  48                  sal_Unicode cMath,
  49                  const sal_Char* pText,
  50                  TG nTokenGroup,
  51                  sal_uInt16 nTokenLevel)
  52     : aText(OUString::createFromAscii(pText))
  53     , eType(eTokenType)
  54     , cMathChar(cMath)
  55     , nGroup(nTokenGroup)
  56     , nLevel(nTokenLevel)
  57     , nRow(0)
  58     , nCol(0)
  59 {
  60 }
  61
  62
  63 static const SmTokenTableEntry aTokenTable[] =
  64 {
  65     { "Im" , TIM, MS_IM, TG::Standalone, 5 },
  66     { "Re" , TRE, MS_RE, TG::Standalone, 5 },
  67     { "abs", TABS, '\0', TG::UnOper, 13 },
  68     { "arcosh", TACOSH, '\0', TG::Function, 5 },
  69     { "arcoth", TACOTH, '\0', TG::Function, 5 },
  70     { "acute", TACUTE, MS_ACUTE, TG::Attribute, 5 },
  71     { "aleph" , TALEPH, MS_ALEPH, TG::Standalone, 5 },
  72     { "alignb", TALIGNC, '\0', TG::Align, 0},
  73     { "alignc", TALIGNC, '\0', TG::Align, 0},
  74     { "alignl", TALIGNL, '\0', TG::Align, 0},
  75     { "alignm", TALIGNC, '\0', TG::Align, 0},
  76     { "alignr", TALIGNR, '\0', TG::Align, 0},
  77     { "alignt", TALIGNC, '\0', TG::Align, 0},
  78     { "and", TAND, MS_AND, TG::Product, 0},
  79     { "approx", TAPPROX, MS_APPROX, TG::Relation, 0},
  80     { "aqua", TAQUA, '\0', TG::Color, 0},
  81     { "arccos", TACOS, '\0', TG::Function, 5},
  82     { "arccot", TACOT, '\0', TG::Function, 5},
  83     { "arcsin", TASIN, '\0', TG::Function, 5},
  84     { "arctan", TATAN, '\0', TG::Function, 5},
  85     { "arsinh", TASINH, '\0', TG::Function, 5},
  86     { "artanh", TATANH, '\0', TG::Function, 5},
  87     { "backepsilon" , TBACKEPSILON, MS_BACKEPSILON, TG::Standalone, 5},
  88     { "bar", TBAR, MS_BAR, TG::Attribute, 5},
  89     { "binom", TBINOM, '\0', TG::NONE, 5 },
  90     { "black", TBLACK, '\0', TG::Color, 0},
  91     { "blue", TBLUE, '\0', TG::Color, 0},
  92     { "bold", TBOLD, '\0', TG::FontAttr, 5},
  93     { "boper", TBOPER, '\0', TG::Product, 0},
  94     { "breve", TBREVE, MS_BREVE, TG::Attribute, 5},
  95     { "bslash", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
  96     { "cdot", TCDOT, MS_CDOT, TG::Product, 0},
  97     { "check", TCHECK, MS_CHECK, TG::Attribute, 5},
  98     { "circ" , TCIRC, MS_CIRC, TG::Standalone, 5},
  99     { "circle", TCIRCLE, MS_CIRCLE, TG::Attribute, 5},
 100     { "color", TCOLOR, '\0', TG::FontAttr, 5},
 101     { "coprod", TCOPROD, MS_COPROD, TG::Oper, 5},
 102     { "cos", TCOS, '\0', TG::Function, 5},
 103     { "cosh", TCOSH, '\0', TG::Function, 5},
 104     { "cot", TCOT, '\0', TG::Function, 5},
 105     { "coth", TCOTH, '\0', TG::Function, 5},
 106     { "csub", TCSUB, '\0', TG::Power, 0},
 107     { "csup", TCSUP, '\0', TG::Power, 0},
 108     { "cyan", TCYAN, '\0', TG::Color, 0},
 109     { "dddot", TDDDOT, MS_DDDOT, TG::Attribute, 5},
 110     { "ddot", TDDOT, MS_DDOT, TG::Attribute, 5},
 111     { "def", TDEF, MS_DEF, TG::Relation, 0},
 112     { "div", TDIV, MS_DIV, TG::Product, 0},
 113     { "divides", TDIVIDES, MS_LINE, TG::Relation, 0},
 114     { "dlarrow" , TDLARROW, MS_DLARROW, TG::Standalone, 5},
 115     { "dlrarrow" , TDLRARROW, MS_DLRARROW, TG::Standalone, 5},
 116     { "dot", TDOT, MS_DOT, TG::Attribute, 5},
 117     { "dotsaxis", TDOTSAXIS, MS_DOTSAXIS, TG::Standalone, 5}, // 5 to continue expression
 118     { "dotsdiag", TDOTSDIAG, MS_DOTSUP, TG::Standalone, 5},
 119     { "dotsdown", TDOTSDOWN, MS_DOTSDOWN, TG::Standalone, 5},
 120     { "dotslow", TDOTSLOW, MS_DOTSLOW, TG::Standalone, 5},
 121     { "dotsup", TDOTSUP, MS_DOTSUP, TG::Standalone, 5},
 122     { "dotsvert", TDOTSVERT, MS_DOTSVERT, TG::Standalone, 5},
 123     { "downarrow" , TDOWNARROW, MS_DOWNARROW, TG::Standalone, 5},
 124     { "drarrow" , TDRARROW, MS_DRARROW, TG::Standalone, 5},
 125     { "emptyset" , TEMPTYSET, MS_EMPTYSET, TG::Standalone, 5},
 126     { "equiv", TEQUIV, MS_EQUIV, TG::Relation, 0},
 127     { "exists", TEXISTS, MS_EXISTS, TG::Standalone, 5},
 128     { "notexists", TNOTEXISTS, MS_NOTEXISTS, TG::Standalone, 5},
 129     { "exp", TEXP, '\0', TG::Function, 5},
 130     { "fact", TFACT, MS_FACT, TG::UnOper, 5},
 131     { "fixed", TFIXED, '\0', TG::Font, 0},
 132     { "font", TFONT, '\0', TG::FontAttr, 5},
 133     { "forall", TFORALL, MS_FORALL, TG::Standalone, 5},
 134     { "from", TFROM, '\0', TG::Limit, 0},
 135     { "fuchsia", TFUCHSIA, '\0', TG::Color, 0},
 136     { "func", TFUNC, '\0', TG::Function, 5},
 137     { "ge", TGE, MS_GE, TG::Relation, 0},
 138     { "geslant", TGESLANT, MS_GESLANT, TG::Relation, 0 },
 139     { "gg", TGG, MS_GG, TG::Relation, 0},
 140     { "grave", TGRAVE, MS_GRAVE, TG::Attribute, 5},
 141     { "gray", TGRAY, '\0', TG::Color, 0},
 142     { "green", TGREEN, '\0', TG::Color, 0},
 143     { "gt", TGT, MS_GT, TG::Relation, 0},
 144     { "hat", THAT, MS_HAT, TG::Attribute, 5},
 145     { "hbar" , THBAR, MS_HBAR, TG::Standalone, 5},
 146     { "iiint", TIIINT, MS_IIINT, TG::Oper, 5},
 147     { "iint", TIINT, MS_IINT, TG::Oper, 5},
 148     { "in", TIN, MS_IN, TG::Relation, 0},
 149     { "infinity" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
 150     { "infty" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
 151     { "int", TINT, MS_INT, TG::Oper, 5},
 152     { "intd", TINTD, MS_INT, TG::Oper, 5},
 153     { "intersection", TINTERSECT, MS_INTERSECT, TG::Product, 0},
 154     { "ital", TITALIC, '\0', TG::FontAttr, 5},
 155     { "italic", TITALIC, '\0', TG::FontAttr, 5},
 156     { "lambdabar" , TLAMBDABAR, MS_LAMBDABAR, TG::Standalone, 5},
 157     { "langle", TLANGLE, MS_LMATHANGLE, TG::LBrace, 5},
 158     { "lbrace", TLBRACE, MS_LBRACE, TG::LBrace, 5},
 159     { "lceil", TLCEIL, MS_LCEIL, TG::LBrace, 5},
 160     { "ldbracket", TLDBRACKET, MS_LDBRACKET, TG::LBrace, 5},
 161     { "ldline", TLDLINE, MS_DVERTLINE, TG::LBrace, 5},
 162     { "le", TLE, MS_LE, TG::Relation, 0},
 163     { "left", TLEFT, '\0', TG::NONE, 5},
 164     { "leftarrow" , TLEFTARROW, MS_LEFTARROW, TG::Standalone, 5},
 165     { "leslant", TLESLANT, MS_LESLANT, TG::Relation, 0 },
 166     { "lfloor", TLFLOOR, MS_LFLOOR, TG::LBrace, 5},
 167     { "lim", TLIM, '\0', TG::Oper, 5},
 168     { "lime", TLIME, '\0', TG::Color, 0},
 169     { "liminf", TLIMINF, '\0', TG::Oper, 5},
 170     { "limsup", TLIMSUP, '\0', TG::Oper, 5},
 171     { "lint", TLINT, MS_LINT, TG::Oper, 5},
 172     { "ll", TLL, MS_LL, TG::Relation, 0},
 173     { "lline", TLLINE, MS_VERTLINE, TG::LBrace, 5},
 174     { "llint", TLLINT, MS_LLINT, TG::Oper, 5},
 175     { "lllint", TLLLINT, MS_LLLINT, TG::Oper, 5},
 176     { "ln", TLN, '\0', TG::Function, 5},
 177     { "log", TLOG, '\0', TG::Function, 5},
 178     { "lsub", TLSUB, '\0', TG::Power, 0},
 179     { "lsup", TLSUP, '\0', TG::Power, 0},
 180     { "lt", TLT, MS_LT, TG::Relation, 0},
 181     { "magenta", TMAGENTA, '\0', TG::Color, 0},
 182     { "maroon", TMAROON, '\0', TG::Color, 0},
 183     { "matrix", TMATRIX, '\0', TG::NONE, 5},
 184     { "minusplus", TMINUSPLUS, MS_MINUSPLUS, TG::UnOper | TG::Sum, 5},
 185     { "mline", TMLINE, MS_VERTLINE, TG::NONE, 0},      //! not in TG::RBrace, Level 0
 186     { "nabla", TNABLA, MS_NABLA, TG::Standalone, 5},
 187     { "navy", TNAVY, '\0', TG::Color, 0},
 188     { "nbold", TNBOLD, '\0', TG::FontAttr, 5},
 189     { "ndivides", TNDIVIDES, MS_NDIVIDES, TG::Relation, 0},
 190     { "neg", TNEG, MS_NEG, TG::UnOper, 5 },
 191     { "neq", TNEQ, MS_NEQ, TG::Relation, 0},
 192     { "newline", TNEWLINE, '\0', TG::NONE, 0},
 193     { "ni", TNI, MS_NI, TG::Relation, 0},
 194     { "nitalic", TNITALIC, '\0', TG::FontAttr, 5},
 195     { "none", TNONE, '\0', TG::LBrace | TG::RBrace, 0},
 196     { "nospace", TNOSPACE, '\0', TG::Standalone, 5},
 197     { "notin", TNOTIN, MS_NOTIN, TG::Relation, 0},
 198     { "nroot", TNROOT, MS_SQRT, TG::UnOper, 5},
 199     { "nsubset", TNSUBSET, MS_NSUBSET, TG::Relation, 0 },
 200     { "nsupset", TNSUPSET, MS_NSUPSET, TG::Relation, 0 },
 201     { "nsubseteq", TNSUBSETEQ, MS_NSUBSETEQ, TG::Relation, 0 },
 202     { "nsupseteq", TNSUPSETEQ, MS_NSUPSETEQ, TG::Relation, 0 },
 203     { "odivide", TODIVIDE, MS_ODIVIDE, TG::Product, 0},
 204     { "odot", TODOT, MS_ODOT, TG::Product, 0},
 205     { "olive", TOLIVE, '\0', TG::Color, 0},
 206     { "ominus", TOMINUS, MS_OMINUS, TG::Sum, 0},
 207     { "oper", TOPER, '\0', TG::Oper, 5},
 208     { "oplus", TOPLUS, MS_OPLUS, TG::Sum, 0},
 209     { "or", TOR, MS_OR, TG::Sum, 0},
 210     { "ortho", TORTHO, MS_ORTHO, TG::Relation, 0},
 211     { "otimes", TOTIMES, MS_OTIMES, TG::Product, 0},
 212     { "over", TOVER, '\0', TG::Product, 0},
 213     { "overbrace", TOVERBRACE, MS_OVERBRACE, TG::Product, 5},
 214     { "overline", TOVERLINE, '\0', TG::Attribute, 5},
 215     { "overstrike", TOVERSTRIKE, '\0', TG::Attribute, 5},
 216     { "owns", TNI, MS_NI, TG::Relation, 0},
 217     { "parallel", TPARALLEL, MS_DLINE, TG::Relation, 0},
 218     { "partial", TPARTIAL, MS_PARTIAL, TG::Standalone, 5 },
 219     { "phantom", TPHANTOM, '\0', TG::FontAttr, 5},
 220     { "plusminus", TPLUSMINUS, MS_PLUSMINUS, TG::UnOper | TG::Sum, 5},
 221     { "prec", TPRECEDES, MS_PRECEDES, TG::Relation, 0 },
 222     { "preccurlyeq", TPRECEDESEQUAL, MS_PRECEDESEQUAL, TG::Relation, 0 },
 223     { "precsim", TPRECEDESEQUIV, MS_PRECEDESEQUIV, TG::Relation, 0 },
 224     { "nprec", TNOTPRECEDES, MS_NOTPRECEDES, TG::Relation, 0 },
 225     { "prod", TPROD, MS_PROD, TG::Oper, 5},
 226     { "prop", TPROP, MS_PROP, TG::Relation, 0},
 227     { "purple", TPURPLE, '\0', TG::Color, 0},
 228     { "rangle", TRANGLE, MS_RMATHANGLE, TG::RBrace, 0},  //! 0 to terminate expression
 229     { "rbrace", TRBRACE, MS_RBRACE, TG::RBrace, 0},
 230     { "rceil", TRCEIL, MS_RCEIL, TG::RBrace, 0},
 231     { "rdbracket", TRDBRACKET, MS_RDBRACKET, TG::RBrace, 0},
 232     { "rdline", TRDLINE, MS_DVERTLINE, TG::RBrace, 0},
 233     { "red", TRED, '\0', TG::Color, 0},
 234     { "rfloor", TRFLOOR, MS_RFLOOR, TG::RBrace, 0},  //! 0 to terminate expression
 235     { "right", TRIGHT, '\0', TG::NONE, 0},
 236     { "rightarrow" , TRIGHTARROW, MS_RIGHTARROW, TG::Standalone, 5},
 237     { "rline", TRLINE, MS_VERTLINE, TG::RBrace, 0},  //! 0 to terminate expression
 238     { "rsub", TRSUB, '\0', TG::Power, 0},
 239     { "rsup", TRSUP, '\0', TG::Power, 0},
 240     { "sans", TSANS, '\0', TG::Font, 0},
 241     { "serif", TSERIF, '\0', TG::Font, 0},
 242     { "setC" , TSETC, MS_SETC, TG::Standalone, 5},
 243     { "setN" , TSETN, MS_SETN, TG::Standalone, 5},
 244     { "setQ" , TSETQ, MS_SETQ, TG::Standalone, 5},
 245     { "setR" , TSETR, MS_SETR, TG::Standalone, 5},
 246     { "setZ" , TSETZ, MS_SETZ, TG::Standalone, 5},
 247     { "setminus", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
 248     { "silver", TSILVER, '\0', TG::Color, 0},
 249     { "sim", TSIM, MS_SIM, TG::Relation, 0},
 250     { "simeq", TSIMEQ, MS_SIMEQ, TG::Relation, 0},
 251     { "sin", TSIN, '\0', TG::Function, 5},
 252     { "sinh", TSINH, '\0', TG::Function, 5},
 253     { "size", TSIZE, '\0', TG::FontAttr, 5},
 254     { "slash", TSLASH, MS_SLASH, TG::Product, 0 },
 255     { "sqrt", TSQRT, MS_SQRT, TG::UnOper, 5},
 256     { "stack", TSTACK, '\0', TG::NONE, 5},
 257     { "sub", TRSUB, '\0', TG::Power, 0},
 258     { "subset", TSUBSET, MS_SUBSET, TG::Relation, 0},
 259     { "succ", TSUCCEEDS, MS_SUCCEEDS, TG::Relation, 0 },
 260     { "succcurlyeq", TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TG::Relation, 0 },
 261     { "succsim", TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TG::Relation, 0 },
 262     { "nsucc", TNOTSUCCEEDS, MS_NOTSUCCEEDS, TG::Relation, 0 },
 263     { "subseteq", TSUBSETEQ, MS_SUBSETEQ, TG::Relation, 0},
 264     { "sum", TSUM, MS_SUM, TG::Oper, 5},
 265     { "sup", TRSUP, '\0', TG::Power, 0},
 266     { "supset", TSUPSET, MS_SUPSET, TG::Relation, 0},
 267     { "supseteq", TSUPSETEQ, MS_SUPSETEQ, TG::Relation, 0},
 268     { "tan", TTAN, '\0', TG::Function, 5},
 269     { "tanh", TTANH, '\0', TG::Function, 5},
 270     { "teal", TTEAL, '\0', TG::Color, 0},
 271     { "tilde", TTILDE, MS_TILDE, TG::Attribute, 5},
 272     { "times", TTIMES, MS_TIMES, TG::Product, 0},
 273     { "to", TTO, '\0', TG::Limit, 0},
 274     { "toward", TTOWARD, MS_RIGHTARROW, TG::Relation, 0},
 275     { "transl", TTRANSL, MS_TRANSL, TG::Relation, 0},
 276     { "transr", TTRANSR, MS_TRANSR, TG::Relation, 0},
 277     { "underbrace", TUNDERBRACE, MS_UNDERBRACE, TG::Product, 5},
 278     { "underline", TUNDERLINE, '\0', TG::Attribute, 5},
 279     { "union", TUNION, MS_UNION, TG::Sum, 0},
 280     { "uoper", TUOPER, '\0', TG::UnOper, 5},
 281     { "uparrow" , TUPARROW, MS_UPARROW, TG::Standalone, 5},
 282     { "vec", TVEC, MS_VEC, TG::Attribute, 5},
 283     { "white", TWHITE, '\0', TG::Color, 0},
 284     { "widebslash", TWIDEBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
 285     { "widehat", TWIDEHAT, MS_HAT, TG::Attribute, 5},
 286     { "widetilde", TWIDETILDE, MS_TILDE, TG::Attribute, 5},
 287     { "wideslash", TWIDESLASH, MS_SLASH, TG::Product, 0 },
 288     { "widevec", TWIDEVEC, MS_VEC, TG::Attribute, 5},
 289     { "wp" , TWP, MS_WP, TG::Standalone, 5},
 290     { "yellow", TYELLOW, '\0', TG::Color, 0}
 291 };
 292
 293 const SmTokenTableEntry * SmParser::GetTokenTableEntry( const OUString &rName )
 294 {
 295     if (!rName.isEmpty())
 296     {
 297         for (auto const &token : aTokenTable)
 298         {
 299             if (rName.equalsIgnoreAsciiCaseAscii( token.pIdent ))
 300                 return &token;
 301         }
 302     }
 303     return nullptr;
 304 }
 305
 306 namespace {
 307
 308 bool IsDelimiter( const OUString &rTxt, sal_Int32 nPos )
 309     // returns 'true' iff cChar is '\0' or a delimiter
 310 {
 311     assert(nPos <= rTxt.getLength()); //index out of range
 312
 313     if (nPos == rTxt.getLength())
 314         return true;
 315
 316     sal_Unicode cChar = rTxt[nPos];
 317
 318     // check if 'cChar' is in the delimiter table
 319     static const sal_Unicode aDelimiterTable[] =
 320     {
 321         ' ',  '\t', '\n', '\r', '+',  '-',  '*',  '/',  '=',  '#',
 322         '%',  '\\', '"',  '~',  '`',  '>',  '<',  '&',  '|',  '(',
 323         ')',  '{',  '}',  '[',  ']',  '^',  '_'
 324     };
 325     for (auto const &cDelimiter : aDelimiterTable)
 326     {
 327         if (cDelimiter == cChar)
 328             return true;
 329     }
 330
 331     sal_Int16 nTypJp = SM_MOD()->GetSysLocale().GetCharClass().getType( rTxt, nPos );
 332     return ( nTypJp == css::i18n::UnicodeType::SPACE_SEPARATOR ||
 333              nTypJp == css::i18n::UnicodeType::CONTROL);
 334 }
 335
 336 }
 337
 338 void SmParser::Replace( sal_Int32 nPos, sal_Int32 nLen, const OUString &rText )
 339 {
 340     assert( nPos + nLen <= m_aBufferString.getLength() );
 341
 342     m_aBufferString = m_aBufferString.replaceAt( nPos, nLen, rText );
 343     sal_Int32 nChg = rText.getLength() - nLen;
 344     m_nBufferIndex = m_nBufferIndex + nChg;
 345     m_nTokenIndex = m_nTokenIndex + nChg;
 346 }
 347
 348 void SmParser::NextToken()
 349 {
 350     // First character may be any alphabetic
 351     static const sal_Int32 coStartFlags =
 352         KParseTokens::ANY_LETTER |
 353         KParseTokens::IGNORE_LEADING_WS;
 354
 355     // Continuing characters may be any alphabetic
 356     static const sal_Int32 coContFlags =
 357         (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
 358         | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
 359
 360     // user-defined char continuing characters may be any alphanumeric or dot.
 361     static const sal_Int32 coUserDefinedCharContFlags =
 362         KParseTokens::ANY_LETTER_OR_NUMBER |
 363         KParseTokens::ASC_DOT |
 364         KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
 365
 366     // First character for numbers, may be any numeric or dot
 367     static const sal_Int32 coNumStartFlags =
 368         KParseTokens::ASC_DIGIT |
 369         KParseTokens::ASC_DOT |
 370         KParseTokens::IGNORE_LEADING_WS;
 371
 372     // Continuing characters for numbers, may be any numeric or dot.
 373     static const sal_Int32 coNumContFlags =
 374         coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS;
 375
 376     sal_Int32   nBufLen = m_aBufferString.getLength();
 377     ParseResult aRes;
 378     sal_Int32   nRealStart;
 379     bool        bCont;
 380     do
 381     {
 382         // skip white spaces
 383         while (UnicodeType::SPACE_SEPARATOR ==
 384                         m_pSysCC->getType( m_aBufferString, m_nBufferIndex ))
 385            ++m_nBufferIndex;
 386
 387         // Try to parse a number in a locale-independent manner using
 388         // '.' as decimal separator.
 389         // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
 390         aRes = m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER,
 391                                         m_aBufferString, m_nBufferIndex,
 392                                         coNumStartFlags, "",
 393                                         coNumContFlags, "");
 394
 395         if (aRes.TokenType == 0)
 396         {
 397             // Try again with the default token parsing.
 398             aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
 399                                      coStartFlags, "",
 400                                      coContFlags, "");
 401         }
 402
 403         nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
 404         m_nBufferIndex = nRealStart;
 405
 406         bCont = false;
 407         if ( aRes.TokenType == 0  &&
 408                 nRealStart < nBufLen &&
 409                 '\n' == m_aBufferString[ nRealStart ] )
 410         {
 411             // keep data needed for tokens row and col entry up to date
 412             ++m_nRow;
 413             m_nBufferIndex = m_nColOff = nRealStart + 1;
 414             bCont = true;
 415         }
 416         else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
 417         {
 418             if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
 419             {
 420                 //SkipComment
 421                 m_nBufferIndex = nRealStart + 2;
 422                 while (m_nBufferIndex < nBufLen  &&
 423                     '\n' != m_aBufferString[ m_nBufferIndex ])
 424                     ++m_nBufferIndex;
 425                 bCont = true;
 426             }
 427         }
 428
 429     } while (bCont);
 430
 431     // set index of current token
 432     m_nTokenIndex = m_nBufferIndex;
 433
 434     m_aCurToken.nRow   = m_nRow;
 435     m_aCurToken.nCol   = nRealStart - m_nColOff + 1;
 436
 437     bool bHandled = true;
 438     if (nRealStart >= nBufLen)
 439     {
 440         m_aCurToken.eType    = TEND;
 441         m_aCurToken.cMathChar = '\0';
 442         m_aCurToken.nGroup       = TG::NONE;
 443         m_aCurToken.nLevel       = 0;
 444         m_aCurToken.aText.clear();
 445     }
 446     else if (aRes.TokenType & KParseType::ANY_NUMBER)
 447     {
 448         assert(aRes.EndPos > 0);
 449         if ( m_aBufferString[aRes.EndPos-1] == ',' &&
 450              aRes.EndPos < nBufLen &&
 451              m_pSysCC->getType( m_aBufferString, aRes.EndPos ) != UnicodeType::SPACE_SEPARATOR )
 452         {
 453             // Comma followed by a non-space char is unlikely for decimal/thousands separator.
 454             --aRes.EndPos;
 455         }
 456         sal_Int32 n = aRes.EndPos - nRealStart;
 457         assert(n >= 0);
 458         m_aCurToken.eType      = TNUMBER;
 459         m_aCurToken.cMathChar  = '\0';
 460         m_aCurToken.nGroup     = TG::NONE;
 461         m_aCurToken.nLevel     = 5;
 462         m_aCurToken.aText      = m_aBufferString.copy( nRealStart, n );
 463
 464         SAL_WARN_IF( !IsDelimiter( m_aBufferString, aRes.EndPos ), "starmath", "identifier really finished? (compatibility!)" );
 465     }
 466     else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING)
 467     {
 468         m_aCurToken.eType      = TTEXT;
 469         m_aCurToken.cMathChar  = '\0';
 470         m_aCurToken.nGroup     = TG::NONE;
 471         m_aCurToken.nLevel     = 5;
 472         m_aCurToken.aText     = aRes.DequotedNameOrString;
 473         m_aCurToken.nRow       = m_nRow;
 474         m_aCurToken.nCol       = nRealStart - m_nColOff + 2;
 475     }
 476     else if (aRes.TokenType & KParseType::IDENTNAME)
 477     {
 478         sal_Int32 n = aRes.EndPos - nRealStart;
 479         assert(n >= 0);
 480         OUString aName( m_aBufferString.copy( nRealStart, n ) );
 481         const SmTokenTableEntry *pEntry = GetTokenTableEntry( aName );
 482
 483         if (pEntry)
 484         {
 485             m_aCurToken.eType      = pEntry->eType;
 486             m_aCurToken.cMathChar  = pEntry->cMathChar;
 487             m_aCurToken.nGroup     = pEntry->nGroup;
 488             m_aCurToken.nLevel     = pEntry->nLevel;
 489             m_aCurToken.aText      = OUString::createFromAscii( pEntry->pIdent );
 490         }
 491         else
 492         {
 493             m_aCurToken.eType      = TIDENT;
 494             m_aCurToken.cMathChar  = '\0';
 495             m_aCurToken.nGroup     = TG::NONE;
 496             m_aCurToken.nLevel     = 5;
 497             m_aCurToken.aText      = aName;
 498
 499             SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos),"starmath", "identifier really finished? (compatibility!)");
 500         }
 501     }
 502     else if (aRes.TokenType == 0  &&  '_' == m_aBufferString[ nRealStart ])
 503     {
 504         m_aCurToken.eType    = TRSUB;
 505         m_aCurToken.cMathChar = '\0';
 506         m_aCurToken.nGroup       = TG::Power;
 507         m_aCurToken.nLevel       = 0;
 508         m_aCurToken.aText = "_";
 509
 510         aRes.EndPos = nRealStart + 1;
 511     }
 512     else if (aRes.TokenType & KParseType::BOOLEAN)
 513     {
 514         sal_Int32   &rnEndPos = aRes.EndPos;
 515         if (rnEndPos - nRealStart <= 2)
 516         {
 517             sal_Unicode ch = m_aBufferString[ nRealStart ];
 518             switch (ch)
 519             {
 520                 case '<':
 521                     {
 522                         if (m_aBufferString.match("<<", nRealStart))
 523                         {
 524                             m_aCurToken.eType    = TLL;
 525                             m_aCurToken.cMathChar = MS_LL;
 526                             m_aCurToken.nGroup       = TG::Relation;
 527                             m_aCurToken.nLevel       = 0;
 528                             m_aCurToken.aText = "<<";
 529
 530                             rnEndPos = nRealStart + 2;
 531                         }
 532                         else if (m_aBufferString.match("<=", nRealStart))
 533                         {
 534                             m_aCurToken.eType    = TLE;
 535                             m_aCurToken.cMathChar = MS_LE;
 536                             m_aCurToken.nGroup       = TG::Relation;
 537                             m_aCurToken.nLevel       = 0;
 538                             m_aCurToken.aText = "<=";
 539
 540                             rnEndPos = nRealStart + 2;
 541                         }
 542                         else if (m_aBufferString.match("<-", nRealStart))
 543                         {
 544                             m_aCurToken.eType    = TLEFTARROW;
 545                             m_aCurToken.cMathChar = MS_LEFTARROW;
 546                             m_aCurToken.nGroup       = TG::Standalone;
 547                             m_aCurToken.nLevel       = 5;
 548                             m_aCurToken.aText = "<-";
 549
 550                             rnEndPos = nRealStart + 2;
 551                         }
 552                         else if (m_aBufferString.match("<>", nRealStart))
 553                         {
 554                             m_aCurToken.eType    = TNEQ;
 555                             m_aCurToken.cMathChar = MS_NEQ;
 556                             m_aCurToken.nGroup       = TG::Relation;
 557                             m_aCurToken.nLevel       = 0;
 558                             m_aCurToken.aText = "<>";
 559
 560                             rnEndPos = nRealStart + 2;
 561                         }
 562                         else if (m_aBufferString.match("<?>", nRealStart))
 563                         {
 564                             m_aCurToken.eType    = TPLACE;
 565                             m_aCurToken.cMathChar = MS_PLACE;
 566                             m_aCurToken.nGroup       = TG::NONE;
 567                             m_aCurToken.nLevel       = 5;
 568                             m_aCurToken.aText = "<?>";
 569
 570                             rnEndPos = nRealStart + 3;
 571                         }
 572                         else
 573                         {
 574                             m_aCurToken.eType    = TLT;
 575                             m_aCurToken.cMathChar = MS_LT;
 576                             m_aCurToken.nGroup       = TG::Relation;
 577                             m_aCurToken.nLevel       = 0;
 578                             m_aCurToken.aText = "<";
 579                         }
 580                     }
 581                     break;
 582                 case '>':
 583                     {
 584                         if (m_aBufferString.match(">=", nRealStart))
 585                         {
 586                             m_aCurToken.eType    = TGE;
 587                             m_aCurToken.cMathChar = MS_GE;
 588                             m_aCurToken.nGroup       = TG::Relation;
 589                             m_aCurToken.nLevel       = 0;
 590                             m_aCurToken.aText = ">=";
 591
 592                             rnEndPos = nRealStart + 2;
 593                         }
 594                         else if (m_aBufferString.match(">>", nRealStart))
 595                         {
 596                             m_aCurToken.eType    = TGG;
 597                             m_aCurToken.cMathChar = MS_GG;
 598                             m_aCurToken.nGroup       = TG::Relation;
 599                             m_aCurToken.nLevel       = 0;
 600                             m_aCurToken.aText = ">>";
 601
 602                             rnEndPos = nRealStart + 2;
 603                         }
 604                         else
 605                         {
 606                             m_aCurToken.eType    = TGT;
 607                             m_aCurToken.cMathChar = MS_GT;
 608                             m_aCurToken.nGroup       = TG::Relation;
 609                             m_aCurToken.nLevel       = 0;
 610                             m_aCurToken.aText = ">";
 611                         }
 612                     }
 613                     break;
 614                 default:
 615                     bHandled = false;
 616             }
 617         }
 618     }
 619     else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
 620     {
 621         sal_Int32   &rnEndPos = aRes.EndPos;
 622         if (rnEndPos - nRealStart == 1)
 623         {
 624             sal_Unicode ch = m_aBufferString[ nRealStart ];
 625             switch (ch)
 626             {
 627                 case '%':
 628                     {
 629                         //! modifies aRes.EndPos
 630
 631                         OSL_ENSURE( rnEndPos >= nBufLen  ||
 632                                     '%' != m_aBufferString[ rnEndPos ],
 633                                 "unexpected comment start" );
 634
 635                         // get identifier of user-defined character
 636                         ParseResult aTmpRes = m_pSysCC->parseAnyToken(
 637                                 m_aBufferString, rnEndPos,
 638                                 KParseTokens::ANY_LETTER,
 639                                 "",
 640                                 coUserDefinedCharContFlags,
 641                                 "" );
 642
 643                         sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace;
 644
 645                         // default setting for the case that no identifier
 646                         // i.e. a valid symbol-name is following the '%'
 647                         // character
 648                         m_aCurToken.eType      = TTEXT;
 649                         m_aCurToken.cMathChar  = '\0';
 650                         m_aCurToken.nGroup     = TG::NONE;
 651                         m_aCurToken.nLevel     = 5;
 652                         m_aCurToken.aText      ="%";
 653                         m_aCurToken.nRow       = m_nRow;
 654                         m_aCurToken.nCol       = nTmpStart - m_nColOff;
 655
 656                         if (aTmpRes.TokenType & KParseType::IDENTNAME)
 657                         {
 658
 659                             sal_Int32 n = aTmpRes.EndPos - nTmpStart;
 660                             m_aCurToken.eType      = TSPECIAL;
 661                             m_aCurToken.aText      = m_aBufferString.copy( nTmpStart-1, n+1 );
 662
 663                             OSL_ENSURE( aTmpRes.EndPos > rnEndPos,
 664                                     "empty identifier" );
 665                             if (aTmpRes.EndPos > rnEndPos)
 666                                 rnEndPos = aTmpRes.EndPos;
 667                             else
 668                                 ++rnEndPos;
 669                         }
 670
 671                         // if no symbol-name was found we start-over with
 672                         // finding the next token right after the '%' sign.
 673                         // I.e. we leave rnEndPos unmodified.
 674                     }
 675                     break;
 676                 case '[':
 677                     {
 678                         m_aCurToken.eType    = TLBRACKET;
 679                         m_aCurToken.cMathChar = MS_LBRACKET;
 680                         m_aCurToken.nGroup       = TG::LBrace;
 681                         m_aCurToken.nLevel       = 5;
 682                         m_aCurToken.aText = "[";
 683                     }
 684                     break;
 685                 case '\\':
 686                     {
 687                         m_aCurToken.eType    = TESCAPE;
 688                         m_aCurToken.cMathChar = '\0';
 689                         m_aCurToken.nGroup       = TG::NONE;
 690                         m_aCurToken.nLevel       = 5;
 691                         m_aCurToken.aText = "\\";
 692                     }
 693                     break;
 694                 case ']':
 695                     {
 696                         m_aCurToken.eType    = TRBRACKET;
 697                         m_aCurToken.cMathChar = MS_RBRACKET;
 698                         m_aCurToken.nGroup       = TG::RBrace;
 699                         m_aCurToken.nLevel       = 0;
 700                         m_aCurToken.aText = "]";
 701                     }
 702                     break;
 703                 case '^':
 704                     {
 705                         m_aCurToken.eType    = TRSUP;
 706                         m_aCurToken.cMathChar = '\0';
 707                         m_aCurToken.nGroup       = TG::Power;
 708                         m_aCurToken.nLevel       = 0;
 709                         m_aCurToken.aText = "^";
 710                     }
 711                     break;
 712                 case '`':
 713                     {
 714                         m_aCurToken.eType    = TSBLANK;
 715                         m_aCurToken.cMathChar = '\0';
 716                         m_aCurToken.nGroup       = TG::Blank;
 717                         m_aCurToken.nLevel       = 5;
 718                         m_aCurToken.aText = "`";
 719                     }
 720                     break;
 721                 case '{':
 722                     {
 723                         m_aCurToken.eType    = TLGROUP;
 724                         m_aCurToken.cMathChar = MS_LBRACE;
 725                         m_aCurToken.nGroup       = TG::NONE;
 726                         m_aCurToken.nLevel       = 5;
 727                         m_aCurToken.aText = "{";
 728                     }
 729                     break;
 730                 case '|':
 731                     {
 732                         m_aCurToken.eType    = TOR;
 733                         m_aCurToken.cMathChar = MS_OR;
 734                         m_aCurToken.nGroup       = TG::Sum;
 735                         m_aCurToken.nLevel       = 0;
 736                         m_aCurToken.aText = "|";
 737                     }
 738                     break;
 739                 case '}':
 740                     {
 741                         m_aCurToken.eType    = TRGROUP;
 742                         m_aCurToken.cMathChar = MS_RBRACE;
 743                         m_aCurToken.nGroup       = TG::NONE;
 744                         m_aCurToken.nLevel       = 0;
 745                         m_aCurToken.aText = "}";
 746                     }
 747                     break;
 748                 case '~':
 749                     {
 750                         m_aCurToken.eType    = TBLANK;
 751                         m_aCurToken.cMathChar = '\0';
 752                         m_aCurToken.nGroup       = TG::Blank;
 753                         m_aCurToken.nLevel       = 5;
 754                         m_aCurToken.aText = "~";
 755                     }
 756                     break;
 757                 case '#':
 758                     {
 759                         if (m_aBufferString.match("##", nRealStart))
 760                         {
 761                             m_aCurToken.eType    = TDPOUND;
 762                             m_aCurToken.cMathChar = '\0';
 763                             m_aCurToken.nGroup       = TG::NONE;
 764                             m_aCurToken.nLevel       = 0;
 765                             m_aCurToken.aText = "##";
 766
 767                             rnEndPos = nRealStart + 2;
 768                         }
 769                         else
 770                         {
 771                             m_aCurToken.eType    = TPOUND;
 772                             m_aCurToken.cMathChar = '\0';
 773                             m_aCurToken.nGroup       = TG::NONE;
 774                             m_aCurToken.nLevel       = 0;
 775                             m_aCurToken.aText = "#";
 776                         }
 777                     }
 778                     break;
 779                 case '&':
 780                     {
 781                         m_aCurToken.eType    = TAND;
 782                         m_aCurToken.cMathChar = MS_AND;
 783                         m_aCurToken.nGroup       = TG::Product;
 784                         m_aCurToken.nLevel       = 0;
 785                         m_aCurToken.aText = "&";
 786                     }
 787                     break;
 788                 case '(':
 789                     {
 790                         m_aCurToken.eType    = TLPARENT;
 791                         m_aCurToken.cMathChar = MS_LPARENT;
 792                         m_aCurToken.nGroup       = TG::LBrace;
 793                         m_aCurToken.nLevel       = 5;     //! 0 to continue expression
 794                         m_aCurToken.aText = "(";
 795                     }
 796                     break;
 797                 case ')':
 798                     {
 799                         m_aCurToken.eType    = TRPARENT;
 800                         m_aCurToken.cMathChar = MS_RPARENT;
 801                         m_aCurToken.nGroup       = TG::RBrace;
 802                         m_aCurToken.nLevel       = 0;     //! 0 to terminate expression
 803                         m_aCurToken.aText = ")";
 804                     }
 805                     break;
 806                 case '*':
 807                     {
 808                         m_aCurToken.eType    = TMULTIPLY;
 809                         m_aCurToken.cMathChar = MS_MULTIPLY;
 810                         m_aCurToken.nGroup       = TG::Product;
 811                         m_aCurToken.nLevel       = 0;
 812                         m_aCurToken.aText = "*";
 813                     }
 814                     break;
 815                 case '+':
 816                     {
 817                         if (m_aBufferString.match("+-", nRealStart))
 818                         {
 819                             m_aCurToken.eType    = TPLUSMINUS;
 820                             m_aCurToken.cMathChar = MS_PLUSMINUS;
 821                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
 822                             m_aCurToken.nLevel       = 5;
 823                             m_aCurToken.aText = "+-";
 824
 825                             rnEndPos = nRealStart + 2;
 826                         }
 827                         else
 828                         {
 829                             m_aCurToken.eType    = TPLUS;
 830                             m_aCurToken.cMathChar = MS_PLUS;
 831                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
 832                             m_aCurToken.nLevel       = 5;
 833                             m_aCurToken.aText = "+";
 834                         }
 835                     }
 836                     break;
 837                 case '-':
 838                     {
 839                         if (m_aBufferString.match("-+", nRealStart))
 840                         {
 841                             m_aCurToken.eType    = TMINUSPLUS;
 842                             m_aCurToken.cMathChar = MS_MINUSPLUS;
 843                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
 844                             m_aCurToken.nLevel       = 5;
 845                             m_aCurToken.aText = "-+";
 846
 847                             rnEndPos = nRealStart + 2;
 848                         }
 849                         else if (m_aBufferString.match("->", nRealStart))
 850                         {
 851                             m_aCurToken.eType    = TRIGHTARROW;
 852                             m_aCurToken.cMathChar = MS_RIGHTARROW;
 853                             m_aCurToken.nGroup       = TG::Standalone;
 854                             m_aCurToken.nLevel       = 5;
 855                             m_aCurToken.aText = "->";
 856
 857                             rnEndPos = nRealStart + 2;
 858                         }
 859                         else
 860                         {
 861                             m_aCurToken.eType    = TMINUS;
 862                             m_aCurToken.cMathChar = MS_MINUS;
 863                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
 864                             m_aCurToken.nLevel       = 5;
 865                             m_aCurToken.aText = "-";
 866                         }
 867                     }
 868                     break;
 869                 case '.':
 870                     {
 871                         // Only one character? Then it can't be a number.
 872                         if (m_nBufferIndex < m_aBufferString.getLength() - 1)
 873                         {
 874                             // for compatibility with SO5.2
 875                             // texts like .34 ...56 ... h ...78..90
 876                             // will be treated as numbers
 877                             m_aCurToken.eType     = TNUMBER;
 878                             m_aCurToken.cMathChar = '\0';
 879                             m_aCurToken.nGroup    = TG::NONE;
 880                             m_aCurToken.nLevel    = 5;
 881
 882                             sal_Int32 nTxtStart = m_nBufferIndex;
 883                             sal_Unicode cChar;
 884                             // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only
 885                             do
 886                             {
 887                                 cChar = m_aBufferString[ ++m_nBufferIndex ];
 888                             }
 889                             while ( (cChar == '.' || rtl::isAsciiDigit( cChar )) &&
 890                                      ( m_nBufferIndex < m_aBufferString.getLength() - 1 ) );
 891
 892                             m_aCurToken.aText = m_aBufferString.copy( nTxtStart, m_nBufferIndex - nTxtStart );
 893                             aRes.EndPos = m_nBufferIndex;
 894                         }
 895                         else
 896                             bHandled = false;
 897                     }
 898                     break;
 899                 case '/':
 900                     {
 901                         m_aCurToken.eType    = TDIVIDEBY;
 902                         m_aCurToken.cMathChar = MS_SLASH;
 903                         m_aCurToken.nGroup       = TG::Product;
 904                         m_aCurToken.nLevel       = 0;
 905                         m_aCurToken.aText = "/";
 906                     }
 907                     break;
 908                 case '=':
 909                     {
 910                         m_aCurToken.eType    = TASSIGN;
 911                         m_aCurToken.cMathChar = MS_ASSIGN;
 912                         m_aCurToken.nGroup       = TG::Relation;
 913                         m_aCurToken.nLevel       = 0;
 914                         m_aCurToken.aText = "=";
 915                     }
 916                     break;
 917                 default:
 918                     bHandled = false;
 919             }
 920         }
 921     }
 922     else
 923         bHandled = false;
 924
 925     if (!bHandled)
 926     {
 927         m_aCurToken.eType      = TCHARACTER;
 928         m_aCurToken.cMathChar  = '\0';
 929         m_aCurToken.nGroup     = TG::NONE;
 930         m_aCurToken.nLevel     = 5;
 931         m_aCurToken.aText      = m_aBufferString.copy( nRealStart, 1 );
 932
 933         aRes.EndPos = nRealStart + 1;
 934     }
 935
 936     if (TEND != m_aCurToken.eType)
 937         m_nBufferIndex = aRes.EndPos;
 938 }
 939
 940 namespace
 941 {
 942     SmNodeArray buildNodeArray(std::vector<std::unique_ptr<SmNode>>& rSubNodes)
 943     {
 944         SmNodeArray aSubArray(rSubNodes.size());
 945         for (size_t i = 0; i < rSubNodes.size(); ++i)
 946             aSubArray[i] = rSubNodes[i].release();
 947         return aSubArray;
 948     }
 949 }
 950
 951 // grammar
 952
 953 std::unique_ptr<SmTableNode> SmParser::DoTable()
 954 {
 955     DepthProtect aDepthGuard(m_nParseDepth);
 956     if (aDepthGuard.TooDeep())
 957         throw std::range_error("parser depth limit");
 958
 959     std::vector<std::unique_ptr<SmNode>> aLineArray;
 960     aLineArray.push_back(DoLine());
 961     while (m_aCurToken.eType == TNEWLINE)
 962     {
 963         NextToken();
 964         aLineArray.push_back(DoLine());
 965     }
 966     assert(m_aCurToken.eType == TEND);
 967     std::unique_ptr<SmTableNode> xSNode(new SmTableNode(m_aCurToken));
 968     xSNode->SetSubNodes(buildNodeArray(aLineArray));
 969     return xSNode;
 970 }
 971
 972 std::unique_ptr<SmNode> SmParser::DoAlign(bool bUseExtraSpaces)
 973     // parse alignment info (if any), then go on with rest of expression
 974 {
 975     DepthProtect aDepthGuard(m_nParseDepth);
 976     if (aDepthGuard.TooDeep())
 977         throw std::range_error("parser depth limit");
 978
 979     std::unique_ptr<SmStructureNode> xSNode;
 980
 981     if (TokenInGroup(TG::Align))
 982     {
 983         xSNode.reset(new SmAlignNode(m_aCurToken));
 984
 985         NextToken();
 986
 987         // allow for just one align statement in 5.0
 988         if (TokenInGroup(TG::Align))
 989             return std::unique_ptr<SmNode>(DoError(SmParseError::DoubleAlign));
 990     }
 991
 992     auto pNode = DoExpression(bUseExtraSpaces);
 993
 994     if (xSNode)
 995     {
 996         xSNode->SetSubNode(0, pNode.release());
 997         return std::move(xSNode); // this explicit move can be omitted since C++14
 998     }
 999     return pNode;
1000 }
1001
1002 // Postcondition: m_aCurToken.eType == TEND || m_aCurToken.eType == TNEWLINE
1003 std::unique_ptr<SmNode> SmParser::DoLine()
1004 {
1005     DepthProtect aDepthGuard(m_nParseDepth);
1006     if (aDepthGuard.TooDeep())
1007         throw std::range_error("parser depth limit");
1008
1009     std::vector<std::unique_ptr<SmNode>> ExpressionArray;
1010
1011     // start with single expression that may have an alignment statement
1012     // (and go on with expressions that must not have alignment
1013     // statements in 'while' loop below. See also 'Expression()'.)
1014     if (m_aCurToken.eType != TEND  &&  m_aCurToken.eType != TNEWLINE)
1015         ExpressionArray.push_back(DoAlign());
1016
1017     while (m_aCurToken.eType != TEND  &&  m_aCurToken.eType != TNEWLINE)
1018         ExpressionArray.push_back(DoExpression());
1019
1020     //If there's no expression, add an empty one.
1021     //this is to avoid a formula tree without any caret
1022     //positions, in visual formula editor.
1023     if(ExpressionArray.empty())
1024     {
1025         SmToken aTok = SmToken();
1026         aTok.eType = TNEWLINE;
1027         ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok)));
1028     }
1029
1030     auto xSNode = o3tl::make_unique<SmLineNode>(m_aCurToken);
1031     xSNode->SetSubNodes(buildNodeArray(ExpressionArray));
1032     return std::move(xSNode); // this explicit move can be omitted since C++14
1033 }
1034
1035 std::unique_ptr<SmNode> SmParser::DoExpression(bool bUseExtraSpaces)
1036 {
1037     DepthProtect aDepthGuard(m_nParseDepth);
1038     if (aDepthGuard.TooDeep())
1039         throw std::range_error("parser depth limit");
1040
1041     std::vector<std::unique_ptr<SmNode>> RelationArray;
1042     RelationArray.push_back(DoRelation());
1043     while (m_aCurToken.nLevel >= 4)
1044         RelationArray.push_back(DoRelation());
1045
1046     if (RelationArray.size() > 1)
1047     {
1048         std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken));
1049         xSNode->SetSubNodes(buildNodeArray(RelationArray));
1050         xSNode->SetUseExtraSpaces(bUseExtraSpaces);
1051         // the following explicit move can be omitted since C++14:
1052         // https://stackoverflow.com/questions/22018115/converting-stdunique-ptrderived-to-stdunique-ptrbase
1053         return std::move(xSNode);
1054     }
1055     else
1056     {
1057         // This expression has only one node so just push this node.
1058         return std::move(RelationArray[0]);
1059     }
1060 }
1061
1062 std::unique_ptr<SmNode> SmParser::DoRelation()
1063 {
1064     DepthProtect aDepthGuard(m_nParseDepth);
1065     if (aDepthGuard.TooDeep())
1066         throw std::range_error("parser depth limit");
1067
1068     auto xFirst = DoSum();
1069     while (TokenInGroup(TG::Relation))
1070     {
1071         std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1072         auto xSecond = DoOpSubSup();
1073         auto xThird = DoSum();
1074         xSNode->SetSubNodes(xFirst.release(), xSecond.release(), xThird.release());
1075         xFirst = std::move(xSNode);
1076     }
1077     return xFirst;
1078 }
1079
1080 std::unique_ptr<SmNode> SmParser::DoSum()
1081 {
1082     DepthProtect aDepthGuard(m_nParseDepth);
1083     if (aDepthGuard.TooDeep())
1084         throw std::range_error("parser depth limit");
1085
1086     auto xFirst = DoProduct();
1087     while (TokenInGroup(TG::Sum))
1088     {
1089         std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1090         auto xSecond = DoOpSubSup();
1091         auto xThird = DoProduct();
1092         xSNode->SetSubNodes(xFirst.release(), xSecond.release(), xThird.release());
1093         xFirst = std::move(xSNode);
1094     }
1095     return xFirst;
1096 }
1097
1098 std::unique_ptr<SmNode> SmParser::DoProduct()
1099 {
1100     DepthProtect aDepthGuard(m_nParseDepth);
1101     if (aDepthGuard.TooDeep())
1102         throw std::range_error("parser depth limit");
1103
1104     auto xFirst = DoPower();
1105
1106     int nDepthLimit = 0;
1107
1108     while (TokenInGroup(TG::Product))
1109     {
1110         //this linear loop builds a recursive structure, if it gets
1111         //too deep then later processing, e.g. releasing the tree,
1112         //can exhaust stack
1113         if (nDepthLimit > DEPTH_LIMIT)
1114             throw std::range_error("parser depth limit");
1115
1116         std::unique_ptr<SmStructureNode> xSNode;
1117         std::unique_ptr<SmNode> xOper;
1118         bool bSwitchArgs = false;
1119
1120         SmTokenType eType = m_aCurToken.eType;
1121         switch (eType)
1122         {
1123             case TOVER:
1124                 xSNode.reset(new SmBinVerNode(m_aCurToken));
1125                 xOper.reset(new SmRectangleNode(m_aCurToken));
1126                 NextToken();
1127                 break;
1128
1129             case TBOPER:
1130                 xSNode.reset(new SmBinHorNode(m_aCurToken));
1131
1132                 NextToken();
1133
1134                 //Let the glyph node know it's a binary operation
1135                 m_aCurToken.eType = TBOPER;
1136                 m_aCurToken.nGroup = TG::Product;
1137                 xOper.reset(DoGlyphSpecial());
1138                 break;
1139
1140             case TOVERBRACE :
1141             case TUNDERBRACE :
1142                 xSNode.reset(new SmVerticalBraceNode(m_aCurToken));
1143                 xOper.reset(new SmMathSymbolNode(m_aCurToken));
1144
1145                 NextToken();
1146                 break;
1147
1148             case TWIDEBACKSLASH:
1149             case TWIDESLASH:
1150             {
1151                 SmBinDiagonalNode *pSTmp = new SmBinDiagonalNode(m_aCurToken);
1152                 pSTmp->SetAscending(eType == TWIDESLASH);
1153                 xSNode.reset(pSTmp);
1154
1155                 xOper.reset(new SmPolyLineNode(m_aCurToken));
1156                 NextToken();
1157
1158                 bSwitchArgs = true;
1159                 break;
1160             }
1161
1162             default:
1163                 xSNode.reset(new SmBinHorNode(m_aCurToken));
1164
1165                 xOper = DoOpSubSup();
1166         }
1167
1168         auto xArg = DoPower();
1169
1170         if (bSwitchArgs)
1171         {
1172             //! vgl siehe SmBinDiagonalNode::Arrange
1173             xSNode->SetSubNodes(xFirst.release(), xArg.release(), xOper.release());
1174         }
1175         else
1176         {
1177             xSNode->SetSubNodes(xFirst.release(), xOper.release(), xArg.release());
1178         }
1179         xFirst = std::move(xSNode);
1180         ++nDepthLimit;
1181     }
1182     return xFirst;
1183 }
1184
1185 std::unique_ptr<SmNode> SmParser::DoSubSup(TG nActiveGroup, SmNode *pGivenNode)
1186 {
1187     std::unique_ptr<SmNode> xGivenNode(pGivenNode);
1188     DepthProtect aDepthGuard(m_nParseDepth);
1189     if (aDepthGuard.TooDeep())
1190         throw std::range_error("parser depth limit");
1191
1192     assert(nActiveGroup == TG::Power || nActiveGroup == TG::Limit);
1193     assert(m_aCurToken.nGroup == nActiveGroup);
1194
1195     std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
1196     //! Of course 'm_aCurToken' is just the first sub-/supscript token.
1197     //! It should be of no further interest. The positions of the
1198     //! sub-/supscripts will be identified by the corresponding subnodes
1199     //! index in the 'aSubNodes' array (enum value from 'SmSubSup').
1200
1201     pNode->SetUseLimits(nActiveGroup == TG::Limit);
1202
1203     // initialize subnodes array
1204     std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
1205     aSubNodes[0] = std::move(xGivenNode);
1206
1207     // process all sub-/supscripts
1208     int  nIndex = 0;
1209     while (TokenInGroup(nActiveGroup))
1210     {
1211         SmTokenType  eType (m_aCurToken.eType);
1212
1213         switch (eType)
1214         {
1215             case TRSUB :    nIndex = static_cast<int>(RSUB);    break;
1216             case TRSUP :    nIndex = static_cast<int>(RSUP);    break;
1217             case TFROM :
1218             case TCSUB :    nIndex = static_cast<int>(CSUB);    break;
1219             case TTO :
1220             case TCSUP :    nIndex = static_cast<int>(CSUP);    break;
1221             case TLSUB :    nIndex = static_cast<int>(LSUB);    break;
1222             case TLSUP :    nIndex = static_cast<int>(LSUP);    break;
1223             default :
1224                 SAL_WARN( "starmath", "unknown case");
1225         }
1226         nIndex++;
1227         assert(1 <= nIndex  &&  nIndex <= SUBSUP_NUM_ENTRIES);
1228
1229         std::unique_ptr<SmNode> xENode;
1230         if (aSubNodes[nIndex]) // if already occupied at earlier iteration
1231         {
1232             // forget the earlier one, remember an error instead
1233             aSubNodes[nIndex].reset();
1234             xENode.reset(DoError(SmParseError::DoubleSubsupscript)); // this also skips current token.
1235         }
1236         else
1237         {
1238             // skip sub-/supscript token
1239             NextToken();
1240         }
1241
1242         // get sub-/supscript node
1243         // (even when we saw a double-sub/supscript error in the above
1244         // in order to minimize mess and continue parsing.)
1245         std::unique_ptr<SmNode> xSNode;
1246         if (eType == TFROM  ||  eType == TTO)
1247         {
1248             // parse limits in old 4.0 and 5.0 style
1249             xSNode = DoRelation();
1250         }
1251         else
1252             xSNode.reset(DoTerm(true));
1253
1254         aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
1255     }
1256
1257     pNode->SetSubNodes(buildNodeArray(aSubNodes));
1258     return std::move(pNode); // this explicit move can be omitted since C++14
1259 }
1260
1261 std::unique_ptr<SmNode> SmParser::DoOpSubSup()
1262 {
1263     DepthProtect aDepthGuard(m_nParseDepth);
1264     if (aDepthGuard.TooDeep())
1265         throw std::range_error("parser depth limit");
1266
1267     // get operator symbol
1268     auto pNode = o3tl::make_unique<SmMathSymbolNode>(m_aCurToken);
1269     // skip operator token
1270     NextToken();
1271     // get sub- supscripts if any
1272     if (m_aCurToken.nGroup == TG::Power)
1273         return DoSubSup(TG::Power, pNode.release());
1274     return std::move(pNode); // this explicit move can be omitted since C++14
1275 }
1276
1277 std::unique_ptr<SmNode> SmParser::DoPower()
1278 {
1279     DepthProtect aDepthGuard(m_nParseDepth);
1280     if (aDepthGuard.TooDeep())
1281         throw std::range_error("parser depth limit");
1282
1283     // get body for sub- supscripts on top of stack
1284     std::unique_ptr<SmNode> xNode(DoTerm(false));
1285
1286     if (m_aCurToken.nGroup == TG::Power)
1287         return DoSubSup(TG::Power, xNode.release());
1288     return xNode;
1289 }
1290
1291 SmBlankNode *SmParser::DoBlank()
1292 {
1293     DepthProtect aDepthGuard(m_nParseDepth);
1294     if (aDepthGuard.TooDeep())
1295         throw std::range_error("parser depth limit");
1296
1297     assert(TokenInGroup(TG::Blank));
1298     std::unique_ptr<SmBlankNode> pBlankNode(new SmBlankNode(m_aCurToken));
1299
1300     do
1301     {
1302         pBlankNode->IncreaseBy(m_aCurToken);
1303         NextToken();
1304     }
1305     while (TokenInGroup(TG::Blank));
1306
1307     // Ignore trailing spaces, if corresponding option is set
1308     if ( m_aCurToken.eType == TNEWLINE ||
1309              (m_aCurToken.eType == TEND && !utl::ConfigManager::IsFuzzing() && SM_MOD()->GetConfig()->IsIgnoreSpacesRight()) )
1310     {
1311         pBlankNode->Clear();
1312     }
1313     return pBlankNode.release();
1314 }
1315
1316 SmNode *SmParser::DoTerm(bool bGroupNumberIdent)
1317 {
1318     DepthProtect aDepthGuard(m_nParseDepth);
1319     if (aDepthGuard.TooDeep())
1320         throw std::range_error("parser depth limit");
1321
1322     switch (m_aCurToken.eType)
1323     {
1324         case TESCAPE :
1325             return DoEscape();
1326
1327         case TNOSPACE :
1328         case TLGROUP :
1329         {
1330             bool bNoSpace = m_aCurToken.eType == TNOSPACE;
1331             if (bNoSpace)
1332                 NextToken();
1333             if (m_aCurToken.eType != TLGROUP)
1334                 return DoTerm(false); // nospace is no longer concerned
1335
1336             NextToken();
1337
1338             // allow for empty group
1339             if (m_aCurToken.eType == TRGROUP)
1340             {
1341                 std::unique_ptr<SmStructureNode> xSNode(new SmExpressionNode(m_aCurToken));
1342                 xSNode->SetSubNodes(nullptr, nullptr);
1343
1344                 NextToken();
1345                 return xSNode.release();
1346             }
1347
1348             auto pNode = DoAlign(!bNoSpace);
1349             if (m_aCurToken.eType == TRGROUP) {
1350                 NextToken();
1351                 return pNode.release();
1352             }
1353             auto xSNode = o3tl::make_unique<SmExpressionNode>(m_aCurToken);
1354             std::unique_ptr<SmNode> xError(DoError(SmParseError::RgroupExpected));
1355             xSNode->SetSubNodes(pNode.release(), xError.release());
1356             return xSNode.release();
1357         }
1358
1359         case TLEFT :
1360             return DoBrace();
1361
1362         case TBLANK :
1363         case TSBLANK :
1364             return DoBlank();
1365
1366         case TTEXT :
1367             {
1368                 auto pNode = o3tl::make_unique<SmTextNode>(m_aCurToken, FNT_TEXT);
1369                 NextToken();
1370                 return pNode.release();
1371             }
1372         case TCHARACTER :
1373             {
1374                 auto pNode = o3tl::make_unique<SmTextNode>(m_aCurToken, FNT_VARIABLE);
1375                 NextToken();
1376                 return pNode.release();
1377             }
1378         case TIDENT :
1379         case TNUMBER :
1380         {
1381             auto pTextNode = o3tl::make_unique<SmTextNode>(m_aCurToken,
1382                                              m_aCurToken.eType == TNUMBER ?
1383                                              FNT_NUMBER :
1384                                              FNT_VARIABLE);
1385             if (!bGroupNumberIdent)
1386             {
1387                 NextToken();
1388                 return pTextNode.release();
1389             }
1390             std::vector<std::unique_ptr<SmNode>> aNodes;
1391             // Some people want to be able to write "x_2n" for "x_{2n}"
1392             // although e.g. LaTeX or AsciiMath interpret that as "x_2 n".
1393             // The tokenizer skips whitespaces so we need some additional
1394             // work to distinguish from "x_2 n".
1395             // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and
1396             // https://bugs.libreoffice.org/show_bug.cgi?id=55853
1397             sal_Int32 nBufLen = m_aBufferString.getLength();
1398
1399             // We need to be careful to call NextToken() only after having
1400             // tested for a whitespace separator (otherwise it will be
1401             // skipped!)
1402             bool moveToNextToken = true;
1403             while (m_nBufferIndex < nBufLen &&
1404                    m_pSysCC->getType(m_aBufferString, m_nBufferIndex) !=
1405                    UnicodeType::SPACE_SEPARATOR)
1406             {
1407                 NextToken();
1408                 if (m_aCurToken.eType != TNUMBER &&
1409                     m_aCurToken.eType != TIDENT)
1410                 {
1411                     // Neither a number nor an identifier. We just moved to
1412                     // the next token, so no need to do that again.
1413                     moveToNextToken = false;
1414                     break;
1415                 }
1416                 aNodes.emplace_back(std::unique_ptr<SmNode>(new SmTextNode(m_aCurToken,
1417                                                 m_aCurToken.eType ==
1418                                                 TNUMBER ?
1419                                                 FNT_NUMBER :
1420                                                 FNT_VARIABLE)));
1421             }
1422             if (moveToNextToken)
1423                 NextToken();
1424             if (aNodes.empty())
1425                 return pTextNode.release();
1426             // We have several concatenated identifiers and numbers.
1427             // Let's group them into one SmExpressionNode.
1428             aNodes.insert(aNodes.begin(), std::move(pTextNode));
1429             std::unique_ptr<SmExpressionNode> xNode(new SmExpressionNode(SmToken()));
1430             xNode->SetSubNodes(buildNodeArray(aNodes));
1431             return xNode.release();
1432         }
1433         case TLEFTARROW :
1434         case TRIGHTARROW :
1435         case TUPARROW :
1436         case TDOWNARROW :
1437         case TCIRC :
1438         case TDRARROW :
1439         case TDLARROW :
1440         case TDLRARROW :
1441         case TEXISTS :
1442         case TNOTEXISTS :
1443         case TFORALL :
1444         case TPARTIAL :
1445         case TNABLA :
1446         case TTOWARD :
1447         case TDOTSAXIS :
1448         case TDOTSDIAG :
1449         case TDOTSDOWN :
1450         case TDOTSLOW :
1451         case TDOTSUP :
1452         case TDOTSVERT :
1453             {
1454                 auto pNode = o3tl::make_unique<SmMathSymbolNode>(m_aCurToken);
1455                 NextToken();
1456                 return pNode.release();
1457             }
1458
1459         case TSETN :
1460         case TSETZ :
1461         case TSETQ :
1462         case TSETR :
1463         case TSETC :
1464         case THBAR :
1465         case TLAMBDABAR :
1466         case TBACKEPSILON :
1467         case TALEPH :
1468         case TIM :
1469         case TRE :
1470         case TWP :
1471         case TEMPTYSET :
1472         case TINFINITY :
1473             {
1474                 auto pNode = o3tl::make_unique<SmMathIdentifierNode>(m_aCurToken);
1475                 NextToken();
1476                 return pNode.release();
1477             }
1478
1479         case TPLACE:
1480             {
1481                 auto pNode = o3tl::make_unique<SmPlaceNode>(m_aCurToken);
1482                 NextToken();
1483                 return pNode.release();
1484             }
1485
1486         case TSPECIAL:
1487             return DoSpecial();
1488
1489         case TBINOM:
1490             return DoBinom();
1491
1492         case TSTACK:
1493             return DoStack();
1494
1495         case TMATRIX:
1496             return DoMatrix();
1497
1498         default:
1499             if (TokenInGroup(TG::LBrace))
1500                 return DoBrace();
1501             if (TokenInGroup(TG::Oper))
1502                 return DoOperator();
1503             if (TokenInGroup(TG::UnOper))
1504                 return DoUnOper();
1505             if ( TokenInGroup(TG::Attribute) ||
1506                  TokenInGroup(TG::FontAttr) )
1507             {
1508                 std::stack<std::unique_ptr<SmStructureNode>> aStack;
1509                 bool    bIsAttr;
1510                 while ( (bIsAttr = TokenInGroup(TG::Attribute))
1511                        ||  TokenInGroup(TG::FontAttr))
1512                     aStack.push(bIsAttr ? DoAttribut() : DoFontAttribut());
1513
1514                 auto xFirstNode = DoPower();
1515                 while (!aStack.empty())
1516                 {
1517                     std::unique_ptr<SmStructureNode> xNode = std::move(aStack.top());
1518                     aStack.pop();
1519                     xNode->SetSubNodes(nullptr, xFirstNode.release());
1520                     xFirstNode = std::move(xNode);
1521                 }
1522                 return xFirstNode.release();
1523             }
1524             if (TokenInGroup(TG::Function))
1525                 return DoFunction();
1526             return DoError(SmParseError::UnexpectedChar);
1527     }
1528 }
1529
1530 SmNode *SmParser::DoEscape()
1531 {
1532     DepthProtect aDepthGuard(m_nParseDepth);
1533     if (aDepthGuard.TooDeep())
1534         throw std::range_error("parser depth limit");
1535
1536     NextToken();
1537
1538     switch (m_aCurToken.eType)
1539     {
1540         case TLPARENT :
1541         case TRPARENT :
1542         case TLBRACKET :
1543         case TRBRACKET :
1544         case TLDBRACKET :
1545         case TRDBRACKET :
1546         case TLBRACE :
1547         case TLGROUP :
1548         case TRBRACE :
1549         case TRGROUP :
1550         case TLANGLE :
1551         case TRANGLE :
1552         case TLCEIL :
1553         case TRCEIL :
1554         case TLFLOOR :
1555         case TRFLOOR :
1556         case TLLINE :
1557         case TRLINE :
1558         case TLDLINE :
1559         case TRDLINE :
1560             {
1561                 auto pNode = o3tl::make_unique<SmMathSymbolNode>(m_aCurToken);
1562                 NextToken();
1563                 return pNode.release();
1564             }
1565         default:
1566             return DoError(SmParseError::UnexpectedToken);
1567     }
1568 }
1569
1570 SmOperNode *SmParser::DoOperator()
1571 {
1572     DepthProtect aDepthGuard(m_nParseDepth);
1573     if (aDepthGuard.TooDeep())
1574         throw std::range_error("parser depth limit");
1575
1576     assert(TokenInGroup(TG::Oper));
1577
1578     auto xSNode = o3tl::make_unique<SmOperNode>(m_aCurToken);
1579
1580     // get operator
1581     auto xOperator = DoOper();
1582
1583     if (m_aCurToken.nGroup == TG::Limit || m_aCurToken.nGroup == TG::Power)
1584         xOperator = DoSubSup(m_aCurToken.nGroup, xOperator.release());
1585
1586     // get argument
1587     auto xArg = DoPower();
1588
1589     xSNode->SetSubNodes(xOperator.release(), xArg.release());
1590     return xSNode.release();
1591 }
1592
1593 std::unique_ptr<SmNode> SmParser::DoOper()
1594 {
1595     DepthProtect aDepthGuard(m_nParseDepth);
1596     if (aDepthGuard.TooDeep())
1597         throw std::range_error("parser depth limit");
1598
1599     SmTokenType  eType (m_aCurToken.eType);
1600     std::unique_ptr<SmNode> pNode;
1601
1602     switch (eType)
1603     {
1604         case TSUM :
1605         case TPROD :
1606         case TCOPROD :
1607         case TINT :
1608         case TINTD :
1609         case TIINT :
1610         case TIIINT :
1611         case TLINT :
1612         case TLLINT :
1613         case TLLLINT :
1614             pNode.reset(new SmMathSymbolNode(m_aCurToken));
1615             break;
1616
1617         case TLIM :
1618         case TLIMSUP :
1619         case TLIMINF :
1620             {
1621                 const sal_Char* pLim = nullptr;
1622                 switch (eType)
1623                 {
1624                     case TLIM :     pLim = "lim";       break;
1625                     case TLIMSUP :  pLim = "lim sup";   break;
1626                     case TLIMINF :  pLim = "lim inf";   break;
1627                     default:
1628                         break;
1629                 }
1630                 if( pLim )
1631                     m_aCurToken.aText = OUString::createFromAscii(pLim);
1632                 pNode.reset(new SmTextNode(m_aCurToken, FNT_TEXT));
1633             }
1634             break;
1635
1636         case TOPER :
1637             NextToken();
1638
1639             OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token");
1640             pNode.reset(new SmGlyphSpecialNode(m_aCurToken));
1641             break;
1642
1643         default :
1644             assert(false && "unknown case");
1645     }
1646
1647     NextToken();
1648     return pNode;
1649 }
1650
1651 SmStructureNode *SmParser::DoUnOper()
1652 {
1653     DepthProtect aDepthGuard(m_nParseDepth);
1654     if (aDepthGuard.TooDeep())
1655         throw std::range_error("parser depth limit");
1656
1657     assert(TokenInGroup(TG::UnOper));
1658
1659     SmToken      aNodeToken = m_aCurToken;
1660     SmTokenType  eType      = m_aCurToken.eType;
1661     bool         bIsPostfix = eType == TFACT;
1662
1663     std::unique_ptr<SmStructureNode> xSNode;
1664     std::unique_ptr<SmNode> xOper;
1665     std::unique_ptr<SmNode> xExtra;
1666     std::unique_ptr<SmNode> xArg;
1667
1668     switch (eType)
1669     {
1670         case TABS :
1671         case TSQRT :
1672             NextToken();
1673             break;
1674
1675         case TNROOT :
1676             NextToken();
1677             xExtra = DoPower();
1678             break;
1679
1680         case TUOPER :
1681             NextToken();
1682             //Let the glyph know what it is...
1683             m_aCurToken.eType = TUOPER;
1684             m_aCurToken.nGroup = TG::UnOper;
1685             xOper.reset(DoGlyphSpecial());
1686             break;
1687
1688         case TPLUS :
1689         case TMINUS :
1690         case TPLUSMINUS :
1691         case TMINUSPLUS :
1692         case TNEG :
1693         case TFACT :
1694             xOper = DoOpSubSup();
1695             break;
1696
1697         default :
1698             assert(false);
1699     }
1700
1701     // get argument
1702     xArg = DoPower();
1703
1704     if (eType == TABS)
1705     {
1706         xSNode.reset(new SmBraceNode(aNodeToken));
1707         xSNode->SetScaleMode(SmScaleMode::Height);
1708
1709         // build nodes for left & right lines
1710         // (text, group, level of the used token are of no interest here)
1711         // we'll use row & column of the keyword for abs
1712         aNodeToken.eType = TABS;
1713
1714         aNodeToken.cMathChar = MS_VERTLINE;
1715         std::unique_ptr<SmNode> xLeft(new SmMathSymbolNode(aNodeToken));
1716
1717         aNodeToken.cMathChar = MS_VERTLINE;
1718         std::unique_ptr<SmNode> xRight(new SmMathSymbolNode(aNodeToken));
1719
1720         xSNode->SetSubNodes(xLeft.release(), xArg.release(), xRight.release());
1721     }
1722     else if (eType == TSQRT  ||  eType == TNROOT)
1723     {
1724         xSNode.reset(new SmRootNode(aNodeToken));
1725         xOper.reset(new SmRootSymbolNode(aNodeToken));
1726         xSNode->SetSubNodes(xExtra.release(), xOper.release(), xArg.release());
1727     }
1728     else
1729     {
1730         xSNode.reset(new SmUnHorNode(aNodeToken));
1731         if (bIsPostfix)
1732             xSNode->SetSubNodes(xArg.release(), xOper.release());
1733         else
1734         {
1735             // prefix operator
1736             xSNode->SetSubNodes(xOper.release(), xArg.release());
1737         }
1738     }
1739     return xSNode.release();
1740 }
1741
1742 std::unique_ptr<SmStructureNode> SmParser::DoAttribut()
1743 {
1744     DepthProtect aDepthGuard(m_nParseDepth);
1745     if (aDepthGuard.TooDeep())
1746         throw std::range_error("parser depth limit");
1747
1748     assert(TokenInGroup(TG::Attribute));
1749
1750     auto xSNode = o3tl::make_unique<SmAttributNode>(m_aCurToken);
1751     std::unique_ptr<SmNode> xAttr;
1752     SmScaleMode  eScaleMode = SmScaleMode::None;
1753
1754     // get appropriate node for the attribute itself
1755     switch (m_aCurToken.eType)
1756     {   case TUNDERLINE :
1757         case TOVERLINE :
1758         case TOVERSTRIKE :
1759             xAttr.reset(new SmRectangleNode(m_aCurToken));
1760             eScaleMode = SmScaleMode::Width;
1761             break;
1762
1763         case TWIDEVEC :
1764         case TWIDEHAT :
1765         case TWIDETILDE :
1766             xAttr.reset(new SmMathSymbolNode(m_aCurToken));
1767             eScaleMode = SmScaleMode::Width;
1768             break;
1769
1770         default :
1771             xAttr.reset(new SmMathSymbolNode(m_aCurToken));
1772     }
1773
1774     NextToken();
1775
1776     xSNode->SetSubNodes(xAttr.release(), nullptr); // the body will be filled later
1777     xSNode->SetScaleMode(eScaleMode);
1778     return std::move(xSNode); // this explicit move can be omitted since C++14
1779 }
1780
1781 std::unique_ptr<SmStructureNode> SmParser::DoFontAttribut()
1782 {
1783     DepthProtect aDepthGuard(m_nParseDepth);
1784     if (aDepthGuard.TooDeep())
1785         throw std::range_error("parser depth limit");
1786
1787     assert(TokenInGroup(TG::FontAttr));
1788
1789     switch (m_aCurToken.eType)
1790     {
1791         case TITALIC :
1792         case TNITALIC :
1793         case TBOLD :
1794         case TNBOLD :
1795         case TPHANTOM :
1796             {
1797                 auto pNode = o3tl::make_unique<SmFontNode>(m_aCurToken);
1798                 NextToken();
1799                 return std::move(pNode); // this explicit move can be omitted since C++14
1800             }
1801
1802         case TSIZE :
1803             return DoFontSize();
1804
1805         case TFONT :
1806             return DoFont();
1807
1808         case TCOLOR :
1809             return DoColor();
1810
1811         default :
1812             assert(false);
1813             return {};
1814     }
1815 }
1816
1817 std::unique_ptr<SmStructureNode> SmParser::DoColor()
1818 {
1819     DepthProtect aDepthGuard(m_nParseDepth);
1820     if (aDepthGuard.TooDeep())
1821         throw std::range_error("parser depth limit");
1822
1823     assert(m_aCurToken.eType == TCOLOR);
1824
1825     std::unique_ptr<SmStructureNode> xNode;
1826     // last color rules, get that one
1827     SmToken  aToken;
1828     do
1829     {   NextToken();
1830
1831         if (TokenInGroup(TG::Color))
1832         {   aToken = m_aCurToken;
1833             NextToken();
1834         }
1835         else
1836         {
1837             xNode.reset(DoError(SmParseError::ColorExpected));
1838             return xNode;
1839         }
1840     } while (m_aCurToken.eType == TCOLOR);
1841
1842     xNode.reset(new SmFontNode(aToken));
1843     return xNode;
1844 }
1845
1846 std::unique_ptr<SmStructureNode> SmParser::DoFont()
1847 {
1848     DepthProtect aDepthGuard(m_nParseDepth);
1849     if (aDepthGuard.TooDeep())
1850         throw std::range_error("parser depth limit");
1851
1852     assert(m_aCurToken.eType == TFONT);
1853
1854     std::unique_ptr<SmStructureNode> xNode;
1855     // last font rules, get that one
1856     SmToken  aToken;
1857     do
1858     {   NextToken();
1859
1860         if (TokenInGroup(TG::Font))
1861         {   aToken = m_aCurToken;
1862             NextToken();
1863         }
1864         else
1865         {
1866             xNode.reset(DoError(SmParseError::FontExpected));
1867             return xNode;
1868         }
1869     } while (m_aCurToken.eType == TFONT);
1870
1871     xNode.reset(new SmFontNode(aToken));
1872     return xNode;
1873 }
1874
1875
1876 // gets number used as arguments in Math formulas (e.g. 'size' command)
1877 // Format: no negative numbers, must start with a digit, no exponent notation, ...
1878 static bool lcl_IsNumber(const OUString& rText)
1879 {
1880     bool bPoint = false;
1881     const sal_Unicode* pBuffer = rText.getStr();
1882     for(sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
1883     {
1884         const sal_Unicode cChar = *pBuffer;
1885         if(cChar == '.')
1886         {
1887             if(bPoint)
1888                 return false;
1889             else
1890                 bPoint = true;
1891         }
1892         else if ( !rtl::isAsciiDigit( cChar ) )
1893             return false;
1894     }
1895     return true;
1896 }
1897
1898 std::unique_ptr<SmStructureNode> SmParser::DoFontSize()
1899 {
1900     DepthProtect aDepthGuard(m_nParseDepth);
1901     if (aDepthGuard.TooDeep())
1902         throw std::range_error("parser depth limit");
1903
1904     assert(m_aCurToken.eType == TSIZE);
1905
1906     FontSizeType   Type;
1907     std::unique_ptr<SmFontNode> pFontNode(new SmFontNode(m_aCurToken));
1908
1909     NextToken();
1910
1911     switch (m_aCurToken.eType)
1912     {
1913         case TNUMBER:   Type = FontSizeType::ABSOLUT;  break;
1914         case TPLUS:     Type = FontSizeType::PLUS;     break;
1915         case TMINUS:    Type = FontSizeType::MINUS;    break;
1916         case TMULTIPLY: Type = FontSizeType::MULTIPLY; break;
1917         case TDIVIDEBY: Type = FontSizeType::DIVIDE;   break;
1918
1919         default:
1920             return std::unique_ptr<SmStructureNode>(DoError(SmParseError::SizeExpected));
1921     }
1922
1923     if (Type != FontSizeType::ABSOLUT)
1924     {
1925         NextToken();
1926         if (m_aCurToken.eType != TNUMBER)
1927             return std::unique_ptr<SmStructureNode>(DoError(SmParseError::SizeExpected));
1928     }
1929
1930     // get number argument
1931     Fraction  aValue( 1 );
1932     if (lcl_IsNumber( m_aCurToken.aText ))
1933     {
1934         double fTmp = m_aCurToken.aText.toDouble();
1935         if (fTmp != 0.0)
1936         {
1937             aValue = fTmp;
1938
1939             //!! keep the numerator and denominator from being to large
1940             //!! otherwise ongoing multiplications may result in overflows
1941             //!! (for example in SmNode::SetFontSize the font size calculated
1942             //!! may become 0 because of this!!! Happens e.g. for ftmp = 2.9 with Linux
1943             //!! or ftmp = 1.11111111111111111... (11/9) on every platform.)
1944             if (aValue.GetDenominator() > 1000)
1945             {
1946                 long nNum   = aValue.GetNumerator();
1947                 long nDenom = aValue.GetDenominator();
1948                 while (nDenom > 1000)
1949                 {
1950                     nNum    /= 10;
1951                     nDenom  /= 10;
1952                 }
1953                 aValue = Fraction( nNum, nDenom );
1954             }
1955         }
1956     }
1957
1958     NextToken();
1959
1960     pFontNode->SetSizeParameter(aValue, Type);
1961     return std::move(pFontNode); // this explicit move can be omitted since C++14
1962 }
1963
1964 SmStructureNode *SmParser::DoBrace()
1965 {
1966     DepthProtect aDepthGuard(m_nParseDepth);
1967     if (aDepthGuard.TooDeep())
1968         throw std::range_error("parser depth limit");
1969
1970     assert(m_aCurToken.eType == TLEFT  ||  TokenInGroup(TG::LBrace));
1971
1972     std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
1973     std::unique_ptr<SmNode> pBody, pLeft, pRight;
1974     SmScaleMode   eScaleMode = SmScaleMode::None;
1975     SmParseError  eError     = SmParseError::None;
1976
1977     if (m_aCurToken.eType == TLEFT)
1978     {   NextToken();
1979
1980         eScaleMode = SmScaleMode::Height;
1981
1982         // check for left bracket
1983         if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
1984         {
1985             pLeft.reset(new SmMathSymbolNode(m_aCurToken));
1986
1987             NextToken();
1988             pBody.reset(DoBracebody(true));
1989
1990             if (m_aCurToken.eType == TRIGHT)
1991             {   NextToken();
1992
1993                 // check for right bracket
1994                 if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
1995                 {
1996                     pRight.reset(new SmMathSymbolNode(m_aCurToken));
1997                     NextToken();
1998                 }
1999                 else
2000                     eError = SmParseError::RbraceExpected;
2001             }
2002             else
2003                 eError = SmParseError::RightExpected;
2004         }
2005         else
2006             eError = SmParseError::LbraceExpected;
2007     }
2008     else
2009     {
2010         assert(TokenInGroup(TG::LBrace));
2011
2012         pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2013
2014         NextToken();
2015         pBody.reset(DoBracebody(false));
2016
2017         SmTokenType  eExpectedType = TUNKNOWN;
2018         switch (pLeft->GetToken().eType)
2019         {   case TLPARENT :     eExpectedType = TRPARENT;   break;
2020             case TLBRACKET :    eExpectedType = TRBRACKET;  break;
2021             case TLBRACE :      eExpectedType = TRBRACE;    break;
2022             case TLDBRACKET :   eExpectedType = TRDBRACKET; break;
2023             case TLLINE :       eExpectedType = TRLINE;     break;
2024             case TLDLINE :      eExpectedType = TRDLINE;    break;
2025             case TLANGLE :      eExpectedType = TRANGLE;    break;
2026             case TLFLOOR :      eExpectedType = TRFLOOR;    break;
2027             case TLCEIL :       eExpectedType = TRCEIL;     break;
2028             default :
2029                 SAL_WARN("starmath", "unknown case");
2030             }
2031
2032         if (m_aCurToken.eType == eExpectedType)
2033         {
2034             pRight.reset(new SmMathSymbolNode(m_aCurToken));
2035             NextToken();
2036         }
2037         else
2038             eError = SmParseError::ParentMismatch;
2039     }
2040
2041     if (eError == SmParseError::None)
2042     {
2043         assert(pLeft);
2044         assert(pRight);
2045         xSNode->SetSubNodes(pLeft.release(), pBody.release(), pRight.release());
2046         xSNode->SetScaleMode(eScaleMode);
2047         return xSNode.release();
2048     }
2049     return DoError(eError);
2050 }
2051
2052 SmBracebodyNode *SmParser::DoBracebody(bool bIsLeftRight)
2053 {
2054     DepthProtect aDepthGuard(m_nParseDepth);
2055     if (aDepthGuard.TooDeep())
2056         throw std::range_error("parser depth limit");
2057
2058     auto pBody = o3tl::make_unique<SmBracebodyNode>(m_aCurToken);
2059
2060     std::vector<std::unique_ptr<SmNode>> aNodes;
2061     // get body if any
2062     if (bIsLeftRight)
2063     {
2064         do
2065         {
2066             if (m_aCurToken.eType == TMLINE)
2067             {
2068                 aNodes.emplace_back(o3tl::make_unique<SmMathSymbolNode>(m_aCurToken));
2069                 NextToken();
2070             }
2071             else if (m_aCurToken.eType != TRIGHT)
2072             {
2073                 aNodes.push_back(DoAlign());
2074                 if (m_aCurToken.eType != TMLINE  &&  m_aCurToken.eType != TRIGHT)
2075                     aNodes.emplace_back(std::unique_ptr<SmNode>(DoError(SmParseError::RightExpected)));
2076             }
2077         } while (m_aCurToken.eType != TEND  &&  m_aCurToken.eType != TRIGHT);
2078     }
2079     else
2080     {
2081         do
2082         {
2083             if (m_aCurToken.eType == TMLINE)
2084             {
2085                 aNodes.emplace_back(o3tl::make_unique<SmMathSymbolNode>(m_aCurToken));
2086                 NextToken();
2087             }
2088             else if (!TokenInGroup(TG::RBrace))
2089             {
2090                 aNodes.push_back(DoAlign());
2091                 if (m_aCurToken.eType != TMLINE  &&  !TokenInGroup(TG::RBrace))
2092                     aNodes.emplace_back(std::unique_ptr<SmNode>(DoError(SmParseError::RbraceExpected)));
2093             }
2094         } while (m_aCurToken.eType != TEND  &&  !TokenInGroup(TG::RBrace));
2095     }
2096
2097     pBody->SetSubNodes(buildNodeArray(aNodes));
2098     pBody->SetScaleMode(bIsLeftRight ? SmScaleMode::Height : SmScaleMode::None);
2099     return pBody.release();
2100 }
2101
2102 SmTextNode *SmParser::DoFunction()
2103 {
2104     DepthProtect aDepthGuard(m_nParseDepth);
2105     if (aDepthGuard.TooDeep())
2106         throw std::range_error("parser depth limit");
2107
2108     switch (m_aCurToken.eType)
2109     {
2110         case TFUNC:
2111             NextToken();    // skip "FUNC"-statement
2112             SAL_FALLTHROUGH;
2113
2114         case TSIN :
2115         case TCOS :
2116         case TTAN :
2117         case TCOT :
2118         case TASIN :
2119         case TACOS :
2120         case TATAN :
2121         case TACOT :
2122         case TSINH :
2123         case TCOSH :
2124         case TTANH :
2125         case TCOTH :
2126         case TASINH :
2127         case TACOSH :
2128         case TATANH :
2129         case TACOTH :
2130         case TLN :
2131         case TLOG :
2132         case TEXP :
2133             {
2134                 auto pNode = o3tl::make_unique<SmTextNode>(m_aCurToken, FNT_FUNCTION);
2135                 NextToken();
2136                 return pNode.release();
2137             }
2138
2139         default:
2140             assert(false);
2141             return nullptr;
2142     }
2143 }
2144
2145 SmTableNode *SmParser::DoBinom()
2146 {
2147     DepthProtect aDepthGuard(m_nParseDepth);
2148     if (aDepthGuard.TooDeep())
2149         throw std::range_error("parser depth limit");
2150
2151     auto xSNode = o3tl::make_unique<SmTableNode>(m_aCurToken);
2152
2153     NextToken();
2154
2155     auto xFirst = DoSum();
2156     auto xSecond = DoSum();
2157     xSNode->SetSubNodes(xFirst.release(), xSecond.release());
2158     return xSNode.release();
2159 }
2160
2161 SmStructureNode *SmParser::DoStack()
2162 {
2163     DepthProtect aDepthGuard(m_nParseDepth);
2164     if (aDepthGuard.TooDeep())
2165         throw std::range_error("parser depth limit");
2166
2167     std::unique_ptr<SmStructureNode> xSNode(new SmTableNode(m_aCurToken));
2168     NextToken();
2169     if (m_aCurToken.eType != TLGROUP)
2170         return DoError(SmParseError::LgroupExpected);
2171     std::vector<std::unique_ptr<SmNode>> aExprArr;
2172     do
2173     {
2174         NextToken();
2175         aExprArr.push_back(DoAlign());
2176     }
2177     while (m_aCurToken.eType == TPOUND);
2178
2179     if (m_aCurToken.eType == TRGROUP)
2180         NextToken();
2181     else
2182         aExprArr.emplace_back(std::unique_ptr<SmNode>(DoError(SmParseError::RgroupExpected)));
2183
2184     xSNode->SetSubNodes(buildNodeArray(aExprArr));
2185     return xSNode.release();
2186 }
2187
2188 SmStructureNode *SmParser::DoMatrix()
2189 {
2190     DepthProtect aDepthGuard(m_nParseDepth);
2191     if (aDepthGuard.TooDeep())
2192         throw std::range_error("parser depth limit");
2193
2194     std::unique_ptr<SmMatrixNode> xMNode(new SmMatrixNode(m_aCurToken));
2195     NextToken();
2196     if (m_aCurToken.eType != TLGROUP)
2197         return DoError(SmParseError::LgroupExpected);
2198
2199     std::vector<std::unique_ptr<SmNode>> aExprArr;
2200     do
2201     {
2202         NextToken();
2203         aExprArr.push_back(DoAlign());
2204     }
2205     while (m_aCurToken.eType == TPOUND);
2206
2207     size_t nCol = aExprArr.size();
2208     size_t nRow = 1;
2209     while (m_aCurToken.eType == TDPOUND)
2210     {
2211         NextToken();
2212         for (size_t i = 0; i < nCol; i++)
2213         {
2214             auto xNode = DoAlign();
2215             if (i < (nCol - 1))
2216             {
2217                 if (m_aCurToken.eType == TPOUND)
2218                     NextToken();
2219                 else
2220                     xNode.reset(DoError(SmParseError::PoundExpected));
2221             }
2222             aExprArr.emplace_back(std::move(xNode));
2223         }
2224         ++nRow;
2225     }
2226
2227     if (m_aCurToken.eType == TRGROUP)
2228         NextToken();
2229     else
2230     {
2231         std::unique_ptr<SmNode> xENode(DoError(SmParseError::RgroupExpected));
2232         if (aExprArr.empty())
2233             nRow = nCol = 1;
2234         else
2235             aExprArr.pop_back();
2236         aExprArr.emplace_back(std::move(xENode));
2237     }
2238
2239     xMNode->SetSubNodes(buildNodeArray(aExprArr));
2240     xMNode->SetRowCol(static_cast<sal_uInt16>(nRow),
2241                       static_cast<sal_uInt16>(nCol));
2242     return xMNode.release();
2243 }
2244
2245 SmSpecialNode *SmParser::DoSpecial()
2246 {
2247     DepthProtect aDepthGuard(m_nParseDepth);
2248     if (aDepthGuard.TooDeep())
2249         throw std::range_error("parser depth limit");
2250
2251     bool bReplace = false;
2252     OUString &rName = m_aCurToken.aText;
2253     OUString aNewName;
2254
2255     // conversion of symbol names for 6.0 (XML) file format
2256     // (name change on import / export.
2257     // UI uses localized names XML file format does not.)
2258     if( rName.startsWith("%") )
2259     {
2260         if (IsImportSymbolNames())
2261         {
2262             aNewName = SmLocalizedSymbolData::GetUiSymbolName(rName.copy(1));
2263             bReplace = true;
2264         }
2265         else if (IsExportSymbolNames())
2266         {
2267             aNewName = SmLocalizedSymbolData::GetExportSymbolName(rName.copy(1));
2268             bReplace = true;
2269         }
2270     }
2271     if (!aNewName.isEmpty())
2272         aNewName = "%" + aNewName;
2273
2274
2275     if (bReplace && !aNewName.isEmpty() && rName != aNewName)
2276     {
2277         Replace(GetTokenIndex(), rName.getLength(), aNewName);
2278         rName = aNewName;
2279     }
2280
2281     // add symbol name to list of used symbols
2282     const OUString aSymbolName(m_aCurToken.aText.copy(1));
2283     if (!aSymbolName.isEmpty())
2284         m_aUsedSymbols.insert( aSymbolName );
2285
2286     auto pNode = o3tl::make_unique<SmSpecialNode>(m_aCurToken);
2287     NextToken();
2288     return pNode.release();
2289 }
2290
2291 SmGlyphSpecialNode *SmParser::DoGlyphSpecial()
2292 {
2293     DepthProtect aDepthGuard(m_nParseDepth);
2294     if (aDepthGuard.TooDeep())
2295         throw std::range_error("parser depth limit");
2296
2297     auto pNode = o3tl::make_unique<SmGlyphSpecialNode>(m_aCurToken);
2298     NextToken();
2299     return pNode.release();
2300 }
2301
2302 SmExpressionNode *SmParser::DoError(SmParseError eError)
2303 {
2304     DepthProtect aDepthGuard(m_nParseDepth);
2305     if (aDepthGuard.TooDeep())
2306         throw std::range_error("parser depth limit");
2307
2308     auto xSNode = o3tl::make_unique<SmExpressionNode>(m_aCurToken);
2309     SmErrorNode     *pErr   = new SmErrorNode(m_aCurToken);
2310     xSNode->SetSubNodes(pErr, nullptr);
2311
2312     AddError(eError, xSNode.get());
2313
2314     NextToken();
2315
2316     return xSNode.release();
2317 }
2318
2319 // end grammar
2320
2321
2322 SmParser::SmParser()
2323     : m_nCurError( 0 )
2324     , m_nBufferIndex( 0 )
2325     , m_nTokenIndex( 0 )
2326     , m_nRow( 0 )
2327     , m_nColOff( 0 )
2328     , m_bImportSymNames( false )
2329     , m_bExportSymNames( false )
2330     , m_nParseDepth(0)
2331     , m_aNumCC( LanguageTag( LANGUAGE_ENGLISH_US ) )
2332     , m_pSysCC( SM_MOD()->GetSysLocale().GetCharClassPtr() )
2333 {
2334 }
2335
2336 std::unique_ptr<SmTableNode> SmParser::Parse(const OUString &rBuffer)
2337 {
2338     m_aUsedSymbols.clear();
2339
2340     m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2341     m_nBufferIndex  = 0;
2342     m_nTokenIndex   = 0;
2343     m_nRow          = 1;
2344     m_nColOff       = 0;
2345     m_nCurError     = -1;
2346
2347     m_aErrDescList.clear();
2348
2349     NextToken();
2350     return DoTable();
2351 }
2352
2353 std::unique_ptr<SmNode> SmParser::ParseExpression(const OUString &rBuffer)
2354 {
2355     m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2356     m_nBufferIndex  = 0;
2357     m_nTokenIndex   = 0;
2358     m_nRow          = 1;
2359     m_nColOff       = 0;
2360     m_nCurError     = -1;
2361
2362     m_aErrDescList.clear();
2363
2364     NextToken();
2365     return DoExpression();
2366 }
2367
2368
2369 void SmParser::AddError(SmParseError Type, SmNode *pNode)
2370 {
2371     std::unique_ptr<SmErrorDesc> pErrDesc(new SmErrorDesc);
2372
2373     pErrDesc->m_eType = Type;
2374     pErrDesc->m_pNode = pNode;
2375     pErrDesc->m_aText = SmResId(RID_ERR_IDENT);
2376
2377     const char* pRID;
2378     switch (Type)
2379     {
2380         case SmParseError::UnexpectedChar:     pRID = RID_ERR_UNEXPECTEDCHARACTER; break;
2381         case SmParseError::UnexpectedToken:    pRID = RID_ERR_UNEXPECTEDTOKEN;     break;
2382         case SmParseError::PoundExpected:      pRID = RID_ERR_POUNDEXPECTED;       break;
2383         case SmParseError::ColorExpected:      pRID = RID_ERR_COLOREXPECTED;       break;
2384         case SmParseError::LgroupExpected:     pRID = RID_ERR_LGROUPEXPECTED;      break;
2385         case SmParseError::RgroupExpected:     pRID = RID_ERR_RGROUPEXPECTED;      break;
2386         case SmParseError::LbraceExpected:     pRID = RID_ERR_LBRACEEXPECTED;      break;
2387         case SmParseError::RbraceExpected:     pRID = RID_ERR_RBRACEEXPECTED;      break;
2388         case SmParseError::ParentMismatch:     pRID = RID_ERR_PARENTMISMATCH;      break;
2389         case SmParseError::RightExpected:      pRID = RID_ERR_RIGHTEXPECTED;       break;
2390         case SmParseError::FontExpected:       pRID = RID_ERR_FONTEXPECTED;        break;
2391         case SmParseError::SizeExpected:       pRID = RID_ERR_SIZEEXPECTED;        break;
2392         case SmParseError::DoubleAlign:        pRID = RID_ERR_DOUBLEALIGN;         break;
2393         case SmParseError::DoubleSubsupscript: pRID = RID_ERR_DOUBLESUBSUPSCRIPT;  break;
2394         default:
2395             assert(false);
2396             return;
2397     }
2398     pErrDesc->m_aText += SmResId(pRID);
2399
2400     m_aErrDescList.push_back(std::move(pErrDesc));
2401 }
2402
2403
2404 const SmErrorDesc *SmParser::NextError()
2405 {
2406     if ( !m_aErrDescList.empty() )
2407         if (m_nCurError > 0) return m_aErrDescList[ --m_nCurError ].get();
2408         else
2409         {
2410             m_nCurError = 0;
2411             return m_aErrDescList[ m_nCurError ].get();
2412         }
2413     else return nullptr;
2414 }
2415
2416
2417 const SmErrorDesc *SmParser::PrevError()
2418 {
2419     if ( !m_aErrDescList.empty() )
2420         if (m_nCurError < static_cast<int>(m_aErrDescList.size() - 1)) return m_aErrDescList[ ++m_nCurError ].get();
2421         else
2422         {
2423             m_nCurError = static_cast<int>(m_aErrDescList.size() - 1);
2424             return m_aErrDescList[ m_nCurError ].get();
2425         }
2426     else return nullptr;
2427 }
2428
2429
2430 const SmErrorDesc *SmParser::GetError()
2431 {
2432     if ( !m_aErrDescList.empty() )
2433         return m_aErrDescList.front().get();
2434     return nullptr;
2435 }
2436
2437 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */