1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <com/sun/star/i18n/UnicodeType.hpp>
22 #include <com/sun/star/i18n/KParseTokens.hpp>
23 #include <com/sun/star/i18n/KParseType.hpp>
24 #include <i18nlangtag/lang.h>
25 #include <tools/lineend.hxx>
26 #include <unotools/configmgr.hxx>
27 #include <unotools/syslocale.hxx>
28 #include <sal/log.hxx>
29 #include <osl/diagnose.h>
30 #include <rtl/character.hxx>
33 #include <strings.hrc>
35 #include "cfgitem.hxx"
38 #include <starmathdatabase.hxx>
40 using namespace ::com::sun::star::i18n
;
42 //Definition of math keywords
43 const SmTokenTableEntry aTokenTable
[] =
45 { "abs", TABS
, '\0', TG::UnOper
, 13 },
46 { "acute", TACUTE
, MS_ACUTE
, TG::Attribute
, 5 },
47 { "aleph" , TALEPH
, MS_ALEPH
, TG::Standalone
, 5 },
48 { "alignb", TALIGNC
, '\0', TG::Align
, 0},
49 { "alignc", TALIGNC
, '\0', TG::Align
, 0},
50 { "alignl", TALIGNL
, '\0', TG::Align
, 0},
51 { "alignm", TALIGNC
, '\0', TG::Align
, 0},
52 { "alignr", TALIGNR
, '\0', TG::Align
, 0},
53 { "alignt", TALIGNC
, '\0', TG::Align
, 0},
54 { "and", TAND
, MS_AND
, TG::Product
, 0},
55 { "approx", TAPPROX
, MS_APPROX
, TG::Relation
, 0},
56 { "arccos", TACOS
, '\0', TG::Function
, 5},
57 { "arccot", TACOT
, '\0', TG::Function
, 5},
58 { "arcosh", TACOSH
, '\0', TG::Function
, 5 },
59 { "arcoth", TACOTH
, '\0', TG::Function
, 5 },
60 { "arcsin", TASIN
, '\0', TG::Function
, 5},
61 { "arctan", TATAN
, '\0', TG::Function
, 5},
62 { "arsinh", TASINH
, '\0', TG::Function
, 5},
63 { "artanh", TATANH
, '\0', TG::Function
, 5},
64 { "backepsilon" , TBACKEPSILON
, MS_BACKEPSILON
, TG::Standalone
, 5},
65 { "bar", TBAR
, MS_BAR
, TG::Attribute
, 5},
66 { "binom", TBINOM
, '\0', TG::NONE
, 5 },
67 { "bold", TBOLD
, '\0', TG::FontAttr
, 5},
68 { "boper", TBOPER
, '\0', TG::Product
, 0},
69 { "breve", TBREVE
, MS_BREVE
, TG::Attribute
, 5},
70 { "bslash", TBACKSLASH
, MS_BACKSLASH
, TG::Product
, 0 },
71 { "cdot", TCDOT
, MS_CDOT
, TG::Product
, 0},
72 { "check", TCHECK
, MS_CHECK
, TG::Attribute
, 5},
73 { "circ" , TCIRC
, MS_CIRC
, TG::Standalone
, 5},
74 { "circle", TCIRCLE
, MS_CIRCLE
, TG::Attribute
, 5},
75 { "color", TCOLOR
, '\0', TG::FontAttr
, 5},
76 { "coprod", TCOPROD
, MS_COPROD
, TG::Oper
, 5},
77 { "cos", TCOS
, '\0', TG::Function
, 5},
78 { "cosh", TCOSH
, '\0', TG::Function
, 5},
79 { "cot", TCOT
, '\0', TG::Function
, 5},
80 { "coth", TCOTH
, '\0', TG::Function
, 5},
81 { "csub", TCSUB
, '\0', TG::Power
, 0},
82 { "csup", TCSUP
, '\0', TG::Power
, 0},
83 { "dddot", TDDDOT
, MS_DDDOT
, TG::Attribute
, 5},
84 { "ddot", TDDOT
, MS_DDOT
, TG::Attribute
, 5},
85 { "def", TDEF
, MS_DEF
, TG::Relation
, 0},
86 { "div", TDIV
, MS_DIV
, TG::Product
, 0},
87 { "divides", TDIVIDES
, MS_LINE
, TG::Relation
, 0},
88 { "dlarrow" , TDLARROW
, MS_DLARROW
, TG::Standalone
, 5},
89 { "dlrarrow" , TDLRARROW
, MS_DLRARROW
, TG::Standalone
, 5},
90 { "dot", TDOT
, MS_DOT
, TG::Attribute
, 5},
91 { "dotsaxis", TDOTSAXIS
, MS_DOTSAXIS
, TG::Standalone
, 5}, // 5 to continue expression
92 { "dotsdiag", TDOTSDIAG
, MS_DOTSUP
, TG::Standalone
, 5},
93 { "dotsdown", TDOTSDOWN
, MS_DOTSDOWN
, TG::Standalone
, 5},
94 { "dotslow", TDOTSLOW
, MS_DOTSLOW
, TG::Standalone
, 5},
95 { "dotsup", TDOTSUP
, MS_DOTSUP
, TG::Standalone
, 5},
96 { "dotsvert", TDOTSVERT
, MS_DOTSVERT
, TG::Standalone
, 5},
97 { "downarrow" , TDOWNARROW
, MS_DOWNARROW
, TG::Standalone
, 5},
98 { "drarrow" , TDRARROW
, MS_DRARROW
, TG::Standalone
, 5},
99 { "emptyset" , TEMPTYSET
, MS_EMPTYSET
, TG::Standalone
, 5},
100 { "equiv", TEQUIV
, MS_EQUIV
, TG::Relation
, 0},
101 { "evaluate", TEVALUATE
, '\0', TG::NONE
, 0},
102 { "exists", TEXISTS
, MS_EXISTS
, TG::Standalone
, 5},
103 { "exp", TEXP
, '\0', TG::Function
, 5},
104 { "fact", TFACT
, MS_FACT
, TG::UnOper
, 5},
105 { "fixed", TFIXED
, '\0', TG::Font
, 0},
106 { "font", TFONT
, '\0', TG::FontAttr
, 5},
107 { "forall", TFORALL
, MS_FORALL
, TG::Standalone
, 5},
108 { "fourier", TFOURIER
, MS_FOURIER
, TG::Standalone
, 5},
109 { "frac", TFRAC
, '\0', TG::NONE
, 5},
110 { "from", TFROM
, '\0', TG::Limit
, 0},
111 { "func", TFUNC
, '\0', TG::Function
, 5},
112 { "ge", TGE
, MS_GE
, TG::Relation
, 0},
113 { "geslant", TGESLANT
, MS_GESLANT
, TG::Relation
, 0 },
114 { "gg", TGG
, MS_GG
, TG::Relation
, 0},
115 { "grave", TGRAVE
, MS_GRAVE
, TG::Attribute
, 5},
116 { "gt", TGT
, MS_GT
, TG::Relation
, 0},
117 { "harpoon", THARPOON
, MS_HARPOON
, TG::Attribute
, 5},
118 { "hat", THAT
, MS_HAT
, TG::Attribute
, 5},
119 { "hbar" , THBAR
, MS_HBAR
, TG::Standalone
, 5},
120 { "iiint", TIIINT
, MS_IIINT
, TG::Oper
, 5},
121 { "iint", TIINT
, MS_IINT
, TG::Oper
, 5},
122 { "im" , TIM
, MS_IM
, TG::Standalone
, 5 },
123 { "in", TIN
, MS_IN
, TG::Relation
, 0},
124 { "infinity" , TINFINITY
, MS_INFINITY
, TG::Standalone
, 5},
125 { "infty" , TINFINITY
, MS_INFINITY
, TG::Standalone
, 5},
126 { "int", TINT
, MS_INT
, TG::Oper
, 5},
127 { "intd", TINTD
, MS_INT
, TG::Oper
, 5},
128 { "intersection", TINTERSECT
, MS_INTERSECT
, TG::Product
, 0},
129 { "it", TIT
, '\0', TG::Product
, 0},
130 { "ital", TITALIC
, '\0', TG::FontAttr
, 5},
131 { "italic", TITALIC
, '\0', TG::FontAttr
, 5},
132 { "lambdabar" , TLAMBDABAR
, MS_LAMBDABAR
, TG::Standalone
, 5},
133 { "langle", TLANGLE
, MS_LMATHANGLE
, TG::LBrace
, 5},
134 { "laplace", TLAPLACE
, MS_LAPLACE
, TG::Standalone
, 5},
135 { "lbrace", TLBRACE
, MS_LBRACE
, TG::LBrace
, 5},
136 { "lceil", TLCEIL
, MS_LCEIL
, TG::LBrace
, 5},
137 { "ldbracket", TLDBRACKET
, MS_LDBRACKET
, TG::LBrace
, 5},
138 { "ldline", TLDLINE
, MS_DVERTLINE
, TG::LBrace
, 5},
139 { "le", TLE
, MS_LE
, TG::Relation
, 0},
140 { "left", TLEFT
, '\0', TG::NONE
, 5},
141 { "leftarrow" , TLEFTARROW
, MS_LEFTARROW
, TG::Standalone
, 5},
142 { "leslant", TLESLANT
, MS_LESLANT
, TG::Relation
, 0 },
143 { "lfloor", TLFLOOR
, MS_LFLOOR
, TG::LBrace
, 5},
144 { "lim", TLIM
, '\0', TG::Oper
, 5},
145 { "liminf", TLIMINF
, '\0', TG::Oper
, 5},
146 { "limsup", TLIMSUP
, '\0', TG::Oper
, 5},
147 { "lint", TLINT
, MS_LINT
, TG::Oper
, 5},
148 { "ll", TLL
, MS_LL
, TG::Relation
, 0},
149 { "lline", TLLINE
, MS_VERTLINE
, TG::LBrace
, 5},
150 { "llint", TLLINT
, MS_LLINT
, TG::Oper
, 5},
151 { "lllint", TLLLINT
, MS_LLLINT
, TG::Oper
, 5},
152 { "ln", TLN
, '\0', TG::Function
, 5},
153 { "log", TLOG
, '\0', TG::Function
, 5},
154 { "lrline", TLRLINE
, MS_VERTLINE
, TG::LBrace
| TG::RBrace
, 5},
155 { "lrdline", TLRDLINE
, MS_VERTLINE
, TG::LBrace
| TG::RBrace
, 5},
156 { "lsub", TLSUB
, '\0', TG::Power
, 0},
157 { "lsup", TLSUP
, '\0', TG::Power
, 0},
158 { "lt", TLT
, MS_LT
, TG::Relation
, 0},
159 { "matrix", TMATRIX
, '\0', TG::NONE
, 5},
160 { "minusplus", TMINUSPLUS
, MS_MINUSPLUS
, TG::UnOper
| TG::Sum
, 5},
161 { "mline", TMLINE
, MS_VERTLINE
, TG::NONE
, 0}, //! not in TG::RBrace, Level 0
162 { "nabla", TNABLA
, MS_NABLA
, TG::Standalone
, 5},
163 { "nbold", TNBOLD
, '\0', TG::FontAttr
, 5},
164 { "ndivides", TNDIVIDES
, MS_NDIVIDES
, TG::Relation
, 0},
165 { "neg", TNEG
, MS_NEG
, TG::UnOper
, 5 },
166 { "neq", TNEQ
, MS_NEQ
, TG::Relation
, 0},
167 { "newline", TNEWLINE
, '\0', TG::NONE
, 0},
168 { "ni", TNI
, MS_NI
, TG::Relation
, 0},
169 { "nitalic", TNITALIC
, '\0', TG::FontAttr
, 5},
170 { "none", TNONE
, '\0', TG::LBrace
| TG::RBrace
, 0},
171 { "nospace", TNOSPACE
, '\0', TG::Standalone
, 5},
172 { "notexists", TNOTEXISTS
, MS_NOTEXISTS
, TG::Standalone
, 5},
173 { "notin", TNOTIN
, MS_NOTIN
, TG::Relation
, 0},
174 { "nprec", TNOTPRECEDES
, MS_NOTPRECEDES
, TG::Relation
, 0 },
175 { "nroot", TNROOT
, MS_SQRT
, TG::UnOper
, 5},
176 { "nsubset", TNSUBSET
, MS_NSUBSET
, TG::Relation
, 0 },
177 { "nsubseteq", TNSUBSETEQ
, MS_NSUBSETEQ
, TG::Relation
, 0 },
178 { "nsucc", TNOTSUCCEEDS
, MS_NOTSUCCEEDS
, TG::Relation
, 0 },
179 { "nsupset", TNSUPSET
, MS_NSUPSET
, TG::Relation
, 0 },
180 { "nsupseteq", TNSUPSETEQ
, MS_NSUPSETEQ
, TG::Relation
, 0 },
181 { "odivide", TODIVIDE
, MS_ODIVIDE
, TG::Product
, 0},
182 { "odot", TODOT
, MS_ODOT
, TG::Product
, 0},
183 { "ominus", TOMINUS
, MS_OMINUS
, TG::Sum
, 0},
184 { "oper", TOPER
, '\0', TG::Oper
, 5},
185 { "oplus", TOPLUS
, MS_OPLUS
, TG::Sum
, 0},
186 { "or", TOR
, MS_OR
, TG::Sum
, 0},
187 { "ortho", TORTHO
, MS_ORTHO
, TG::Relation
, 0},
188 { "otimes", TOTIMES
, MS_OTIMES
, TG::Product
, 0},
189 { "over", TOVER
, '\0', TG::Product
, 0},
190 { "overbrace", TOVERBRACE
, MS_OVERBRACE
, TG::Product
, 5},
191 { "overline", TOVERLINE
, '\0', TG::Attribute
, 5},
192 { "overstrike", TOVERSTRIKE
, '\0', TG::Attribute
, 5},
193 { "owns", TNI
, MS_NI
, TG::Relation
, 0},
194 { "parallel", TPARALLEL
, MS_DLINE
, TG::Relation
, 0},
195 { "partial", TPARTIAL
, MS_PARTIAL
, TG::Standalone
, 5 },
196 { "phantom", TPHANTOM
, '\0', TG::FontAttr
, 5},
197 { "plusminus", TPLUSMINUS
, MS_PLUSMINUS
, TG::UnOper
| TG::Sum
, 5},
198 { "prec", TPRECEDES
, MS_PRECEDES
, TG::Relation
, 0 },
199 { "preccurlyeq", TPRECEDESEQUAL
, MS_PRECEDESEQUAL
, TG::Relation
, 0 },
200 { "precsim", TPRECEDESEQUIV
, MS_PRECEDESEQUIV
, TG::Relation
, 0 },
201 { "prod", TPROD
, MS_PROD
, TG::Oper
, 5},
202 { "prop", TPROP
, MS_PROP
, TG::Relation
, 0},
203 { "rangle", TRANGLE
, MS_RMATHANGLE
, TG::RBrace
, 0}, //! 0 to terminate expression
204 { "rbrace", TRBRACE
, MS_RBRACE
, TG::RBrace
, 0},
205 { "rceil", TRCEIL
, MS_RCEIL
, TG::RBrace
, 0},
206 { "rdbracket", TRDBRACKET
, MS_RDBRACKET
, TG::RBrace
, 0},
207 { "rdline", TRDLINE
, MS_DVERTLINE
, TG::RBrace
, 0},
208 { "re" , TRE
, MS_RE
, TG::Standalone
, 5 },
209 { "rfloor", TRFLOOR
, MS_RFLOOR
, TG::RBrace
, 0}, //! 0 to terminate expression
210 { "right", TRIGHT
, '\0', TG::NONE
, 0},
211 { "rightarrow" , TRIGHTARROW
, MS_RIGHTARROW
, TG::Standalone
, 5},
212 { "rline", TRLINE
, MS_VERTLINE
, TG::RBrace
, 0}, //! 0 to terminate expression
213 { "rsub", TRSUB
, '\0', TG::Power
, 0},
214 { "rsup", TRSUP
, '\0', TG::Power
, 0},
215 { "sans", TSANS
, '\0', TG::Font
, 0},
216 { "serif", TSERIF
, '\0', TG::Font
, 0},
217 { "setC" , TSETC
, MS_SETC
, TG::Standalone
, 5},
218 { "setminus", TSETMINUS
, MS_BACKSLASH
, TG::Product
, 0 },
219 { "setN" , TSETN
, MS_SETN
, TG::Standalone
, 5},
220 { "setQ" , TSETQ
, MS_SETQ
, TG::Standalone
, 5},
221 { "setquotient", TSETQUOTIENT
, MS_SLASH
, TG::Product
, 0 },
222 { "setR" , TSETR
, MS_SETR
, TG::Standalone
, 5},
223 { "setZ" , TSETZ
, MS_SETZ
, TG::Standalone
, 5},
224 { "sim", TSIM
, MS_SIM
, TG::Relation
, 0},
225 { "simeq", TSIMEQ
, MS_SIMEQ
, TG::Relation
, 0},
226 { "sin", TSIN
, '\0', TG::Function
, 5},
227 { "sinh", TSINH
, '\0', TG::Function
, 5},
228 { "size", TSIZE
, '\0', TG::FontAttr
, 5},
229 { "slash", TSLASH
, MS_SLASH
, TG::Product
, 0 },
230 { "sqrt", TSQRT
, MS_SQRT
, TG::UnOper
, 5},
231 { "stack", TSTACK
, '\0', TG::NONE
, 5},
232 { "sub", TRSUB
, '\0', TG::Power
, 0},
233 { "subset", TSUBSET
, MS_SUBSET
, TG::Relation
, 0},
234 { "subseteq", TSUBSETEQ
, MS_SUBSETEQ
, TG::Relation
, 0},
235 { "succ", TSUCCEEDS
, MS_SUCCEEDS
, TG::Relation
, 0 },
236 { "succcurlyeq", TSUCCEEDSEQUAL
, MS_SUCCEEDSEQUAL
, TG::Relation
, 0 },
237 { "succsim", TSUCCEEDSEQUIV
, MS_SUCCEEDSEQUIV
, TG::Relation
, 0 },
238 { "sum", TSUM
, MS_SUM
, TG::Oper
, 5},
239 { "sup", TRSUP
, '\0', TG::Power
, 0},
240 { "supset", TSUPSET
, MS_SUPSET
, TG::Relation
, 0},
241 { "supseteq", TSUPSETEQ
, MS_SUPSETEQ
, TG::Relation
, 0},
242 { "tan", TTAN
, '\0', TG::Function
, 5},
243 { "tanh", TTANH
, '\0', TG::Function
, 5},
244 { "tilde", TTILDE
, MS_TILDE
, TG::Attribute
, 5},
245 { "times", TTIMES
, MS_TIMES
, TG::Product
, 0},
246 { "to", TTO
, '\0', TG::Limit
, 0},
247 { "toward", TTOWARD
, MS_RIGHTARROW
, TG::Relation
, 0},
248 { "transl", TTRANSL
, MS_TRANSL
, TG::Relation
, 0},
249 { "transr", TTRANSR
, MS_TRANSR
, TG::Relation
, 0},
250 { "underbrace", TUNDERBRACE
, MS_UNDERBRACE
, TG::Product
, 5},
251 { "underline", TUNDERLINE
, '\0', TG::Attribute
, 5},
252 { "union", TUNION
, MS_UNION
, TG::Sum
, 0},
253 { "uoper", TUOPER
, '\0', TG::UnOper
, 5},
254 { "uparrow" , TUPARROW
, MS_UPARROW
, TG::Standalone
, 5},
255 { "vec", TVEC
, MS_VEC
, TG::Attribute
, 5},
256 { "widebslash", TWIDEBACKSLASH
, MS_BACKSLASH
, TG::Product
, 0 },
257 { "wideharpoon", TWIDEHARPOON
, MS_HARPOON
, TG::Attribute
, 5},
258 { "widehat", TWIDEHAT
, MS_HAT
, TG::Attribute
, 5},
259 { "wideslash", TWIDESLASH
, MS_SLASH
, TG::Product
, 0 },
260 { "widetilde", TWIDETILDE
, MS_TILDE
, TG::Attribute
, 5},
261 { "widevec", TWIDEVEC
, MS_VEC
, TG::Attribute
, 5},
262 { "wp" , TWP
, MS_WP
, TG::Standalone
, 5}
265 // First character may be any alphabetic
266 const sal_Int32 coStartFlags
= KParseTokens::ANY_LETTER
| KParseTokens::IGNORE_LEADING_WS
;
268 // Continuing characters may be any alphabetic
269 const sal_Int32 coContFlags
= (coStartFlags
& ~KParseTokens::IGNORE_LEADING_WS
)
270 | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING
;
271 // First character for numbers, may be any numeric or dot
272 const sal_Int32 coNumStartFlags
= KParseTokens::ASC_DIGIT
| KParseTokens::ASC_DOT
273 | KParseTokens::IGNORE_LEADING_WS
;
274 // Continuing characters for numbers, may be any numeric or dot or comma.
275 // tdf#127873: additionally accept ',' comma group separator as too many
276 // existing documents unwittingly may have used that as decimal separator
277 // in such locales (though it never was as this is always the en-US locale
278 // and the group separator is only parsed away).
279 const sal_Int32 coNumContFlags
= (coNumStartFlags
& ~KParseTokens::IGNORE_LEADING_WS
)
280 | KParseTokens::GROUP_SEPARATOR_IN_NUMBER
;
281 // First character for numbers hexadecimal
282 const sal_Int32 coNum16StartFlags
= KParseTokens::ASC_DIGIT
| KParseTokens::ASC_UPALPHA
283 | KParseTokens::IGNORE_LEADING_WS
;
285 // Continuing characters for numbers hexadecimal
286 const sal_Int32 coNum16ContFlags
= (coNum16StartFlags
& ~KParseTokens::IGNORE_LEADING_WS
);
287 // user-defined char continuing characters may be any alphanumeric or dot.
288 const sal_Int32 coUserDefinedCharContFlags
= KParseTokens::ANY_LETTER_OR_NUMBER
289 | KParseTokens::ASC_DOT
290 | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING
;
292 //Checks if keyword is in the list.
293 static inline bool findCompare(const SmTokenTableEntry
& lhs
, const OUString
& s
)
295 return s
.compareToIgnoreAsciiCaseAscii(lhs
.pIdent
) > 0;
298 //Returns the SmTokenTableEntry for a keyword
299 static const SmTokenTableEntry
* GetTokenTableEntry( const OUString
&rName
)
301 if (rName
.isEmpty())return nullptr; //avoid null pointer exceptions
302 //Looks for the first keyword after or equal to rName in alphabetical order.
303 auto findIter
= std::lower_bound( std::begin(aTokenTable
),
304 std::end(aTokenTable
), rName
, findCompare
);
305 if ( findIter
!= std::end(aTokenTable
) && rName
.equalsIgnoreAsciiCaseAscii( findIter
->pIdent
))
306 return &*findIter
; //check is equal
307 return nullptr; //not found
310 static bool IsDelimiter( const OUString
&rTxt
, sal_Int32 nPos
)
311 { // returns 'true' iff cChar is '\0' or a delimiter
313 assert(nPos
<= rTxt
.getLength()); //index out of range
314 if (nPos
== rTxt
.getLength())return true; //This is EOF
315 sal_Unicode cChar
= rTxt
[nPos
];
317 // check if 'cChar' is in the delimiter table
318 static const sal_Unicode aDelimiterTable
[] =
320 ' ', '{', '}', '(', ')', '\t', '\n', '\r', '+', '-',
321 '*', '/', '=', '[', ']', '^', '_', '#',
322 '%', '>', '<', '&', '|', '\\', '"', '~', '`'
323 };//reordered by usage (by eye) for nanoseconds saving.
326 for (auto const &cDelimiter
: aDelimiterTable
)
328 if (cDelimiter
== cChar
)return true;
331 //special chars support
332 sal_Int16 nTypJp
= SM_MOD()->GetSysLocale().GetCharClass().getType( rTxt
, nPos
);
333 return ( nTypJp
== css::i18n::UnicodeType::SPACE_SEPARATOR
||
334 nTypJp
== css::i18n::UnicodeType::CONTROL
);
337 // checks number used as arguments in Math formulas (e.g. 'size' command)
338 // Format: no negative numbers, must start with a digit, no exponent notation, ...
339 static bool lcl_IsNumber(const OUString
& rText
)
342 const sal_Unicode
* pBuffer
= rText
.getStr();
343 for(sal_Int32 nPos
= 0; nPos
< rText
.getLength(); nPos
++, pBuffer
++)
345 const sal_Unicode cChar
= *pBuffer
;
348 if(bPoint
) return false;
351 else if ( !rtl::isAsciiDigit( cChar
) ) return false;
355 // checks number used as arguments in Math formulas (e.g. 'size' command)
356 // Format: no negative numbers, must start with a digit, no exponent notation, ...
357 static bool lcl_IsNotWholeNumber(const OUString
& rText
)
359 const sal_Unicode
* pBuffer
= rText
.getStr();
360 for(sal_Int32 nPos
= 0; nPos
< rText
.getLength(); nPos
++, pBuffer
++)
361 if ( !rtl::isAsciiDigit( *pBuffer
) ) return true;
364 // checks hex number used as arguments in Math formulas (e.g. 'hex' command)
365 // Format: no negative numbers, must start with a digit, no exponent notation, ...
366 static bool lcl_IsNotWholeNumber16(const OUString
& rText
)
368 const sal_Unicode
* pBuffer
= rText
.getStr();
369 for(sal_Int32 nPos
= 0; nPos
< rText
.getLength(); nPos
++, pBuffer
++)
370 if ( !rtl::isAsciiCanonicHexDigit( *pBuffer
) ) return true;
374 //Text replace onto m_aBufferString
375 void SmParser::Replace( sal_Int32 nPos
, sal_Int32 nLen
, const OUString
&rText
)
377 assert( nPos
+ nLen
<= m_aBufferString
.getLength() ); //checks if length allows text replace
379 m_aBufferString
= m_aBufferString
.replaceAt( nPos
, nLen
, rText
); //replace and reindex
380 sal_Int32 nChg
= rText
.getLength() - nLen
;
381 m_nBufferIndex
= m_nBufferIndex
+ nChg
;
382 m_nTokenIndex
= m_nTokenIndex
+ nChg
;
// Scans the next token of m_aBufferString, starting at m_nBufferIndex, and
// stores the result in m_aCurToken (eType, cMathChar, nGroup, nLevel, aText,
// nRow, nCol).
//
// Steps that can be read from the statements below:
//  - the character class at m_nBufferIndex is compared with SPACE_SEPARATOR
//    (white-space skipping);
//  - a number is first parsed locale-independently through m_aNumCC; if that
//    yields TokenType 0, the generic m_pSysCC->parseAnyToken() is used;
//  - a '\n' at the token start moves m_nBufferIndex and m_nColOff behind it,
//    and a "%%" sequence advances m_nBufferIndex towards the next '\n';
//  - the parse result is mapped onto m_aCurToken: TEND at the end of the
//    buffer, TNUMBER, TTEXT, a keyword from the token table or TIDENT, the
//    operator / bracket tokens, and TCHARACTER for a single (possibly
//    surrogate-pair) character;
//  - unless the token is TEND, m_nBufferIndex is finally set to aRes.EndPos.
// NOTE(review): several control-flow lines (loop headers, case labels, else
// branches) are not visible in this view; the comments below only describe
// what the visible statements do.
385 void SmParser::NextToken() //Central part of the parser
388 sal_Int32 nBufLen
= m_aBufferString
.getLength();
390 sal_Int32 nRealStart
;
// white-space check on the current buffer position
395 while (UnicodeType::SPACE_SEPARATOR
==
396 m_pSysCC
->getType( m_aBufferString
, m_nBufferIndex
))
399 // Try to parse a number in a locale-independent manner using
400 // '.' as decimal separator.
401 // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
402 aRes
= m_aNumCC
.parsePredefinedToken(KParseType::ASC_NUMBER
,
403 m_aBufferString
, m_nBufferIndex
,
407 if (aRes
.TokenType
== 0)
409 // Try again with the default token parsing.
410 aRes
= m_pSysCC
->parseAnyToken(m_aBufferString
, m_nBufferIndex
,
// real token start = scan position plus the white space the parser skipped
415 nRealStart
= m_nBufferIndex
+ aRes
.LeadingWhiteSpace
;
416 m_nBufferIndex
= nRealStart
;
// unparsed '\n': continue behind it and restart the column offset there
419 if ( aRes
.TokenType
== 0 &&
420 nRealStart
< nBufLen
&&
421 '\n' == m_aBufferString
[ nRealStart
] )
423 // keep data needed for tokens row and col entry up to date
425 m_nBufferIndex
= m_nColOff
= nRealStart
+ 1;
428 else if (aRes
.TokenType
& KParseType::ONE_SINGLE_CHAR
)
// "%%" starts a comment: move on until the next '\n' (or the buffer end)
430 if (nRealStart
+ 2 <= nBufLen
&& m_aBufferString
.match("%%", nRealStart
))
433 m_nBufferIndex
= nRealStart
+ 2;
434 while (m_nBufferIndex
< nBufLen
&&
435 '\n' != m_aBufferString
[ m_nBufferIndex
])
443 // set index of current token
444 m_nTokenIndex
= m_nBufferIndex
;
// 1-based column relative to the start of the current row
446 m_aCurToken
.nRow
= m_nRow
;
447 m_aCurToken
.nCol
= nRealStart
- m_nColOff
+ 1;
449 bool bHandled
= true;
// end of buffer reached: TEND token with empty text
450 if (nRealStart
>= nBufLen
)
452 m_aCurToken
.eType
= TEND
;
453 m_aCurToken
.cMathChar
= '\0';
454 m_aCurToken
.nGroup
= TG::NONE
;
455 m_aCurToken
.nLevel
= 0;
456 m_aCurToken
.aText
.clear();
// number token
458 else if (aRes
.TokenType
& KParseType::ANY_NUMBER
)
460 assert(aRes
.EndPos
> 0);
461 if ( m_aBufferString
[aRes
.EndPos
-1] == ',' &&
462 aRes
.EndPos
< nBufLen
&&
463 m_pSysCC
->getType( m_aBufferString
, aRes
.EndPos
) != UnicodeType::SPACE_SEPARATOR
)
465 // Comma followed by a non-space char is unlikely for decimal/thousands separator.
468 sal_Int32 n
= aRes
.EndPos
- nRealStart
;
470 m_aCurToken
.eType
= TNUMBER
;
471 m_aCurToken
.cMathChar
= '\0';
472 m_aCurToken
.nGroup
= TG::NONE
;
473 m_aCurToken
.nLevel
= 5;
474 m_aCurToken
.aText
= m_aBufferString
.copy( nRealStart
, n
);
476 SAL_WARN_IF( !IsDelimiter( m_aBufferString
, aRes
.EndPos
), "starmath", "identifier really finished? (compatibility!)" );
// double-quoted string: TTEXT with the dequoted content; the column is
// computed with + 2 instead of + 1
478 else if (aRes
.TokenType
& KParseType::DOUBLE_QUOTE_STRING
)
480 m_aCurToken
.eType
= TTEXT
;
481 m_aCurToken
.cMathChar
= '\0';
482 m_aCurToken
.nGroup
= TG::NONE
;
483 m_aCurToken
.nLevel
= 5;
484 m_aCurToken
.aText
= aRes
.DequotedNameOrString
;
485 m_aCurToken
.nRow
= m_nRow
;
486 m_aCurToken
.nCol
= nRealStart
- m_nColOff
+ 2;
// identifier: looked up in the keyword table
488 else if (aRes
.TokenType
& KParseType::IDENTNAME
)
490 sal_Int32 n
= aRes
.EndPos
- nRealStart
;
492 OUString
aName( m_aBufferString
.copy( nRealStart
, n
) );
493 const SmTokenTableEntry
*pEntry
= GetTokenTableEntry( aName
);
// token data copied from the keyword table entry
497 m_aCurToken
.eType
= pEntry
->eType
;
498 m_aCurToken
.cMathChar
= pEntry
->cMathChar
;
499 m_aCurToken
.nGroup
= pEntry
->nGroup
;
500 m_aCurToken
.nLevel
= pEntry
->nLevel
;
501 m_aCurToken
.aText
= OUString::createFromAscii( pEntry
->pIdent
);
// plain identifier (TIDENT) carrying the scanned name
505 m_aCurToken
.eType
= TIDENT
;
506 m_aCurToken
.cMathChar
= '\0';
507 m_aCurToken
.nGroup
= TG::NONE
;
508 m_aCurToken
.nLevel
= 5;
509 m_aCurToken
.aText
= aName
;
511 SAL_WARN_IF(!IsDelimiter(m_aBufferString
, aRes
.EndPos
),"starmath", "identifier really finished? (compatibility!)");
// unparsed '_' is the subscript operator (TRSUB); one character is consumed
514 else if (aRes
.TokenType
== 0 && '_' == m_aBufferString
[ nRealStart
])
516 m_aCurToken
.eType
= TRSUB
;
517 m_aCurToken
.cMathChar
= '\0';
518 m_aCurToken
.nGroup
= TG::Power
;
519 m_aCurToken
.nLevel
= 0;
520 m_aCurToken
.aText
= "_";
522 aRes
.EndPos
= nRealStart
+ 1;
// comparison-like tokens beginning with '<' or '>'; rnEndPos aliases
// aRes.EndPos so each branch can set the consumed length itself
524 else if (aRes
.TokenType
& KParseType::BOOLEAN
)
526 sal_Int32
&rnEndPos
= aRes
.EndPos
;
527 if (rnEndPos
- nRealStart
<= 2)
529 sal_Unicode ch
= m_aBufferString
[ nRealStart
];
534 if (m_aBufferString
.match("<<", nRealStart
))
536 m_aCurToken
.eType
= TLL
;
537 m_aCurToken
.cMathChar
= MS_LL
;
538 m_aCurToken
.nGroup
= TG::Relation
;
539 m_aCurToken
.nLevel
= 0;
540 m_aCurToken
.aText
= "<<";
542 rnEndPos
= nRealStart
+ 2;
544 else if (m_aBufferString
.match("<=", nRealStart
))
546 m_aCurToken
.eType
= TLE
;
547 m_aCurToken
.cMathChar
= MS_LE
;
548 m_aCurToken
.nGroup
= TG::Relation
;
549 m_aCurToken
.nLevel
= 0;
550 m_aCurToken
.aText
= "<=";
552 rnEndPos
= nRealStart
+ 2;
554 else if (m_aBufferString
.match("<-", nRealStart
))
556 m_aCurToken
.eType
= TLEFTARROW
;
557 m_aCurToken
.cMathChar
= MS_LEFTARROW
;
558 m_aCurToken
.nGroup
= TG::Standalone
;
559 m_aCurToken
.nLevel
= 5;
560 m_aCurToken
.aText
= "<-";
562 rnEndPos
= nRealStart
+ 2;
564 else if (m_aBufferString
.match("<>", nRealStart
))
566 m_aCurToken
.eType
= TNEQ
;
567 m_aCurToken
.cMathChar
= MS_NEQ
;
568 m_aCurToken
.nGroup
= TG::Relation
;
569 m_aCurToken
.nLevel
= 0;
570 m_aCurToken
.aText
= "<>";
572 rnEndPos
= nRealStart
+ 2;
// "<?>" is the placeholder token and consumes three characters
574 else if (m_aBufferString
.match("<?>", nRealStart
))
576 m_aCurToken
.eType
= TPLACE
;
577 m_aCurToken
.cMathChar
= MS_PLACE
;
578 m_aCurToken
.nGroup
= TG::NONE
;
579 m_aCurToken
.nLevel
= 5;
580 m_aCurToken
.aText
= "<?>";
582 rnEndPos
= nRealStart
+ 3;
// plain "<"
586 m_aCurToken
.eType
= TLT
;
587 m_aCurToken
.cMathChar
= MS_LT
;
588 m_aCurToken
.nGroup
= TG::Relation
;
589 m_aCurToken
.nLevel
= 0;
590 m_aCurToken
.aText
= "<";
596 if (m_aBufferString
.match(">=", nRealStart
))
598 m_aCurToken
.eType
= TGE
;
599 m_aCurToken
.cMathChar
= MS_GE
;
600 m_aCurToken
.nGroup
= TG::Relation
;
601 m_aCurToken
.nLevel
= 0;
602 m_aCurToken
.aText
= ">=";
604 rnEndPos
= nRealStart
+ 2;
606 else if (m_aBufferString
.match(">>", nRealStart
))
608 m_aCurToken
.eType
= TGG
;
609 m_aCurToken
.cMathChar
= MS_GG
;
610 m_aCurToken
.nGroup
= TG::Relation
;
611 m_aCurToken
.nLevel
= 0;
612 m_aCurToken
.aText
= ">>";
614 rnEndPos
= nRealStart
+ 2;
// plain ">"
618 m_aCurToken
.eType
= TGT
;
619 m_aCurToken
.cMathChar
= MS_GT
;
620 m_aCurToken
.nGroup
= TG::Relation
;
621 m_aCurToken
.nLevel
= 0;
622 m_aCurToken
.aText
= ">";
// single-character tokens (brackets, operators, '%' names, '.', ...)
631 else if (aRes
.TokenType
& KParseType::ONE_SINGLE_CHAR
)
633 sal_Int32
&rnEndPos
= aRes
.EndPos
;
634 if (rnEndPos
- nRealStart
== 1)
636 sal_Unicode ch
= m_aBufferString
[ nRealStart
];
641 //! modifies aRes.EndPos
643 OSL_ENSURE( rnEndPos
>= nBufLen
||
644 '%' != m_aBufferString
[ rnEndPos
],
645 "unexpected comment start" );
647 // get identifier of user-defined character
648 ParseResult aTmpRes
= m_pSysCC
->parseAnyToken(
649 m_aBufferString
, rnEndPos
,
650 KParseTokens::ANY_LETTER
,
652 coUserDefinedCharContFlags
,
655 sal_Int32 nTmpStart
= rnEndPos
+ aTmpRes
.LeadingWhiteSpace
;
657 // default setting for the case that no identifier
658 // i.e. a valid symbol-name is following the '%'
660 m_aCurToken
.eType
= TTEXT
;
661 m_aCurToken
.cMathChar
= '\0';
662 m_aCurToken
.nGroup
= TG::NONE
;
663 m_aCurToken
.nLevel
= 5;
664 m_aCurToken
.aText
="%";
665 m_aCurToken
.nRow
= m_nRow
;
666 m_aCurToken
.nCol
= nTmpStart
- m_nColOff
;
// a name follows the '%': TSPECIAL; the copied text starts one character
// before the name so that it includes the '%'
668 if (aTmpRes
.TokenType
& KParseType::IDENTNAME
)
671 sal_Int32 n
= aTmpRes
.EndPos
- nTmpStart
;
672 m_aCurToken
.eType
= TSPECIAL
;
673 m_aCurToken
.aText
= m_aBufferString
.copy( nTmpStart
-1, n
+1 );
675 OSL_ENSURE( aTmpRes
.EndPos
> rnEndPos
,
676 "empty identifier" );
677 if (aTmpRes
.EndPos
> rnEndPos
)
678 rnEndPos
= aTmpRes
.EndPos
;
683 // if no symbol-name was found we start-over with
684 // finding the next token right after the '%' sign.
685 // I.e. we leave rnEndPos unmodified.
// "["
690 m_aCurToken
.eType
= TLBRACKET
;
691 m_aCurToken
.cMathChar
= MS_LBRACKET
;
692 m_aCurToken
.nGroup
= TG::LBrace
;
693 m_aCurToken
.nLevel
= 5;
694 m_aCurToken
.aText
= "[";
// "\" (escape)
699 m_aCurToken
.eType
= TESCAPE
;
700 m_aCurToken
.cMathChar
= '\0';
701 m_aCurToken
.nGroup
= TG::NONE
;
702 m_aCurToken
.nLevel
= 5;
703 m_aCurToken
.aText
= "\\";
// "]"
708 m_aCurToken
.eType
= TRBRACKET
;
709 m_aCurToken
.cMathChar
= MS_RBRACKET
;
710 m_aCurToken
.nGroup
= TG::RBrace
;
711 m_aCurToken
.nLevel
= 0;
712 m_aCurToken
.aText
= "]";
// "^" (superscript)
717 m_aCurToken
.eType
= TRSUP
;
718 m_aCurToken
.cMathChar
= '\0';
719 m_aCurToken
.nGroup
= TG::Power
;
720 m_aCurToken
.nLevel
= 0;
721 m_aCurToken
.aText
= "^";
// "`" (small blank)
726 m_aCurToken
.eType
= TSBLANK
;
727 m_aCurToken
.cMathChar
= '\0';
728 m_aCurToken
.nGroup
= TG::Blank
;
729 m_aCurToken
.nLevel
= 5;
730 m_aCurToken
.aText
= "`";
// "{" (group start)
735 m_aCurToken
.eType
= TLGROUP
;
736 m_aCurToken
.cMathChar
= MS_LBRACE
;
737 m_aCurToken
.nGroup
= TG::NONE
;
738 m_aCurToken
.nLevel
= 5;
739 m_aCurToken
.aText
= "{";
// "|"
744 m_aCurToken
.eType
= TOR
;
745 m_aCurToken
.cMathChar
= MS_OR
;
746 m_aCurToken
.nGroup
= TG::Sum
;
747 m_aCurToken
.nLevel
= 0;
748 m_aCurToken
.aText
= "|";
// "}" (group end)
753 m_aCurToken
.eType
= TRGROUP
;
754 m_aCurToken
.cMathChar
= MS_RBRACE
;
755 m_aCurToken
.nGroup
= TG::NONE
;
756 m_aCurToken
.nLevel
= 0;
757 m_aCurToken
.aText
= "}";
// "~" (blank)
762 m_aCurToken
.eType
= TBLANK
;
763 m_aCurToken
.cMathChar
= '\0';
764 m_aCurToken
.nGroup
= TG::Blank
;
765 m_aCurToken
.nLevel
= 5;
766 m_aCurToken
.aText
= "~";
// "##" or "#"
771 if (m_aBufferString
.match("##", nRealStart
))
773 m_aCurToken
.eType
= TDPOUND
;
774 m_aCurToken
.cMathChar
= '\0';
775 m_aCurToken
.nGroup
= TG::NONE
;
776 m_aCurToken
.nLevel
= 0;
777 m_aCurToken
.aText
= "##";
779 rnEndPos
= nRealStart
+ 2;
783 m_aCurToken
.eType
= TPOUND
;
784 m_aCurToken
.cMathChar
= '\0';
785 m_aCurToken
.nGroup
= TG::NONE
;
786 m_aCurToken
.nLevel
= 0;
787 m_aCurToken
.aText
= "#";
// "&"
793 m_aCurToken
.eType
= TAND
;
794 m_aCurToken
.cMathChar
= MS_AND
;
795 m_aCurToken
.nGroup
= TG::Product
;
796 m_aCurToken
.nLevel
= 0;
797 m_aCurToken
.aText
= "&";
// "("
802 m_aCurToken
.eType
= TLPARENT
;
803 m_aCurToken
.cMathChar
= MS_LPARENT
;
804 m_aCurToken
.nGroup
= TG::LBrace
;
805 m_aCurToken
.nLevel
= 5; //! 0 to continue expression
806 m_aCurToken
.aText
= "(";
// ")"
811 m_aCurToken
.eType
= TRPARENT
;
812 m_aCurToken
.cMathChar
= MS_RPARENT
;
813 m_aCurToken
.nGroup
= TG::RBrace
;
814 m_aCurToken
.nLevel
= 0; //! 0 to terminate expression
815 m_aCurToken
.aText
= ")";
// "*"
820 m_aCurToken
.eType
= TMULTIPLY
;
821 m_aCurToken
.cMathChar
= MS_MULTIPLY
;
822 m_aCurToken
.nGroup
= TG::Product
;
823 m_aCurToken
.nLevel
= 0;
824 m_aCurToken
.aText
= "*";
// "+-" or "+"
829 if (m_aBufferString
.match("+-", nRealStart
))
831 m_aCurToken
.eType
= TPLUSMINUS
;
832 m_aCurToken
.cMathChar
= MS_PLUSMINUS
;
833 m_aCurToken
.nGroup
= TG::UnOper
| TG::Sum
;
834 m_aCurToken
.nLevel
= 5;
835 m_aCurToken
.aText
= "+-";
837 rnEndPos
= nRealStart
+ 2;
841 m_aCurToken
.eType
= TPLUS
;
842 m_aCurToken
.cMathChar
= MS_PLUS
;
843 m_aCurToken
.nGroup
= TG::UnOper
| TG::Sum
;
844 m_aCurToken
.nLevel
= 5;
845 m_aCurToken
.aText
= "+";
// "-+", "->" or "-"
851 if (m_aBufferString
.match("-+", nRealStart
))
853 m_aCurToken
.eType
= TMINUSPLUS
;
854 m_aCurToken
.cMathChar
= MS_MINUSPLUS
;
855 m_aCurToken
.nGroup
= TG::UnOper
| TG::Sum
;
856 m_aCurToken
.nLevel
= 5;
857 m_aCurToken
.aText
= "-+";
859 rnEndPos
= nRealStart
+ 2;
861 else if (m_aBufferString
.match("->", nRealStart
))
863 m_aCurToken
.eType
= TRIGHTARROW
;
864 m_aCurToken
.cMathChar
= MS_RIGHTARROW
;
865 m_aCurToken
.nGroup
= TG::Standalone
;
866 m_aCurToken
.nLevel
= 5;
867 m_aCurToken
.aText
= "->";
869 rnEndPos
= nRealStart
+ 2;
873 m_aCurToken
.eType
= TMINUS
;
874 m_aCurToken
.cMathChar
= MS_MINUS
;
875 m_aCurToken
.nGroup
= TG::UnOper
| TG::Sum
;
876 m_aCurToken
.nLevel
= 5;
877 m_aCurToken
.aText
= "-";
// text starting with '.': scanned by hand as a TNUMBER made of dots and
// ASCII digits
883 // Only one character? Then it can't be a number.
884 if (m_nBufferIndex
< m_aBufferString
.getLength() - 1)
886 // for compatibility with SO5.2
887 // texts like .34 ...56 ... h ...78..90
888 // will be treated as numbers
889 m_aCurToken
.eType
= TNUMBER
;
890 m_aCurToken
.cMathChar
= '\0';
891 m_aCurToken
.nGroup
= TG::NONE
;
892 m_aCurToken
.nLevel
= 5;
894 sal_Int32 nTxtStart
= m_nBufferIndex
;
896 // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only
899 cChar
= m_aBufferString
[ ++m_nBufferIndex
];
901 while ( (cChar
== '.' || rtl::isAsciiDigit( cChar
)) &&
902 ( m_nBufferIndex
< m_aBufferString
.getLength() - 1 ) );
904 m_aCurToken
.aText
= m_aBufferString
.copy( nTxtStart
, m_nBufferIndex
- nTxtStart
);
905 aRes
.EndPos
= m_nBufferIndex
;
// "/"
913 m_aCurToken
.eType
= TDIVIDEBY
;
914 m_aCurToken
.cMathChar
= MS_SLASH
;
915 m_aCurToken
.nGroup
= TG::Product
;
916 m_aCurToken
.nLevel
= 0;
917 m_aCurToken
.aText
= "/";
// "="
922 m_aCurToken
.eType
= TASSIGN
;
923 m_aCurToken
.cMathChar
= MS_ASSIGN
;
924 m_aCurToken
.nGroup
= TG::Relation
;
925 m_aCurToken
.nLevel
= 0;
926 m_aCurToken
.aText
= "=";
// generic single character token (TCHARACTER)
// NOTE(review): presumably only reached when bHandled ended up false; the
// guarding condition is not visible in this view - confirm.
939 m_aCurToken
.eType
= TCHARACTER
;
940 m_aCurToken
.cMathChar
= '\0';
941 m_aCurToken
.nGroup
= TG::NONE
;
942 m_aCurToken
.nLevel
= 5;
944 // tdf#129372: we may have to deal with surrogate pairs
945 // (see https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates)
946 // in this case, we must read 2 sal_Unicode instead of 1
947 int nOffset(rtl::isSurrogate(m_aBufferString
[nRealStart
])? 2 : 1);
948 m_aCurToken
.aText
= m_aBufferString
.copy( nRealStart
, nOffset
);
950 aRes
.EndPos
= nRealStart
+ nOffset
;
// continue behind the consumed token on the next call, except at the end
953 if (TEND
!= m_aCurToken
.eType
)
954 m_nBufferIndex
= aRes
.EndPos
;
// Scans the next token in a context where a color is expected and stores it
// in m_aCurToken.
//
// dvipload selects the lookup for identifiers: when true the name is resolved
// with starmathdatabase::Identify_ColorName_DVIPSNAMES(), otherwise with
// starmathdatabase::Identify_ColorName_Parser().
//
// Visible behaviour: white space, '\n' and "%%" comments are handled like in
// NextToken(); at the end of the buffer the token type is TEND; an identifier
// is resolved to a color table entry; a single '#' (not "##") yields a THEX
// token with text "hex"; anything else yields TNONE. Unless the token is
// TEND, m_nBufferIndex is set to aRes.EndPos.
// NOTE(review): some control-flow lines are not visible in this view; the
// comments only describe the visible statements.
957 void SmParser::NextTokenColor(bool dvipload
)
960 sal_Int32 nBufLen
= m_aBufferString
.getLength();
962 sal_Int32 nRealStart
;
// white-space check on the current buffer position
968 while (UnicodeType::SPACE_SEPARATOR
==
969 m_pSysCC
->getType( m_aBufferString
, m_nBufferIndex
))
971 //parse, there are few options, so less strict.
972 aRes
= m_pSysCC
->parseAnyToken(m_aBufferString
, m_nBufferIndex
,
973 coStartFlags
, "", coContFlags
, "");
// real token start = scan position plus the white space the parser skipped
974 nRealStart
= m_nBufferIndex
+ aRes
.LeadingWhiteSpace
;
975 m_nBufferIndex
= nRealStart
;
// unparsed '\n': continue behind it and restart the column offset there
977 if ( aRes
.TokenType
== 0 &&
978 nRealStart
< nBufLen
&&
979 '\n' == m_aBufferString
[ nRealStart
] )
981 // keep data needed for tokens row and col entry up to date
983 m_nBufferIndex
= m_nColOff
= nRealStart
+ 1;
986 else if (aRes
.TokenType
& KParseType::ONE_SINGLE_CHAR
)
// "%%" starts a comment: move on until the next '\n' (or the buffer end)
988 if (nRealStart
+ 2 <= nBufLen
&& m_aBufferString
.match("%%", nRealStart
))
991 m_nBufferIndex
= nRealStart
+ 2;
992 while (m_nBufferIndex
< nBufLen
&&
993 '\n' != m_aBufferString
[ m_nBufferIndex
])
1000 // set index of current token
1001 m_nTokenIndex
= m_nBufferIndex
;
1002 m_aCurToken
.nRow
= m_nRow
;
1003 m_aCurToken
.nCol
= nRealStart
- m_nColOff
+ 1;
1004 if (nRealStart
>= nBufLen
) m_aCurToken
.eType
= TEND
;
// identifier: resolve it as a color name
1005 else if (aRes
.TokenType
& KParseType::IDENTNAME
)
1007 sal_Int32 n
= aRes
.EndPos
- nRealStart
;
1009 OUString
aName( m_aBufferString
.copy( nRealStart
, n
) );
1010 std::unique_ptr
<SmColorTokenTableEntry
> aSmColorTokenTableEntry
;
1011 if(dvipload
) aSmColorTokenTableEntry
= starmathdatabase::Identify_ColorName_DVIPSNAMES( aName
);
1012 else aSmColorTokenTableEntry
= starmathdatabase::Identify_ColorName_Parser( aName
);
// NOTE(review): the entry is assigned without a visible null check; confirm
// that the lookup functions never return an empty pointer.
1013 m_aCurToken
= aSmColorTokenTableEntry
;
1015 else if (aRes
.TokenType
& KParseType::ONE_SINGLE_CHAR
)
// a single '#' (but not "##") introduces a hexadecimal color value
1017 if( m_aBufferString
[ nRealStart
] == '#' && !m_aBufferString
.match("##", nRealStart
) )
1019 m_aCurToken
.eType
= THEX
;
1020 m_aCurToken
.cMathChar
= '\0';
1021 m_aCurToken
.nGroup
= TG::Color
;
1022 m_aCurToken
.nLevel
= 0;
1023 m_aCurToken
.aText
= "hex";
1026 else m_aCurToken
.eType
= TNONE
;
// continue behind the consumed token on the next call, except at the end
1027 if (TEND
!= m_aCurToken
.eType
) m_nBufferIndex
= aRes
.EndPos
;
1030 void SmParser::NextTokenFontSize()
1033 sal_Int32 nBufLen
= m_aBufferString
.getLength();
1035 sal_Int32 nRealStart
;
1041 // skip white spaces
1042 while (UnicodeType::SPACE_SEPARATOR
==
1043 m_pSysCC
->getType( m_aBufferString
, m_nBufferIndex
))
1045 //hexadecimal parser
1046 aRes
= m_pSysCC
->parseAnyToken(m_aBufferString
, m_nBufferIndex
,
1047 coNum16StartFlags
, ".", coNum16ContFlags
, ".,");
1048 if (aRes
.TokenType
== 0)
1050 // Try again with the default token parsing.
1051 aRes
= m_pSysCC
->parseAnyToken(m_aBufferString
, m_nBufferIndex
,
1052 coStartFlags
, "", coContFlags
, "");
1055 nRealStart
= m_nBufferIndex
+ aRes
.LeadingWhiteSpace
;
1056 m_nBufferIndex
= nRealStart
;
1058 if ( aRes
.TokenType
== 0 &&
1059 nRealStart
< nBufLen
&&
1060 '\n' == m_aBufferString
[ nRealStart
] )
1062 // keep data needed for tokens row and col entry up to date
1064 m_nBufferIndex
= m_nColOff
= nRealStart
+ 1;
1067 else if (aRes
.TokenType
& KParseType::ONE_SINGLE_CHAR
)
1069 if (nRealStart
+ 2 <= nBufLen
&& m_aBufferString
.match("%%", nRealStart
))
1072 m_nBufferIndex
= nRealStart
+ 2;
1073 while (m_nBufferIndex
< nBufLen
&&
1074 '\n' != m_aBufferString
[ m_nBufferIndex
])
1081 // set index of current token
1082 m_nTokenIndex
= m_nBufferIndex
;
1083 m_aCurToken
.nRow
= m_nRow
;
1084 m_aCurToken
.nCol
= nRealStart
- m_nColOff
+ 1;
1085 if (nRealStart
>= nBufLen
) m_aCurToken
.eType
= TEND
;
1086 else if (aRes
.TokenType
& KParseType::ONE_SINGLE_CHAR
)
1088 if ( aRes
.EndPos
- nRealStart
== 1 )
1090 switch ( m_aBufferString
[ nRealStart
] )
1093 m_aCurToken
.eType
= TMULTIPLY
;
1094 m_aCurToken
.cMathChar
= MS_MULTIPLY
;
1095 m_aCurToken
.nGroup
= TG::Product
;
1096 m_aCurToken
.nLevel
= 0;
1097 m_aCurToken
.aText
= "*";
1100 m_aCurToken
.eType
= TPLUS
;
1101 m_aCurToken
.cMathChar
= MS_PLUS
;
1102 m_aCurToken
.nGroup
= TG::UnOper
| TG::Sum
;
1103 m_aCurToken
.nLevel
= 5;
1104 m_aCurToken
.aText
= "+";
1107 m_aCurToken
.eType
= TMINUS
;
1108 m_aCurToken
.cMathChar
= MS_MINUS
;
1109 m_aCurToken
.nGroup
= TG::UnOper
| TG::Sum
;
1110 m_aCurToken
.nLevel
= 5;
1111 m_aCurToken
.aText
= "-";
1114 m_aCurToken
.eType
= TDIVIDEBY
;
1115 m_aCurToken
.cMathChar
= MS_SLASH
;
1116 m_aCurToken
.nGroup
= TG::Product
;
1117 m_aCurToken
.nLevel
= 0;
1118 m_aCurToken
.aText
= "/";
1121 m_aCurToken
.eType
= TNONE
;
1125 else m_aCurToken
.eType
= TNONE
;
1129 assert(aRes
.EndPos
> 0);
1130 sal_Int32 n
= aRes
.EndPos
- nRealStart
;
1132 m_aCurToken
.eType
= THEX
;
1133 m_aCurToken
.cMathChar
= '\0';
1134 m_aCurToken
.nGroup
= TG::NONE
;
1135 m_aCurToken
.nLevel
= 5;
1136 m_aCurToken
.aText
= m_aBufferString
.copy( nRealStart
, n
);
1138 else m_aCurToken
.eType
= TNONE
;
1139 if (TEND
!= m_aCurToken
.eType
) m_nBufferIndex
= aRes
.EndPos
;
1144 SmNodeArray
buildNodeArray(std::vector
<std::unique_ptr
<SmNode
>>& rSubNodes
)
1146 SmNodeArray
aSubArray(rSubNodes
.size());
1147 for (size_t i
= 0; i
< rSubNodes
.size(); ++i
)
1148 aSubArray
[i
] = rSubNodes
[i
].release();
1154 /*************************************************************************************************/
1156 std::unique_ptr
<SmTableNode
> SmParser::DoTable()
1158 DepthProtect
aDepthGuard(m_nParseDepth
);
1159 if (aDepthGuard
.TooDeep())
1160 throw std::range_error("parser depth limit");
1162 std::vector
<std::unique_ptr
<SmNode
>> aLineArray
;
1163 aLineArray
.push_back(DoLine());
1164 while (m_aCurToken
.eType
== TNEWLINE
)
1167 aLineArray
.push_back(DoLine());
1169 assert(m_aCurToken
.eType
== TEND
);
1170 std::unique_ptr
<SmTableNode
> xSNode(new SmTableNode(m_aCurToken
));
1171 xSNode
->SetSubNodes(buildNodeArray(aLineArray
));
1175 std::unique_ptr
<SmNode
> SmParser::DoAlign(bool bUseExtraSpaces
)
1176 // parse alignment info (if any), then go on with rest of expression
1178 DepthProtect
aDepthGuard(m_nParseDepth
);
1179 if (aDepthGuard
.TooDeep())
1180 throw std::range_error("parser depth limit");
1182 std::unique_ptr
<SmStructureNode
> xSNode
;
1184 if (TokenInGroup(TG::Align
))
1186 xSNode
.reset(new SmAlignNode(m_aCurToken
));
1190 // allow for just one align statement in 5.0
1191 if (TokenInGroup(TG::Align
))
1192 return DoError(SmParseError::DoubleAlign
);
1195 auto pNode
= DoExpression(bUseExtraSpaces
);
1199 xSNode
->SetSubNode(0, pNode
.release());
1205 // Postcondition: m_aCurToken.eType == TEND || m_aCurToken.eType == TNEWLINE
1206 std::unique_ptr
<SmNode
> SmParser::DoLine()
1208 DepthProtect
aDepthGuard(m_nParseDepth
);
1209 if (aDepthGuard
.TooDeep())
1210 throw std::range_error("parser depth limit");
1212 std::vector
<std::unique_ptr
<SmNode
>> ExpressionArray
;
1214 // start with single expression that may have an alignment statement
1215 // (and go on with expressions that must not have alignment
1216 // statements in 'while' loop below. See also 'Expression()'.)
1217 if (m_aCurToken
.eType
!= TEND
&& m_aCurToken
.eType
!= TNEWLINE
)
1218 ExpressionArray
.push_back(DoAlign());
1220 while (m_aCurToken
.eType
!= TEND
&& m_aCurToken
.eType
!= TNEWLINE
)
1221 ExpressionArray
.push_back(DoExpression());
1223 //If there's no expression, add an empty one.
1224 //this is to avoid a formula tree without any caret
1225 //positions, in visual formula editor.
1226 if(ExpressionArray
.empty())
1229 aTok
.eType
= TNEWLINE
;
1230 ExpressionArray
.emplace_back(std::unique_ptr
<SmNode
>(new SmExpressionNode(aTok
)));
1233 auto xSNode
= std::make_unique
<SmLineNode
>(m_aCurToken
);
1234 xSNode
->SetSubNodes(buildNodeArray(ExpressionArray
));
1238 std::unique_ptr
<SmNode
> SmParser::DoExpression(bool bUseExtraSpaces
)
1240 DepthProtect
aDepthGuard(m_nParseDepth
);
1241 if (aDepthGuard
.TooDeep())
1242 throw std::range_error("parser depth limit");
1244 std::vector
<std::unique_ptr
<SmNode
>> RelationArray
;
1245 RelationArray
.push_back(DoRelation());
1246 while (m_aCurToken
.nLevel
>= 4)
1247 RelationArray
.push_back(DoRelation());
1249 if (RelationArray
.size() > 1)
1251 std::unique_ptr
<SmExpressionNode
> xSNode(new SmExpressionNode(m_aCurToken
));
1252 xSNode
->SetSubNodes(buildNodeArray(RelationArray
));
1253 xSNode
->SetUseExtraSpaces(bUseExtraSpaces
);
1258 // This expression has only one node so just push this node.
1259 return std::move(RelationArray
[0]);
1263 std::unique_ptr
<SmNode
> SmParser::DoRelation()
1265 DepthProtect
aDepthGuard(m_nParseDepth
);
1266 if (aDepthGuard
.TooDeep())
1267 throw std::range_error("parser depth limit");
1269 int nDepthLimit
= m_nParseDepth
;
1271 auto xFirst
= DoSum();
1272 while (TokenInGroup(TG::Relation
))
1274 std::unique_ptr
<SmStructureNode
> xSNode(new SmBinHorNode(m_aCurToken
));
1275 auto xSecond
= DoOpSubSup();
1276 auto xThird
= DoSum();
1277 xSNode
->SetSubNodes(std::move(xFirst
), std::move(xSecond
), std::move(xThird
));
1278 xFirst
= std::move(xSNode
);
1281 if (aDepthGuard
.TooDeep())
1282 throw std::range_error("parser depth limit");
1285 m_nParseDepth
= nDepthLimit
;
1290 std::unique_ptr
<SmNode
> SmParser::DoSum()
1292 DepthProtect
aDepthGuard(m_nParseDepth
);
1293 if (aDepthGuard
.TooDeep())
1294 throw std::range_error("parser depth limit");
1296 auto xFirst
= DoProduct();
1297 while (TokenInGroup(TG::Sum
))
1299 std::unique_ptr
<SmStructureNode
> xSNode(new SmBinHorNode(m_aCurToken
));
1300 auto xSecond
= DoOpSubSup();
1301 auto xThird
= DoProduct();
1302 xSNode
->SetSubNodes(std::move(xFirst
), std::move(xSecond
), std::move(xThird
));
1303 xFirst
= std::move(xSNode
);
1308 std::unique_ptr
<SmNode
> SmParser::DoProduct()
1310 DepthProtect
aDepthGuard(m_nParseDepth
);
1311 if (aDepthGuard
.TooDeep())
1312 throw std::range_error("parser depth limit");
1314 auto xFirst
= DoPower();
1316 int nDepthLimit
= 0;
1318 while (TokenInGroup(TG::Product
))
1320 //this linear loop builds a recursive structure, if it gets
1321 //too deep then later processing, e.g. releasing the tree,
1323 if (nDepthLimit
> DEPTH_LIMIT
)
1324 throw std::range_error("parser depth limit");
1326 std::unique_ptr
<SmStructureNode
> xSNode
;
1327 std::unique_ptr
<SmNode
> xOper
;
1328 bool bSwitchArgs
= false;
1330 SmTokenType eType
= m_aCurToken
.eType
;
1334 xSNode
.reset(new SmBinVerNode(m_aCurToken
));
1335 xOper
.reset(new SmRectangleNode(m_aCurToken
));
1340 xSNode
.reset(new SmBinHorNode(m_aCurToken
));
1344 //Let the glyph node know it's a binary operation
1345 m_aCurToken
.eType
= TBOPER
;
1346 m_aCurToken
.nGroup
= TG::Product
;
1347 xOper
= DoGlyphSpecial();
1352 xSNode
.reset(new SmVerticalBraceNode(m_aCurToken
));
1353 xOper
.reset(new SmMathSymbolNode(m_aCurToken
));
1358 case TWIDEBACKSLASH
:
1361 SmBinDiagonalNode
*pSTmp
= new SmBinDiagonalNode(m_aCurToken
);
1362 pSTmp
->SetAscending(eType
== TWIDESLASH
);
1363 xSNode
.reset(pSTmp
);
1365 xOper
.reset(new SmPolyLineNode(m_aCurToken
));
1373 xSNode
.reset(new SmBinHorNode(m_aCurToken
));
1375 xOper
= DoOpSubSup();
1378 auto xArg
= DoPower();
1382 //! vgl siehe SmBinDiagonalNode::Arrange
1383 xSNode
->SetSubNodes(std::move(xFirst
), std::move(xArg
), std::move(xOper
));
1387 xSNode
->SetSubNodes(std::move(xFirst
), std::move(xOper
), std::move(xArg
));
1389 xFirst
= std::move(xSNode
);
1395 std::unique_ptr
<SmNode
> SmParser::DoSubSup(TG nActiveGroup
, SmNode
*pGivenNode
)
1397 std::unique_ptr
<SmNode
> xGivenNode(pGivenNode
);
1398 DepthProtect
aDepthGuard(m_nParseDepth
);
1399 if (aDepthGuard
.TooDeep())
1400 throw std::range_error("parser depth limit");
1402 assert(nActiveGroup
== TG::Power
|| nActiveGroup
== TG::Limit
);
1403 assert(m_aCurToken
.nGroup
== nActiveGroup
);
1405 std::unique_ptr
<SmSubSupNode
> pNode(new SmSubSupNode(m_aCurToken
));
1406 //! Of course 'm_aCurToken' is just the first sub-/supscript token.
1407 //! It should be of no further interest. The positions of the
1408 //! sub-/supscripts will be identified by the corresponding subnodes
1409 //! index in the 'aSubNodes' array (enum value from 'SmSubSup').
1411 pNode
->SetUseLimits(nActiveGroup
== TG::Limit
);
1413 // initialize subnodes array
1414 std::vector
<std::unique_ptr
<SmNode
>> aSubNodes(1 + SUBSUP_NUM_ENTRIES
);
1415 aSubNodes
[0] = std::move(xGivenNode
);
1417 // process all sub-/supscripts
1419 while (TokenInGroup(nActiveGroup
))
1421 SmTokenType
eType (m_aCurToken
.eType
);
1425 case TRSUB
: nIndex
= static_cast<int>(RSUB
); break;
1426 case TRSUP
: nIndex
= static_cast<int>(RSUP
); break;
1428 case TCSUB
: nIndex
= static_cast<int>(CSUB
); break;
1430 case TCSUP
: nIndex
= static_cast<int>(CSUP
); break;
1431 case TLSUB
: nIndex
= static_cast<int>(LSUB
); break;
1432 case TLSUP
: nIndex
= static_cast<int>(LSUP
); break;
1434 SAL_WARN( "starmath", "unknown case");
1437 assert(1 <= nIndex
&& nIndex
<= SUBSUP_NUM_ENTRIES
);
1439 std::unique_ptr
<SmNode
> xENode
;
1440 if (aSubNodes
[nIndex
]) // if already occupied at earlier iteration
1442 // forget the earlier one, remember an error instead
1443 aSubNodes
[nIndex
].reset();
1444 xENode
= DoError(SmParseError::DoubleSubsupscript
); // this also skips current token.
1448 // skip sub-/supscript token
1452 // get sub-/supscript node
1453 // (even when we saw a double-sub/supscript error in the above
1454 // in order to minimize mess and continue parsing.)
1455 std::unique_ptr
<SmNode
> xSNode
;
1456 if (eType
== TFROM
|| eType
== TTO
)
1458 // parse limits in old 4.0 and 5.0 style
1459 xSNode
= DoRelation();
1462 xSNode
= DoTerm(true);
1464 aSubNodes
[nIndex
] = std::move(xENode
? xENode
: xSNode
);
1467 pNode
->SetSubNodes(buildNodeArray(aSubNodes
));
1471 std::unique_ptr
<SmNode
> SmParser::DoSubSupEvaluate(SmNode
*pGivenNode
)
1473 std::unique_ptr
<SmNode
> xGivenNode(pGivenNode
);
1474 DepthProtect
aDepthGuard(m_nParseDepth
);
1475 if (aDepthGuard
.TooDeep()) throw std::range_error("parser depth limit");
1477 std::unique_ptr
<SmSubSupNode
> pNode(new SmSubSupNode(m_aCurToken
));
1478 pNode
->SetUseLimits(true);
1480 // initialize subnodes array
1481 std::vector
<std::unique_ptr
<SmNode
>> aSubNodes(1 + SUBSUP_NUM_ENTRIES
);
1482 aSubNodes
[0] = std::move(xGivenNode
);
1484 // process all sub-/supscripts
1486 while (TokenInGroup(TG::Limit
))
1488 SmTokenType
eType (m_aCurToken
.eType
);
1492 case TFROM
: nIndex
= static_cast<int>(RSUB
); break;
1493 case TTO
: nIndex
= static_cast<int>(RSUP
); break;
1495 SAL_WARN( "starmath", "unknown case");
1498 assert(1 <= nIndex
&& nIndex
<= SUBSUP_NUM_ENTRIES
);
1500 std::unique_ptr
<SmNode
> xENode
;
1501 if (aSubNodes
[nIndex
]) // if already occupied at earlier iteration
1503 // forget the earlier one, remember an error instead
1504 aSubNodes
[nIndex
].reset();
1505 xENode
= DoError(SmParseError::DoubleSubsupscript
); // this also skips current token.
1507 else NextToken(); // skip sub-/supscript token
1509 // get sub-/supscript node
1510 std::unique_ptr
<SmNode
> xSNode
;
1511 xSNode
= DoTerm(true);
1513 aSubNodes
[nIndex
] = std::move(xENode
? xENode
: xSNode
);
1516 pNode
->SetSubNodes(buildNodeArray(aSubNodes
));
1520 std::unique_ptr
<SmNode
> SmParser::DoOpSubSup()
1522 DepthProtect
aDepthGuard(m_nParseDepth
);
1523 if (aDepthGuard
.TooDeep())
1524 throw std::range_error("parser depth limit");
1526 // get operator symbol
1527 auto pNode
= std::make_unique
<SmMathSymbolNode
>(m_aCurToken
);
1528 // skip operator token
1530 // get sub- supscripts if any
1531 if (m_aCurToken
.nGroup
== TG::Power
)
1532 return DoSubSup(TG::Power
, pNode
.release());
1536 std::unique_ptr
<SmNode
> SmParser::DoPower()
1538 DepthProtect
aDepthGuard(m_nParseDepth
);
1539 if (aDepthGuard
.TooDeep())
1540 throw std::range_error("parser depth limit");
1542 // get body for sub- supscripts on top of stack
1543 std::unique_ptr
<SmNode
> xNode(DoTerm(false));
1545 if (m_aCurToken
.nGroup
== TG::Power
)
1546 return DoSubSup(TG::Power
, xNode
.release());
1550 std::unique_ptr
<SmBlankNode
> SmParser::DoBlank()
1552 DepthProtect
aDepthGuard(m_nParseDepth
);
1553 if (aDepthGuard
.TooDeep())
1554 throw std::range_error("parser depth limit");
1556 assert(TokenInGroup(TG::Blank
));
1557 std::unique_ptr
<SmBlankNode
> pBlankNode(new SmBlankNode(m_aCurToken
));
1561 pBlankNode
->IncreaseBy(m_aCurToken
);
1564 while (TokenInGroup(TG::Blank
));
1566 // Ignore trailing spaces, if corresponding option is set
1567 if ( m_aCurToken
.eType
== TNEWLINE
||
1568 (m_aCurToken
.eType
== TEND
&& !utl::ConfigManager::IsFuzzing() && SM_MOD()->GetConfig()->IsIgnoreSpacesRight()) )
1570 pBlankNode
->Clear();
1575 std::unique_ptr
<SmNode
> SmParser::DoTerm(bool bGroupNumberIdent
)
1577 DepthProtect
aDepthGuard(m_nParseDepth
);
1578 if (aDepthGuard
.TooDeep())
1579 throw std::range_error("parser depth limit");
1581 switch (m_aCurToken
.eType
)
1589 bool bNoSpace
= m_aCurToken
.eType
== TNOSPACE
;
1592 if (m_aCurToken
.eType
!= TLGROUP
)
1593 return DoTerm(false); // nospace is no longer concerned
1597 // allow for empty group
1598 if (m_aCurToken
.eType
== TRGROUP
)
1600 std::unique_ptr
<SmStructureNode
> xSNode(new SmExpressionNode(m_aCurToken
));
1601 xSNode
->SetSubNodes(nullptr, nullptr);
1604 return std::unique_ptr
<SmNode
>(xSNode
.release());
1607 auto pNode
= DoAlign(!bNoSpace
);
1608 if (m_aCurToken
.eType
== TRGROUP
) {
1612 auto xSNode
= std::make_unique
<SmExpressionNode
>(m_aCurToken
);
1613 std::unique_ptr
<SmNode
> xError(DoError(SmParseError::RgroupExpected
));
1614 xSNode
->SetSubNodes(std::move(pNode
), std::move(xError
));
1615 return std::unique_ptr
<SmNode
>(xSNode
.release());
1621 return DoEvaluate();
1629 auto pNode
= std::make_unique
<SmTextNode
>(m_aCurToken
, FNT_TEXT
);
1631 return std::unique_ptr
<SmNode
>(pNode
.release());
1635 auto pNode
= std::make_unique
<SmTextNode
>(m_aCurToken
, FNT_VARIABLE
);
1637 return std::unique_ptr
<SmNode
>(pNode
.release());
1642 auto pTextNode
= std::make_unique
<SmTextNode
>(m_aCurToken
,
1643 m_aCurToken
.eType
== TNUMBER
?
1646 if (!bGroupNumberIdent
)
1649 return std::unique_ptr
<SmNode
>(pTextNode
.release());
1651 std::vector
<std::unique_ptr
<SmNode
>> aNodes
;
1652 // Some people want to be able to write "x_2n" for "x_{2n}"
1653 // although e.g. LaTeX or AsciiMath interpret that as "x_2 n".
1654 // The tokenizer skips whitespaces so we need some additional
1655 // work to distinguish from "x_2 n".
1656 // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and
1657 // https://bugs.libreoffice.org/show_bug.cgi?id=55853
1658 sal_Int32 nBufLen
= m_aBufferString
.getLength();
1660 // We need to be careful to call NextToken() only after having
1661 // tested for a whitespace separator (otherwise it will be
1663 bool moveToNextToken
= true;
1664 while (m_nBufferIndex
< nBufLen
&&
1665 m_pSysCC
->getType(m_aBufferString
, m_nBufferIndex
) !=
1666 UnicodeType::SPACE_SEPARATOR
)
1669 if (m_aCurToken
.eType
!= TNUMBER
&&
1670 m_aCurToken
.eType
!= TIDENT
)
1672 // Neither a number nor an identifier. We just moved to
1673 // the next token, so no need to do that again.
1674 moveToNextToken
= false;
1677 aNodes
.emplace_back(std::unique_ptr
<SmNode
>(new SmTextNode(m_aCurToken
,
1678 m_aCurToken
.eType
==
1683 if (moveToNextToken
)
1686 return std::unique_ptr
<SmNode
>(pTextNode
.release());
1687 // We have several concatenated identifiers and numbers.
1688 // Let's group them into one SmExpressionNode.
1689 aNodes
.insert(aNodes
.begin(), std::move(pTextNode
));
1690 std::unique_ptr
<SmExpressionNode
> xNode(new SmExpressionNode(SmToken()));
1691 xNode
->SetSubNodes(buildNodeArray(aNodes
));
1692 return std::unique_ptr
<SmNode
>(xNode
.release());
1717 auto pNode
= std::make_unique
<SmMathSymbolNode
>(m_aCurToken
);
1719 return std::unique_ptr
<SmNode
>(pNode
.release());
1737 auto pNode
= std::make_unique
<SmMathIdentifierNode
>(m_aCurToken
);
1739 return std::unique_ptr
<SmNode
>(pNode
.release());
1744 auto pNode
= std::make_unique
<SmPlaceNode
>(m_aCurToken
);
1746 return std::unique_ptr
<SmNode
>(pNode
.release());
1765 NextTokenFontSize();
1766 if( m_aCurToken
.eType
== THEX
)
1768 auto pTextNode
= std::make_unique
<SmTextNode
>(m_aCurToken
, FNT_NUMBER
);
1772 else return DoError(SmParseError::NumberExpected
);
1774 if (TokenInGroup(TG::LBrace
)) return DoBrace();
1775 if (TokenInGroup(TG::Oper
)) return DoOperator();
1776 if (TokenInGroup(TG::UnOper
)) return DoUnOper();
1777 if ( TokenInGroup(TG::Attribute
) || TokenInGroup(TG::FontAttr
) )
1779 std::stack
<std::unique_ptr
<SmStructureNode
>> aStack
;
1783 bIsAttr
= TokenInGroup(TG::Attribute
);
1784 if (!bIsAttr
&& !TokenInGroup(TG::FontAttr
))
1786 aStack
.push(bIsAttr
? DoAttribut() : DoFontAttribut());
1789 auto xFirstNode
= DoPower();
1790 while (!aStack
.empty())
1792 std::unique_ptr
<SmStructureNode
> xNode
= std::move(aStack
.top());
1794 xNode
->SetSubNodes(nullptr, std::move(xFirstNode
));
1795 xFirstNode
= std::move(xNode
);
1799 if (TokenInGroup(TG::Function
))
1800 return DoFunction();
1801 return DoError(SmParseError::UnexpectedChar
);
1805 std::unique_ptr
<SmNode
> SmParser::DoEscape()
1807 DepthProtect
aDepthGuard(m_nParseDepth
);
1808 if (aDepthGuard
.TooDeep())
1809 throw std::range_error("parser depth limit");
1813 switch (m_aCurToken
.eType
)
1836 auto pNode
= std::make_unique
<SmMathSymbolNode
>(m_aCurToken
);
1838 return std::unique_ptr
<SmNode
>(pNode
.release());
1841 return DoError(SmParseError::UnexpectedToken
);
1845 std::unique_ptr
<SmOperNode
> SmParser::DoOperator()
1847 DepthProtect
aDepthGuard(m_nParseDepth
);
1848 if (aDepthGuard
.TooDeep())
1849 throw std::range_error("parser depth limit");
1851 assert(TokenInGroup(TG::Oper
));
1853 auto xSNode
= std::make_unique
<SmOperNode
>(m_aCurToken
);
1856 auto xOperator
= DoOper();
1858 if (m_aCurToken
.nGroup
== TG::Limit
|| m_aCurToken
.nGroup
== TG::Power
)
1859 xOperator
= DoSubSup(m_aCurToken
.nGroup
, xOperator
.release());
1862 auto xArg
= DoPower();
1864 xSNode
->SetSubNodes(std::move(xOperator
), std::move(xArg
));
1868 std::unique_ptr
<SmNode
> SmParser::DoOper()
1870 DepthProtect
aDepthGuard(m_nParseDepth
);
1871 if (aDepthGuard
.TooDeep())
1872 throw std::range_error("parser depth limit");
1874 SmTokenType
eType (m_aCurToken
.eType
);
1875 std::unique_ptr
<SmNode
> pNode
;
1889 pNode
.reset(new SmMathSymbolNode(m_aCurToken
));
1896 const char* pLim
= nullptr;
1899 case TLIM
: pLim
= "lim"; break;
1900 case TLIMSUP
: pLim
= "lim sup"; break;
1901 case TLIMINF
: pLim
= "lim inf"; break;
1906 m_aCurToken
.aText
= OUString::createFromAscii(pLim
);
1907 pNode
.reset(new SmTextNode(m_aCurToken
, FNT_TEXT
));
1913 OSL_ENSURE(m_aCurToken
.eType
== TSPECIAL
, "Sm: wrong token");
1914 m_aCurToken
.eType
= TOPER
;
1915 pNode
.reset(new SmGlyphSpecialNode(m_aCurToken
));
1919 assert(false && "unknown case");
1926 std::unique_ptr
<SmStructureNode
> SmParser::DoUnOper()
1928 DepthProtect
aDepthGuard(m_nParseDepth
);
1929 if (aDepthGuard
.TooDeep())
1930 throw std::range_error("parser depth limit");
1932 assert(TokenInGroup(TG::UnOper
));
1934 SmToken aNodeToken
= m_aCurToken
;
1935 SmTokenType eType
= m_aCurToken
.eType
;
1936 bool bIsPostfix
= eType
== TFACT
;
1938 std::unique_ptr
<SmStructureNode
> xSNode
;
1939 std::unique_ptr
<SmNode
> xOper
;
1940 std::unique_ptr
<SmNode
> xExtra
;
1941 std::unique_ptr
<SmNode
> xArg
;
1957 //Let the glyph know what it is...
1958 m_aCurToken
.eType
= TUOPER
;
1959 m_aCurToken
.nGroup
= TG::UnOper
;
1960 xOper
= DoGlyphSpecial();
1969 xOper
= DoOpSubSup();
1981 xSNode
.reset(new SmBraceNode(aNodeToken
));
1982 xSNode
->SetScaleMode(SmScaleMode::Height
);
1984 // build nodes for left & right lines
1985 // (text, group, level of the used token are of no interest here)
1986 // we'll use row & column of the keyword for abs
1987 aNodeToken
.eType
= TABS
;
1989 aNodeToken
.cMathChar
= MS_VERTLINE
;
1990 std::unique_ptr
<SmNode
> xLeft(new SmMathSymbolNode(aNodeToken
));
1991 std::unique_ptr
<SmNode
> xRight(new SmMathSymbolNode(aNodeToken
));
1993 xSNode
->SetSubNodes(std::move(xLeft
), std::move(xArg
), std::move(xRight
));
1995 else if (eType
== TSQRT
|| eType
== TNROOT
)
1997 xSNode
.reset(new SmRootNode(aNodeToken
));
1998 xOper
.reset(new SmRootSymbolNode(aNodeToken
));
1999 xSNode
->SetSubNodes(std::move(xExtra
), std::move(xOper
), std::move(xArg
));
2003 xSNode
.reset(new SmUnHorNode(aNodeToken
));
2005 xSNode
->SetSubNodes(std::move(xArg
), std::move(xOper
));
2009 xSNode
->SetSubNodes(std::move(xOper
), std::move(xArg
));
2015 std::unique_ptr
<SmStructureNode
> SmParser::DoAttribut()
2017 DepthProtect
aDepthGuard(m_nParseDepth
);
2018 if (aDepthGuard
.TooDeep())
2019 throw std::range_error("parser depth limit");
2021 assert(TokenInGroup(TG::Attribute
));
2023 auto xSNode
= std::make_unique
<SmAttributNode
>(m_aCurToken
);
2024 std::unique_ptr
<SmNode
> xAttr
;
2025 SmScaleMode eScaleMode
= SmScaleMode::None
;
2027 // get appropriate node for the attribute itself
2028 switch (m_aCurToken
.eType
)
2032 xAttr
.reset(new SmRectangleNode(m_aCurToken
));
2033 eScaleMode
= SmScaleMode::Width
;
2040 xAttr
.reset(new SmMathSymbolNode(m_aCurToken
));
2041 eScaleMode
= SmScaleMode::Width
;
2045 xAttr
.reset(new SmMathSymbolNode(m_aCurToken
));
2050 xSNode
->SetSubNodes(std::move(xAttr
), nullptr); // the body will be filled later
2051 xSNode
->SetScaleMode(eScaleMode
);
2055 std::unique_ptr
<SmStructureNode
> SmParser::DoFontAttribut()
2057 DepthProtect
aDepthGuard(m_nParseDepth
);
2058 if (aDepthGuard
.TooDeep())
2059 throw std::range_error("parser depth limit");
2061 assert(TokenInGroup(TG::FontAttr
));
2063 switch (m_aCurToken
.eType
)
2071 auto pNode
= std::make_unique
<SmFontNode
>(m_aCurToken
);
2077 return DoFontSize();
2091 std::unique_ptr
<SmStructureNode
> SmParser::DoColor()
2093 DepthProtect
aDepthGuard(m_nParseDepth
);
2094 if (aDepthGuard
.TooDeep()) throw std::range_error("parser depth limit");
2096 assert(m_aCurToken
.eType
== TCOLOR
);
2097 NextTokenColor(false);
2100 if( m_aCurToken
.eType
== TDVIPSNAMESCOL
) NextTokenColor(true);
2101 if( m_aCurToken
.eType
== TERROR
) return DoError(SmParseError::ColorExpected
);
2102 if (TokenInGroup(TG::Color
))
2104 aToken
= m_aCurToken
;
2105 if( m_aCurToken
.eType
== TRGB
) //loads r, g and b
2107 sal_uInt32 nr
, ng
, nb
, nc
;
2108 NextTokenFontSize();
2109 if( lcl_IsNotWholeNumber(m_aCurToken
.aText
) )
2110 return DoError(SmParseError::ColorExpected
);
2111 nr
= m_aCurToken
.aText
.toUInt32();
2112 if( nr
> 255 )return DoError(SmParseError::ColorExpected
);
2113 NextTokenFontSize();
2114 if( lcl_IsNotWholeNumber(m_aCurToken
.aText
) )
2115 return DoError(SmParseError::ColorExpected
);
2116 ng
= m_aCurToken
.aText
.toUInt32();
2117 if( ng
> 255 )return DoError(SmParseError::ColorExpected
);
2118 NextTokenFontSize();
2119 if( lcl_IsNotWholeNumber(m_aCurToken
.aText
) )
2120 return DoError(SmParseError::ColorExpected
);
2121 nb
= m_aCurToken
.aText
.toUInt32();
2122 if( nb
> 255 )return DoError(SmParseError::ColorExpected
);
2123 nc
= nb
| ng
<< 8 | nr
<< 16 | sal_uInt32(0) << 24;
2124 aToken
.aText
= OUString::number(nc
, 16);
2126 else if( m_aCurToken
.eType
== TRGBA
) //loads r, g and b
2128 sal_uInt32 nr
, na
, ng
, nb
, nc
;
2129 NextTokenFontSize();
2130 if( lcl_IsNotWholeNumber(m_aCurToken
.aText
) )
2131 return DoError(SmParseError::ColorExpected
);
2132 nr
= m_aCurToken
.aText
.toUInt32();
2133 if( nr
> 255 )return DoError(SmParseError::ColorExpected
);
2134 NextTokenFontSize();
2135 if( lcl_IsNotWholeNumber(m_aCurToken
.aText
) )
2136 return DoError(SmParseError::ColorExpected
);
2137 ng
= m_aCurToken
.aText
.toUInt32();
2138 if( ng
> 255 )return DoError(SmParseError::ColorExpected
);
2139 NextTokenFontSize();
2140 if( lcl_IsNotWholeNumber(m_aCurToken
.aText
) )
2141 return DoError(SmParseError::ColorExpected
);
2142 nb
= m_aCurToken
.aText
.toUInt32();
2143 if( nb
> 255 )return DoError(SmParseError::ColorExpected
);
2144 NextTokenFontSize();
2145 if( lcl_IsNotWholeNumber(m_aCurToken
.aText
) )
2146 return DoError(SmParseError::ColorExpected
);
2147 na
= m_aCurToken
.aText
.toUInt32();
2148 if( na
> 255 )return DoError(SmParseError::ColorExpected
);
2149 nc
= nb
| ng
<< 8 | nr
<< 16 | na
<< 24;
2150 aToken
.aText
= OUString::number(nc
, 16);
2152 else if( m_aCurToken
.eType
== THEX
) //loads hex code
2155 NextTokenFontSize();
2156 if( lcl_IsNotWholeNumber16(m_aCurToken
.aText
) )
2157 return DoError(SmParseError::ColorExpected
);
2158 nc
= m_aCurToken
.aText
.toUInt32(16);
2159 aToken
.aText
= OUString::number(nc
, 16);
2163 else return DoError(SmParseError::ColorExpected
);
2165 std::unique_ptr
<SmStructureNode
> xNode
;
2166 xNode
.reset(new SmFontNode(aToken
));
2170 std::unique_ptr
<SmStructureNode
> SmParser::DoFont()
2172 DepthProtect
aDepthGuard(m_nParseDepth
);
2173 if (aDepthGuard
.TooDeep())
2174 throw std::range_error("parser depth limit");
2176 assert(m_aCurToken
.eType
== TFONT
);
2178 std::unique_ptr
<SmStructureNode
> xNode
;
2179 // last font rules, get that one
2184 if (TokenInGroup(TG::Font
))
2185 { aToken
= m_aCurToken
;
2190 return DoError(SmParseError::FontExpected
);
2192 } while (m_aCurToken
.eType
== TFONT
);
2194 xNode
.reset(new SmFontNode(aToken
));
2198 std::unique_ptr
<SmStructureNode
> SmParser::DoFontSize()
2200 DepthProtect
aDepthGuard(m_nParseDepth
);
2201 if (aDepthGuard
.TooDeep()) throw std::range_error("parser depth limit");
2202 std::unique_ptr
<SmFontNode
> pFontNode(new SmFontNode(m_aCurToken
));
2203 NextTokenFontSize();
2206 switch (m_aCurToken
.eType
)
2208 case THEX
: Type
= FontSizeType::ABSOLUT
; break;
2209 case TPLUS
: Type
= FontSizeType::PLUS
; break;
2210 case TMINUS
: Type
= FontSizeType::MINUS
; break;
2211 case TMULTIPLY
: Type
= FontSizeType::MULTIPLY
; break;
2212 case TDIVIDEBY
: Type
= FontSizeType::DIVIDE
; break;
2215 return DoError(SmParseError::SizeExpected
);
2218 if (Type
!= FontSizeType::ABSOLUT
)
2220 NextTokenFontSize();
2221 if (m_aCurToken
.eType
!= THEX
) return DoError(SmParseError::SizeExpected
);
2224 // get number argument
2225 Fraction
aValue( 1 );
2226 if (lcl_IsNumber( m_aCurToken
.aText
))
2228 aValue
= m_aCurToken
.aText
.toDouble();
2229 //!! Reduce values in order to avoid numerical errors
2230 if (aValue
.GetDenominator() > 1000)
2232 tools::Long nNum
= aValue
.GetNumerator();
2233 tools::Long nDenom
= aValue
.GetDenominator();
2234 while ( nDenom
> 1000 ) //remove big denominator
2239 aValue
= Fraction( nNum
, nDenom
);
2242 else return DoError(SmParseError::SizeExpected
);
2244 pFontNode
->SetSizeParameter(aValue
, Type
);
2249 std::unique_ptr
<SmStructureNode
> SmParser::DoBrace()
2251 DepthProtect
aDepthGuard(m_nParseDepth
);
2252 if (aDepthGuard
.TooDeep())
2253 throw std::range_error("parser depth limit");
2255 assert(m_aCurToken
.eType
== TLEFT
|| TokenInGroup(TG::LBrace
));
2257 std::unique_ptr
<SmStructureNode
> xSNode(new SmBraceNode(m_aCurToken
));
2258 std::unique_ptr
<SmNode
> pBody
, pLeft
, pRight
;
2259 SmScaleMode eScaleMode
= SmScaleMode::None
;
2260 SmParseError eError
= SmParseError::None
;
2262 if (m_aCurToken
.eType
== TLEFT
)
2265 eScaleMode
= SmScaleMode::Height
;
2267 // check for left bracket
2268 if (TokenInGroup(TG::LBrace
) || TokenInGroup(TG::RBrace
))
2270 pLeft
.reset(new SmMathSymbolNode(m_aCurToken
));
2273 pBody
= DoBracebody(true);
2275 if (m_aCurToken
.eType
== TRIGHT
)
2278 // check for right bracket
2279 if (TokenInGroup(TG::LBrace
) || TokenInGroup(TG::RBrace
))
2281 pRight
.reset(new SmMathSymbolNode(m_aCurToken
));
2285 eError
= SmParseError::RbraceExpected
;
2288 eError
= SmParseError::RightExpected
;
2291 eError
= SmParseError::LbraceExpected
;
2295 assert(TokenInGroup(TG::LBrace
));
2297 pLeft
.reset(new SmMathSymbolNode(m_aCurToken
));
2300 pBody
= DoBracebody(false);
2302 SmTokenType eExpectedType
= TUNKNOWN
;
2303 switch (pLeft
->GetToken().eType
)
2304 { case TLPARENT
: eExpectedType
= TRPARENT
; break;
2305 case TLBRACKET
: eExpectedType
= TRBRACKET
; break;
2306 case TLBRACE
: eExpectedType
= TRBRACE
; break;
2307 case TLDBRACKET
: eExpectedType
= TRDBRACKET
; break;
2308 case TLLINE
: eExpectedType
= TRLINE
; break;
2309 case TLDLINE
: eExpectedType
= TRDLINE
; break;
2310 case TLANGLE
: eExpectedType
= TRANGLE
; break;
2311 case TLFLOOR
: eExpectedType
= TRFLOOR
; break;
2312 case TLCEIL
: eExpectedType
= TRCEIL
; break;
2313 case TLRLINE
: eExpectedType
= TLRLINE
; break;
2314 case TLRDLINE
: eExpectedType
= TLRDLINE
; break;
2316 SAL_WARN("starmath", "unknown case");
2319 if (m_aCurToken
.eType
== eExpectedType
)
2321 pRight
.reset(new SmMathSymbolNode(m_aCurToken
));
2325 eError
= SmParseError::ParentMismatch
;
2328 if (eError
== SmParseError::None
)
2332 xSNode
->SetSubNodes(std::move(pLeft
), std::move(pBody
), std::move(pRight
));
2333 xSNode
->SetScaleMode(eScaleMode
);
2336 return DoError(eError
);
// Parses the content enclosed by a pair of brackets and collects it into an
// SmBracebodyNode created from the current token.
// Two scanning loops are visible below: one that ends at TRIGHT (or TEND) and
// one that ends at a token of group TG::RBrace (or TEND).
// NOTE(review): presumably bIsLeftRight selects between the two loops; the
// selecting condition is not visible in this excerpt - confirm.
// Throws std::range_error when the depth guard reports TooDeep().
2339 std::unique_ptr
<SmBracebodyNode
> SmParser::DoBracebody(bool bIsLeftRight
)
// Recursion guard: give up once the parse depth limit is exceeded.
2341 DepthProtect
aDepthGuard(m_nParseDepth
);
2342 if (aDepthGuard
.TooDeep())
2343 throw std::range_error("parser depth limit");
2345 auto pBody
= std::make_unique
<SmBracebodyNode
>(m_aCurToken
);
// Child nodes gathered while scanning; handed to pBody at the end.
2347 std::vector
<std::unique_ptr
<SmNode
>> aNodes
;
// Loop terminated by TRIGHT: a TMLINE token becomes its own math symbol node.
2353 if (m_aCurToken
.eType
== TMLINE
)
2355 aNodes
.emplace_back(std::make_unique
<SmMathSymbolNode
>(m_aCurToken
));
2358 else if (m_aCurToken
.eType
!= TRIGHT
)
2360 aNodes
.push_back(DoAlign());
// After an aligned expression only TMLINE or TRIGHT is acceptable; anything
// else is recorded as a RightExpected error node.
2361 if (m_aCurToken
.eType
!= TMLINE
&& m_aCurToken
.eType
!= TRIGHT
)
2362 aNodes
.emplace_back(DoError(SmParseError::RightExpected
));
2364 } while (m_aCurToken
.eType
!= TEND
&& m_aCurToken
.eType
!= TRIGHT
);
// Loop terminated by a TG::RBrace token: same structure as above, but the
// error recorded for an unexpected token is RbraceExpected.
2370 if (m_aCurToken
.eType
== TMLINE
)
2372 aNodes
.emplace_back(std::make_unique
<SmMathSymbolNode
>(m_aCurToken
));
2375 else if (!TokenInGroup(TG::RBrace
))
2377 aNodes
.push_back(DoAlign());
2378 if (m_aCurToken
.eType
!= TMLINE
&& !TokenInGroup(TG::RBrace
))
2379 aNodes
.emplace_back(DoError(SmParseError::RbraceExpected
));
2381 } while (m_aCurToken
.eType
!= TEND
&& !TokenInGroup(TG::RBrace
));
// Attach the collected children; the body scales in height only when
// bIsLeftRight is set.
2384 pBody
->SetSubNodes(buildNodeArray(aNodes
));
2385 pBody
->SetScaleMode(bIsLeftRight
? SmScaleMode::Height
: SmScaleMode::None
);
// Builds an SmBraceNode whose left child is a TNONE symbol, whose body comes
// from DoPower() and whose right child is a TRLINE / MS_VERTLINE symbol
// labelled "evaluate". The brace node is scaled with SmScaleMode::Height.
// If the current token then belongs to TG::Limit, the node is passed on to
// DoSubSupEvaluate() and the resulting node's token type is set to TEVALUATE.
// Throws std::range_error when the depth guard reports TooDeep().
// NOTE(review): the return statements are not visible in this excerpt.
2389 std::unique_ptr
<SmNode
> SmParser::DoEvaluate()
2392 // Checkout depth and create node
2393 DepthProtect
aDepthGuard(m_nParseDepth
);
2394 if (aDepthGuard
.TooDeep()) throw std::range_error("parser depth limit");
2395 std::unique_ptr
<SmStructureNode
> xSNode(new SmBraceNode(m_aCurToken
));
// Synthetic right-hand token: a vertical line placed at the position of the
// current token.
2396 SmToken
aToken( TRLINE
, MS_VERTLINE
, "evaluate", TG::RBrace
, 5);
2397 aToken
.nRow
= m_aCurToken
.nRow
;
2398 aToken
.nCol
= m_aCurToken
.nCol
;
2400 // Parse body && left none
2402 std::unique_ptr
<SmNode
> pBody
= DoPower();
// Synthetic left-hand token of type TNONE with an empty text.
2403 SmToken
bToken( TNONE
, '\0', "", TG::LBrace
, 5);
2404 std::unique_ptr
<SmNode
> pLeft
;
2405 pLeft
.reset(new SmMathSymbolNode(bToken
));
2408 std::unique_ptr
<SmNode
> pRight
;
2409 pRight
.reset(new SmMathSymbolNode(aToken
));
2410 xSNode
->SetSubNodes(std::move(pLeft
), std::move(pBody
), std::move(pRight
));
2411 xSNode
->SetScaleMode(SmScaleMode::Height
); // scalable line
// A following TG::Limit token is handled by DoSubSupEvaluate(), which receives
// the released brace node as a raw pointer.
2414 if ( m_aCurToken
.nGroup
== TG::Limit
)
2416 std::unique_ptr
<SmNode
> rSNode
;
2417 rSNode
= DoSubSupEvaluate(xSNode
.release());
2418 rSNode
->GetToken().eType
= TEVALUATE
;
// Creates an SmTextNode with font FNT_FUNCTION from the current token.
// When the current token is TFUNC, it is skipped with NextToken() and the
// current token is tagged as TFUNC / TG::Function.
// NOTE(review): the braces of the if-body are not visible here; presumably
// both assignments belong to the TFUNC branch - confirm.
// Throws std::range_error when the depth guard reports TooDeep().
2426 std::unique_ptr
<SmTextNode
> SmParser::DoFunction()
2428 DepthProtect
aDepthGuard(m_nParseDepth
);
2429 if (aDepthGuard
.TooDeep()) throw std::range_error("parser depth limit");
2430 if( m_aCurToken
.eType
== TFUNC
)
2432 NextToken(); // skip "FUNC"-statement
// Re-tag the current token so that it is treated as a function name.
2433 m_aCurToken
.eType
= TFUNC
;
2434 m_aCurToken
.nGroup
= TG::Function
;
2436 auto pNode
= std::make_unique
<SmTextNode
>(m_aCurToken
, FNT_FUNCTION
);
// Creates an SmTableNode from the current token and gives it two sub nodes,
// each obtained from a call to DoSum() (first operand, then second operand).
// Throws std::range_error when the depth guard reports TooDeep().
// NOTE(review): token advancing and the return statement are not visible in
// this excerpt.
2441 std::unique_ptr
<SmTableNode
> SmParser::DoBinom()
2443 DepthProtect
aDepthGuard(m_nParseDepth
);
2444 if (aDepthGuard
.TooDeep())
2445 throw std::range_error("parser depth limit");
2447 auto xSNode
= std::make_unique
<SmTableNode
>(m_aCurToken
);
// The two operands are parsed in source order.
2451 auto xFirst
= DoSum();
2452 auto xSecond
= DoSum();
2453 xSNode
->SetSubNodes(std::move(xFirst
), std::move(xSecond
));
// Creates an SmBinVerNode from the current token with three sub nodes: the
// first DoSum() result, an SmRectangleNode built from the same token, and the
// second DoSum() result.
// Throws std::range_error when the depth guard reports TooDeep().
// NOTE(review): token advancing and the return statement are not visible in
// this excerpt.
2457 std::unique_ptr
<SmBinVerNode
> SmParser::DoFrac()
2459 DepthProtect
aDepthGuard(m_nParseDepth
);
2460 if (aDepthGuard
.TooDeep()) throw std::range_error("parser depth limit");
2462 std::unique_ptr
<SmBinVerNode
> xSNode
= std::make_unique
<SmBinVerNode
>(m_aCurToken
);
// Node placed between the two operands.
2463 std::unique_ptr
<SmNode
> xOper
= std::make_unique
<SmRectangleNode
>(m_aCurToken
);
2467 auto xFirst
= DoSum();
2468 auto xSecond
= DoSum();
2469 xSNode
->SetSubNodes(std::move(xFirst
), std::move(xOper
), std::move(xSecond
));
// Creates an SmTableNode from the current token and fills it with the
// expressions returned by DoAlign(), repeated while the current token is
// TPOUND.
// If the current token is not TLGROUP, the result of
// DoError(SmParseError::LgroupExpected) is returned instead.
// Throws std::range_error when the depth guard reports TooDeep().
// NOTE(review): the loop header, the branch bodies around the TRGROUP test and
// the final return are not visible in this excerpt.
2473 std::unique_ptr
<SmStructureNode
> SmParser::DoStack()
2475 DepthProtect
aDepthGuard(m_nParseDepth
);
2476 if (aDepthGuard
.TooDeep())
2477 throw std::range_error("parser depth limit");
2479 std::unique_ptr
<SmStructureNode
> xSNode(new SmTableNode(m_aCurToken
));
// An opening group token is mandatory.
2481 if (m_aCurToken
.eType
!= TLGROUP
)
2482 return DoError(SmParseError::LgroupExpected
);
2483 std::vector
<std::unique_ptr
<SmNode
>> aExprArr
;
// Collect one aligned expression per iteration.
2487 aExprArr
.push_back(DoAlign());
2489 while (m_aCurToken
.eType
== TPOUND
);
// Closing group check.
// NOTE(review): presumably the RgroupExpected error node is added only when
// the TRGROUP test fails - confirm.
2491 if (m_aCurToken
.eType
== TRGROUP
)
2494 aExprArr
.emplace_back(DoError(SmParseError::RgroupExpected
));
2496 xSNode
->SetSubNodes(buildNodeArray(aExprArr
));
// Creates an SmMatrixNode from the current token, fills it with expressions
// returned by DoAlign() and records its dimensions with SetRowCol().
// The first run of expressions (repeated while the token is TPOUND) fixes the
// column count nCol; further runs are read while the token is TDPOUND.
// If the current token is not TLGROUP, the result of
// DoError(SmParseError::LgroupExpected) is returned instead.
// Throws std::range_error when the depth guard reports TooDeep().
// NOTE(review): the declaration and updates of nRow, the loop headers and
// several branch bodies are not visible in this excerpt.
2500 std::unique_ptr
<SmStructureNode
> SmParser::DoMatrix()
2502 DepthProtect
aDepthGuard(m_nParseDepth
);
2503 if (aDepthGuard
.TooDeep())
2504 throw std::range_error("parser depth limit");
2506 std::unique_ptr
<SmMatrixNode
> xMNode(new SmMatrixNode(m_aCurToken
));
// An opening group token is mandatory.
2508 if (m_aCurToken
.eType
!= TLGROUP
)
2509 return DoError(SmParseError::LgroupExpected
);
2511 std::vector
<std::unique_ptr
<SmNode
>> aExprArr
;
// First run of expressions.
2515 aExprArr
.push_back(DoAlign());
2517 while (m_aCurToken
.eType
== TPOUND
);
// The number of expressions read so far is taken as the column count.
2519 size_t nCol
= aExprArr
.size();
// Each TDPOUND introduces another run of nCol expressions.
2521 while (m_aCurToken
.eType
== TDPOUND
)
2524 for (size_t i
= 0; i
< nCol
; i
++)
2526 auto xNode
= DoAlign();
// NOTE(review): the conditions guarding the TPOUND test and the PoundExpected
// replacement are only partly visible - confirm against the full source.
2529 if (m_aCurToken
.eType
== TPOUND
)
2532 xNode
= DoError(SmParseError::PoundExpected
);
2534 aExprArr
.emplace_back(std::move(xNode
));
// Closing group check; an RgroupExpected error node is built below.
2539 if (m_aCurToken
.eType
== TRGROUP
)
2543 std::unique_ptr
<SmNode
> xENode(DoError(SmParseError::RgroupExpected
));
// NOTE(review): presumably pop_back() runs only when aExprArr is not empty;
// the statement for the empty case is not visible - confirm.
2544 if (aExprArr
.empty())
2547 aExprArr
.pop_back();
2548 aExprArr
.emplace_back(std::move(xENode
));
2551 xMNode
->SetSubNodes(buildNodeArray(aExprArr
));
// Dimensions are narrowed to sal_uInt16 before being stored.
2552 xMNode
->SetRowCol(static_cast<sal_uInt16
>(nRow
),
2553 static_cast<sal_uInt16
>(nCol
));
// Transfer ownership to the base-class pointer type of the return value.
2554 return std::unique_ptr
<SmStructureNode
>(xMNode
.release());
// Creates an SmSpecialNode from the current token.
// Before that, a token text starting with "%" may be mapped to another symbol
// name: via SmLocalizedSymbolData::GetUiSymbolName() when
// IsImportSymbolNames() holds, or via GetExportSymbolName() when
// IsExportSymbolNames() holds. A non-empty mapped name gets the "%" prefix
// back and, if bReplace is set and the name differs, replaces the token text
// through Replace().
// The token text without its first character is added to m_aUsedSymbols when
// it is not empty.
// Throws std::range_error when the depth guard reports TooDeep().
// NOTE(review): the declaration of aNewName, the assignments to bReplace and
// the return statement are not visible in this excerpt.
2557 std::unique_ptr
<SmSpecialNode
> SmParser::DoSpecial()
2559 DepthProtect
aDepthGuard(m_nParseDepth
);
2560 if (aDepthGuard
.TooDeep())
2561 throw std::range_error("parser depth limit");
2563 bool bReplace
= false;
// rName aliases the text of the current token.
2564 OUString
&rName
= m_aCurToken
.aText
;
2567 // conversion of symbol names for 6.0 (XML) file format
2568 // (name change on import / export.
2569 // UI uses localized names XML file format does not.)
2570 if( rName
.startsWith("%") )
2572 if (IsImportSymbolNames())
// copy(1) drops the leading "%" before the lookup.
2574 aNewName
= SmLocalizedSymbolData::GetUiSymbolName(rName
.copy(1));
2577 else if (IsExportSymbolNames())
2579 aNewName
= SmLocalizedSymbolData::GetExportSymbolName(rName
.copy(1));
// Restore the "%" prefix on a successfully mapped name.
2583 if (!aNewName
.isEmpty())
2584 aNewName
= "%" + aNewName
;
// Only rewrite the token text when replacement was requested and the name
// actually changed.
2587 if (bReplace
&& !aNewName
.isEmpty() && rName
!= aNewName
)
2589 Replace(GetTokenIndex(), rName
.getLength(), aNewName
);
2593 // add symbol name to list of used symbols
2594 const OUString
aSymbolName(m_aCurToken
.aText
.copy(1));
2595 if (!aSymbolName
.isEmpty())
2596 m_aUsedSymbols
.insert( aSymbolName
);
2598 auto pNode
= std::make_unique
<SmSpecialNode
>(m_aCurToken
);
// Creates an SmGlyphSpecialNode from the current token.
// Throws std::range_error when the depth guard reports TooDeep().
// NOTE(review): token advancing and the return statement are not visible in
// this excerpt.
2603 std::unique_ptr
<SmGlyphSpecialNode
> SmParser::DoGlyphSpecial()
2605 DepthProtect
aDepthGuard(m_nParseDepth
);
2606 if (aDepthGuard
.TooDeep())
2607 throw std::range_error("parser depth limit");
2609 auto pNode
= std::make_unique
<SmGlyphSpecialNode
>(m_aCurToken
);
// Creates an SmExpressionNode from the current token whose sub nodes are an
// SmErrorNode (also built from the current token) and nullptr, and registers
// the error eError for that expression node through AddError().
// Throws std::range_error when the depth guard reports TooDeep().
// NOTE(review): token advancing and the return statement are not visible in
// this excerpt.
2614 std::unique_ptr
<SmExpressionNode
> SmParser::DoError(SmParseError eError
)
2616 DepthProtect
aDepthGuard(m_nParseDepth
);
2617 if (aDepthGuard
.TooDeep())
2618 throw std::range_error("parser depth limit");
2620 auto xSNode
= std::make_unique
<SmExpressionNode
>(m_aCurToken
);
2621 std::unique_ptr
<SmErrorNode
> pErr(new SmErrorNode(m_aCurToken
));
2622 xSNode
->SetSubNodes(std::move(pErr
), nullptr);
// The error description keeps a non-owning pointer to the expression node.
2624 AddError(eError
, xSNode
.get());
// Default constructor. The visible initializers set both buffer/token indices
// to 0, disable import and export symbol name conversion, bind m_aNumCC to
// the en-US language tag and take m_pSysCC from the module's system locale.
// NOTE(review): further initializers and the body are not visible in this
// excerpt.
2634 SmParser::SmParser()
2636 , m_nBufferIndex( 0 )
2637 , m_nTokenIndex( 0 )
2640 , m_bImportSymNames( false )
2641 , m_bExportSymNames( false )
2643 , m_aNumCC( LanguageTag( LANGUAGE_ENGLISH_US
) )
2644 , m_pSysCC( SM_MOD()->GetSysLocale().GetCharClassPtr() )
// Entry point for parsing the formula text rBuffer into an SmTableNode.
// The visible part clears the set of used symbols, stores rBuffer with its
// line ends converted to LINEEND_LF in m_aBufferString and clears the list of
// error descriptions.
// NOTE(review): the remaining state reset, the actual parse call and the
// return statement are not visible in this excerpt.
2648 std::unique_ptr
<SmTableNode
> SmParser::Parse(const OUString
&rBuffer
)
2650 m_aUsedSymbols
.clear();
2652 m_aBufferString
= convertLineEnd(rBuffer
, LINEEND_LF
);
2659 m_aErrDescList
.clear();
// Parses rBuffer and returns the result of DoExpression().
// The visible part stores rBuffer with its line ends converted to LINEEND_LF
// in m_aBufferString and clears the list of error descriptions first.
// NOTE(review): the remaining state reset between these steps is not visible
// in this excerpt.
2665 std::unique_ptr
<SmNode
> SmParser::ParseExpression(const OUString
&rBuffer
)
2667 m_aBufferString
= convertLineEnd(rBuffer
, LINEEND_LF
);
2674 m_aErrDescList
.clear();
2677 return DoExpression();
// Records a parse error: creates an SmErrorDesc holding the error type Type
// and the (non-owning) node pointer pNode, builds its message text from
// RID_ERR_IDENT followed by the resource selected for Type, and appends the
// description to m_aErrDescList.
// NOTE(review): the switch header, the declaration of pRID and any default
// case are not visible in this excerpt.
2681 void SmParser::AddError(SmParseError Type
, SmNode
*pNode
)
2683 std::unique_ptr
<SmErrorDesc
> pErrDesc(new SmErrorDesc
);
2685 pErrDesc
->m_eType
= Type
;
2686 pErrDesc
->m_pNode
= pNode
;
// Common message prefix; the type-specific part is appended further down.
2687 pErrDesc
->m_aText
= SmResId(RID_ERR_IDENT
);
// Map each error type to its message resource.
2692 case SmParseError::UnexpectedChar
: pRID
= RID_ERR_UNEXPECTEDCHARACTER
; break;
2693 case SmParseError::UnexpectedToken
: pRID
= RID_ERR_UNEXPECTEDTOKEN
; break;
2694 case SmParseError::PoundExpected
: pRID
= RID_ERR_POUNDEXPECTED
; break;
2695 case SmParseError::ColorExpected
: pRID
= RID_ERR_COLOREXPECTED
; break;
2696 case SmParseError::LgroupExpected
: pRID
= RID_ERR_LGROUPEXPECTED
; break;
2697 case SmParseError::RgroupExpected
: pRID
= RID_ERR_RGROUPEXPECTED
; break;
2698 case SmParseError::LbraceExpected
: pRID
= RID_ERR_LBRACEEXPECTED
; break;
2699 case SmParseError::RbraceExpected
: pRID
= RID_ERR_RBRACEEXPECTED
; break;
2700 case SmParseError::ParentMismatch
: pRID
= RID_ERR_PARENTMISMATCH
; break;
2701 case SmParseError::RightExpected
: pRID
= RID_ERR_RIGHTEXPECTED
; break;
2702 case SmParseError::FontExpected
: pRID
= RID_ERR_FONTEXPECTED
; break;
2703 case SmParseError::SizeExpected
: pRID
= RID_ERR_SIZEEXPECTED
; break;
2704 case SmParseError::DoubleAlign
: pRID
= RID_ERR_DOUBLEALIGN
; break;
2705 case SmParseError::DoubleSubsupscript
: pRID
= RID_ERR_DOUBLESUBSUPSCRIPT
; break;
2706 case SmParseError::NumberExpected
: pRID
= RID_ERR_NUMBEREXPECTED
; break;
2711 pErrDesc
->m_aText
+= SmResId(pRID
);
// The list takes ownership of the description.
2713 m_aErrDescList
.push_back(std::move(pErrDesc
));
// Returns a recorded error description, or nullptr when the error list is
// empty. While m_nCurError is greater than 0 it is decremented first and the
// entry at the new index is returned; otherwise the entry at m_nCurError is
// returned.
// NOTE(review): the statements between the two returns are not visible in
// this excerpt.
2717 const SmErrorDesc
*SmParser::NextError()
2719 if ( !m_aErrDescList
.empty() )
2720 if (m_nCurError
> 0) return m_aErrDescList
[ --m_nCurError
].get();
2724 return m_aErrDescList
[ m_nCurError
].get();
2726 else return nullptr;
// Returns a recorded error description, or nullptr when the error list is
// empty. While m_nCurError is below the last valid index it is incremented
// first and the entry at the new index is returned; otherwise m_nCurError is
// set to the last index and that entry is returned.
2730 const SmErrorDesc
*SmParser::PrevError()
2732 if ( !m_aErrDescList
.empty() )
2733 if (m_nCurError
< static_cast<int>(m_aErrDescList
.size() - 1)) return m_aErrDescList
[ ++m_nCurError
].get();
// Clamp to the last entry of the list.
2736 m_nCurError
= static_cast<int>(m_aErrDescList
.size() - 1);
2737 return m_aErrDescList
[ m_nCurError
].get();
2739 else return nullptr;
// Returns the first entry of the error list when the list is not empty.
// NOTE(review): the return for the empty case is not visible in this excerpt.
2743 const SmErrorDesc
*SmParser::GetError()
2745 if ( !m_aErrDescList
.empty() )
2746 return m_aErrDescList
.front().get();
2750 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */