Bump version to 6.4-15
[LibreOffice.git] / hwpfilter / source / hwpeq.cxx
blobd3bd4620746965618f7dd08b2cfe0598819b404c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <assert.h>
21 #include <stdio.h>
22 #include <string.h>
24 // DVO: always use standard headers:
25 #include <istream>
26 #include <sstream>
27 using namespace std;
29 #include "mzstring.h"
30 #include "hwpeq.h"
31 #include <sal/types.h>
32 #include <sal/macros.h>
34 #include <rtl/character.hxx>
36 /* @Man: change the hwp formula to LaTeX */
/* Line terminator appended to the generated LaTeX text. */
#if defined(_WIN32)
#define ENDL "\r\n"
#else
#define ENDL "\n"
#endif

/* Bit flags stored in hwpeq::flag. */
#define EQ_CASE 0x01 /* case sensitive cmd */
#define EQ_ENV  0x02 /* equiv to latex environment */
#define EQ_ATOP 0x04 /* must revert order */
47 static bool IS_WS(std::istream::int_type ch) {
48 return ch != std::istream::traits_type::eof()
49 && rtl::isAsciiWhiteSpace(
50 static_cast<unsigned char>(
51 std::istream::traits_type::to_char_type(ch)));
/* True when ch is one of the binary operator characters "+-<=>".
 * EOF and the NUL character are never operators. */
static bool IS_BINARY(std::istream::int_type ch) {
    if( ch == std::istream::traits_type::eof() )
        return false;

    const char c = std::istream::traits_type::to_char_type(ch);
    // strchr() treats the terminating NUL as part of the searched string,
    // so a NUL byte has to be rejected explicitly.
    return c != '\0' && strchr("+-<=>", c) != nullptr;
}
/* Case-insensitive C string comparison under one name on every platform. */
#if !defined(_WIN32)
#define STRICMP strcasecmp
#else
#define STRICMP stricmp
#endif
// Sub- and superscript status; eq_word() combines the values with bitwise OR.
enum {
    SCRIPT_NONE = 0,
    SCRIPT_SUB  = 1,
    SCRIPT_SUP  = 2,
    SCRIPT_ALL  = 3     // SCRIPT_SUB | SCRIPT_SUP
};
68 static int eq_word(MzString& outs, istream *strm, int script = SCRIPT_NONE);
69 static bool eq_sentence(MzString& outs, istream *strm, const char *end = nullptr);
/* One row of the keyword translation table. */
struct hwpeq {
    const char    *key;     // hwp math keyword (the search key)
    const char    *latex;   // corresponding latex text; when null, '\' + key is emitted
    int           nargs;    // number of arguments that follow the keyword
    unsigned char flag;     // combination of EQ_CASE / EQ_ENV / EQ_ATOP
};
78 static const hwpeq eq_tbl[] = {
79 { "!=", "\\equiv ", 0, 0 },
80 { "#", "\\\\", 0, 0 },
81 { "+-", "\\pm ", 0, 0 },
82 { "-+", "\\mp ", 0, 0 },
83 { "<=", "\\leq ", 0, 0 },
84 { "==", "\\equiv ", 0, 0 },
85 { ">=", "\\geq ", 0, 0 },
86 { "Pr", nullptr, 0, 0 },
87 { "^", "^", 1, 0 },
88 { "_", "_", 1, 0 },
89 { "`", "\\;", 0, 0 },
90 { "acute", nullptr, 1, 0 },
91 { "aleph", nullptr, 0, 0 },
92 { "alpha", nullptr, 0, EQ_CASE },
93 { "amalg", nullptr, 0, 0 },
94 { "and", nullptr, 0, 0 },
95 { "angle", nullptr, 0, 0 },
96 { "angstrom", nullptr, 0, 0 },
97 { "approx", nullptr, 0, 0 },
98 { "arc", nullptr, 0, 0 },
99 { "arccos", nullptr, 0, 0 },
100 { "arch", nullptr, 0, 0 },
101 { "arcsin", nullptr, 0, 0 },
102 { "arctan", nullptr, 0, 0 },
103 { "arg", nullptr, 0, 0 },
104 { "assert", "\\vdash", 0, 0 },
105 { "ast", nullptr, 0, 0 },
106 { "asymp", nullptr, 0, 0 },
107 { "atop", nullptr, 1, EQ_ATOP },
108 { "backslash", nullptr, 0, 0 },
109 { "bar", nullptr, 1, 0 },
110 { "because", nullptr, 0, 0 },
111 { "beta", nullptr, 0, EQ_CASE },
112 { "big", nullptr, 0, EQ_CASE },
113 { "bigcap", nullptr, 0, 0 },
114 { "bigcirc", nullptr, 0, 0 },
115 { "bigcup", nullptr, 0, 0 },
116 { "bigg", nullptr, 0, EQ_CASE },
117 { "bigodiv", nullptr, 0, 0 },
118 { "bigodot", nullptr, 0, 0 },
119 { "bigominus", nullptr, 0, 0 },
120 { "bigoplus", nullptr, 0, 0 },
121 { "bigotimes", nullptr, 0, 0 },
122 { "bigsqcap", nullptr, 0, 0 },
123 { "bigsqcup", nullptr, 0, 0 },
124 { "biguplus", nullptr, 0, 0 },
125 { "bigvee", nullptr, 0, 0 },
126 { "bigwedge", nullptr, 0, 0 },
127 { "binom", nullptr, 2, 0 },
128 { "bmatrix", nullptr, 0, EQ_ENV },
129 { "bold", nullptr, 0, 0 },
130 { "bot", nullptr, 0, 0 },
131 { "breve", nullptr, 1, 0 },
132 { "buildrel", nullptr, 0, 0 }, // LATER
133 { "bullet", nullptr, 0, 0 },
134 { "cap", nullptr, 0, 0 },
135 { "cases", nullptr, 0, EQ_ENV },
136 { "ccol", nullptr, 0, 0 }, /* Center vertically */
137 { "cdot", nullptr, 0, 0 },
138 { "cdots", nullptr, 0, 0 },
139 { "check", nullptr, 1, 0 },
140 { "chi", nullptr, 0, EQ_CASE },
141 { "choose", nullptr, 0, EQ_ATOP },
142 { "circ", nullptr, 0, 0 },
143 { "col", nullptr, 0, 0 }, // LATER
144 { "cong", nullptr, 0, 0 },
145 { "coprod", nullptr, 0, 0 },
146 { "cos", nullptr, 0, 0 },
147 { "cosec", nullptr, 0, 0 },
148 { "cosh", nullptr, 0, 0 },
149 { "cot", nullptr, 0, 0 },
150 { "coth", nullptr, 0, 0 },
151 { "cpile", nullptr, 0, 0 }, // LATER
152 { "csc", nullptr, 0, 0 },
153 { "cup", nullptr, 0, 0 },
154 { "dagger", nullptr, 0, 0 },
155 { "dashv", nullptr, 0, 0 },
156 { "ddagger", nullptr, 0, 0 },
157 { "ddot", nullptr, 1, 0 },
158 { "ddots", nullptr, 0, 0 },
159 { "def", nullptr, 0, 0 },
160 { "deg", nullptr, 0, 0 },
161 { "del", nullptr, 0, 0 },
162 { "delta", nullptr, 0, EQ_CASE },
163 { "diamond", nullptr, 0, 0 },
164 { "dim", nullptr, 0, 0 },
165 { "div", nullptr, 0, 0 },
166 { "divide", nullptr, 0, 0 },
167 { "dline", nullptr, 0, 0 },
168 { "dmatrix", nullptr, 0, EQ_ENV },
169 { "dot", nullptr, 1, 0 },
170 { "doteq", nullptr, 0, 0 },
171 { "dotsaxis", nullptr, 0, 0 },
172 { "dotsdiag", nullptr, 0, 0 },
173 { "dotslow", "\\ldots", 0, 0 },
174 { "dotsvert", "\\vdots", 0, 0 },
175 { "downarrow", nullptr, 0, EQ_CASE },
176 { "dsum", "+", 0, 0 },
177 { "dyad", nullptr, 0, 0 }, // LATER
178 { "ell", nullptr, 0, 0 },
179 { "emptyset", nullptr, 0, 0 },
180 { "epsilon", nullptr, 0, EQ_CASE },
181 { "eqalign", nullptr, 0, EQ_ENV },
182 { "equiv", nullptr, 0, 0 },
183 { "eta", nullptr, 0, EQ_CASE },
184 { "exarrow", nullptr, 0, 0 },
185 { "exist", "\\exists", 0, 0 },
186 { "exists", nullptr, 0, 0 },
187 { "exp", nullptr, 0, EQ_CASE },
188 { "for", nullptr, 0, 0 },
189 { "forall", nullptr, 0, 0 },
190 { "from", "_", 1, 0 },
191 { "gamma", nullptr, 0, EQ_CASE },
192 { "gcd", nullptr, 0, 0 },
193 { "ge", "\\geq", 0, 0 },
194 { "geq", nullptr, 0, 0 },
195 { "ggg", nullptr, 0, 0 },
196 { "grad", nullptr, 0, 0 },
197 { "grave", nullptr, 1, 0 },
198 { "hat", "\\widehat", 1, 0 },
199 { "hbar", nullptr, 0, 0 },
200 { "hom", nullptr, 0, 0 },
201 { "hookleft", nullptr, 0, 0 },
202 { "hookright", nullptr, 0, 0 },
203 { "identical", nullptr, 0, 0 }, // LATER
204 { "if", nullptr, 0, 0 },
205 { "imag", nullptr, 0, 0 },
206 { "image", nullptr, 0, 0 },
207 { "imath", nullptr, 0, 0 },
208 { "in", nullptr, 0, 0 },
209 { "inf", "\\infty", 0, 0 },
210 { "infinity", "\\infty", 0, 0 },
211 { "infty", nullptr, 0, 0 },
212 { "int", nullptr, 0, 0 },
213 { "integral", "\\int", 0, 0 },
214 { "inter", "\\bigcap", 0, 0 },
215 { "iota", nullptr, 0, EQ_CASE },
216 { "iso", nullptr, 0, 0 }, // ams
217 { "it", nullptr, 0, 0 },
218 { "jmath", nullptr, 0, 0 },
219 { "kappa", nullptr, 0, EQ_CASE },
220 { "ker", nullptr, 0, 0 },
221 { "lambda", nullptr, 0, EQ_CASE },
222 { "land", nullptr, 0, 0 }, // LATER
223 { "langle", nullptr, 0, 0 },
224 { "larrow", "\\leftarrow", 0, EQ_CASE },
225 { "lbrace", nullptr, 0, 0 },
226 { "lbrack", "[", 0, 0 },
227 { "lceil", nullptr, 0, 0 },
228 { "lcol", nullptr, 0, 0 }, // LATER
229 { "ldots", nullptr, 0, 0 },
230 { "le", nullptr, 0, 0 },
231 { "left", nullptr, 0, 0 },
232 { "leftarrow", nullptr, 0, EQ_CASE },
233 { "leq", nullptr, 0, 0 },
234 { "lfloor", nullptr, 0, 0 },
235 { "lg", nullptr, 0, 0 },
236 { "lim", nullptr, 0, EQ_CASE },
237 { "line", "\\vert", 0, 0 },
238 { "liter", "\\ell", 0, 0 },
239 { "lll", nullptr, 0, 0 }, // ams
240 { "ln", nullptr, 0, 0 },
241 { "log", nullptr, 0, 0 },
242 { "lor", "\\vee", 0, 0 },
243 { "lparen", "(", 0, 0 },
244 { "lpile", nullptr, 0, 0 }, // LATER
245 { "lrarrow", "\\leftrightarrow", 0, EQ_CASE },
246 { "lrharpoons", "\\leftrightharpoons",0, 0 },
247 { "mapsto", nullptr, 0, 0 },
248 { "massert", "\\dashv", 0, 0 },
249 { "matrix", nullptr, 0, EQ_ENV },
250 { "max", nullptr, 0, 0 },
251 { "mho", nullptr, 0, 0 }, // ams
252 { "min", nullptr, 0, 0 },
253 { "minusplus", nullptr, 0, 0 },
254 { "mit", "", 0, 0 }, // font
255 { "mod", "\\bmod", 0, 0 },
256 { "models", nullptr, 0, 0 },
257 { "msangle", nullptr, 0, 0 }, // LATER
258 { "mu", nullptr, 0, EQ_CASE },
259 { "nabla", nullptr, 0, 0 },
260 { "ne", nullptr, 0, 0 },
261 { "nearrow", nullptr, 0, 0 },
262 { "neg", nullptr, 0, 0 },
263 { "neq", nullptr, 0, 0 },
264 { "nequiv", nullptr, 0, 0 },
265 { "ni", nullptr, 0, 0 },
266 { "not", nullptr, 0, 0 },
267 { "notin", nullptr, 0, 0 },
268 { "nu", nullptr, 0, EQ_CASE },
269 { "nwarrow", nullptr, 0, 0 },
270 { "odiv", nullptr, 0, 0 },
271 { "odot", nullptr, 0, 0 },
272 { "oint", nullptr, 0, 0 },
273 { "omega", nullptr, 0, EQ_CASE },
274 { "omicron", nullptr, 0, EQ_CASE },
275 { "ominus", nullptr, 0, 0 },
276 { "oplus", nullptr, 0, 0 },
277 { "or ", nullptr, 0, 0 },
278 { "oslash", nullptr, 0, 0 },
279 { "otimes", nullptr, 0, 0 },
280 { "over", nullptr, 1, EQ_ATOP },
281 { "overline", nullptr, 1, 0 },
282 { "owns", "\\ni", 0, 0 },
283 { "parallel", nullptr, 0, 0 },
284 { "partial", nullptr, 0, 0 },
285 { "phantom", nullptr, 0, 0 },
286 { "phi", nullptr, 0, EQ_CASE },
287 { "pi", nullptr, 0, EQ_CASE },
288 { "pile", nullptr, 0, 0 }, // LATER
289 { "plusminus", "\\pm", 0, 0 },
290 { "pmatrix", nullptr, 0, EQ_ENV },
291 { "prec", nullptr, 0, 0 },
292 { "prep", nullptr, 0, 0 },
293 { "prime", nullptr, 0, 0 },
294 { "prod", nullptr, 0, 0 },
295 { "propto", nullptr, 0, 0 },
296 { "psi", nullptr, 0, EQ_CASE },
297 { "rangle", nullptr, 0, 0 },
298 { "rarrow", "\\rightarrow", 0, EQ_CASE },
299 { "rbrace", "]", 0, 0 },
300 { "rbrace", nullptr, 0, 0 },
301 { "rceil", nullptr, 0, 0 },
302 { "rcol", nullptr, 0, 0 }, // LATER
303 { "real", "\\Re", 0, 0 },
304 { "reimage", nullptr, 0, 0 },
305 { "rel", nullptr, 0, 0 },
306 { "rfloor", nullptr, 0, 0 },
307 { "rho", nullptr, 0, EQ_CASE },
308 { "right", nullptr, 0, 0 },
309 { "rightarrow", nullptr, 0, EQ_CASE },
310 { "rlharpoons", nullptr, 0, 0 },
311 { "rm", nullptr, 0, 0 },
312 { "root", "\\sqrt", 1, 0 },
313 { "rparen", ")", 0, 0 },
314 { "rpile", nullptr, 0, 0 }, // LATER
315 { "rtangle", nullptr, 0, 0 },
316 { "sangle", nullptr, 0, 0 },
317 { "scale", nullptr, 0, 0 },
318 { "searrow", nullptr, 0, 0 },
319 { "sec", nullptr, 0, 0 },
320 { "sigma", nullptr, 0, EQ_CASE },
321 { "sim", nullptr, 0, 0 },
322 { "simeq", nullptr, 0, 0 },
323 { "sin", nullptr, 0, 0 },
324 { "sinh", nullptr, 0, 0 },
325 { "slash", nullptr, 0, 0 },
326 { "smallint", nullptr, 0, 0 },
327 { "smallinter", nullptr, 0, 0 },
328 { "smalloint", nullptr, 0, 0 },
329 { "smallprod", nullptr, 0, 0 },
330 { "smallsum", nullptr, 0, 0 },
331 { "smallunion", nullptr, 0, 0 },
332 { "smcoprod", nullptr, 0, 0 },
333 { "sqcap", nullptr, 0, 0 },
334 { "sqcup", nullptr, 0, 0 },
335 { "sqrt", nullptr, 1, 0 },
336 { "sqsubset", nullptr, 0, 0 },
337 { "sqsubseteq", nullptr, 0, 0 },
338 { "sqsupset", nullptr, 0, 0 },
339 { "sqsupseteq", nullptr, 0, 0 },
340 { "star", nullptr, 0, 0 },
341 { "sub", "_", 0, 0 },
342 { "subset", nullptr, 0, 0 },
343 { "subseteq", nullptr, 0, 0 },
344 { "succ", nullptr, 0, 0 },
345 { "sum", nullptr, 0, 0 },
346 { "sup", "^", 0, 0 },
347 { "superset", nullptr, 0, 0 },
348 { "supset", nullptr, 0, 0 },
349 { "supseteq", nullptr, 0, 0 },
350 { "swarrow", nullptr, 0, 0 },
351 { "tan", nullptr, 0, 0 },
352 { "tanh", nullptr, 0, 0 },
353 { "tau", nullptr, 0, EQ_CASE },
354 { "therefore", nullptr, 0, 0 },
355 { "theta", nullptr, 0, EQ_CASE },
356 { "tilde", "\\widetilde", 1, 0 },
357 { "times", nullptr, 0, 0 },
358 { "to", "^", 1, 0 },
359 { "top", nullptr, 0, 0 },
360 { "triangle", nullptr, 0, 0 },
361 { "triangled", nullptr, 0, 0 },
362 { "trianglel", nullptr, 0, 0 },
363 { "triangler", nullptr, 0, 0 },
364 { "triangleu", nullptr, 0, 0 },
365 { "udarrow", "\\updownarrow",0, EQ_CASE },
366 { "under", "\\underline", 1, 0 },
367 { "underline", "\\underline", 1, 0 },
368 { "union", "\\bigcup", 0, 0 },
369 { "uparrow", nullptr, 0, EQ_CASE },
370 { "uplus", nullptr, 0, 0 },
371 { "upsilon", nullptr, 0, EQ_CASE },
372 { "varepsilon", nullptr, 0, 0 },
373 { "varphi", nullptr, 0, 0 },
374 { "varpi", nullptr, 0, 0 },
375 { "varrho", nullptr, 0, 0 },
376 { "varsigma", nullptr, 0, 0 },
377 { "vartheta", nullptr, 0, 0 },
378 { "varupsilon", nullptr, 0, 0 },
379 { "vdash", nullptr, 0, 0 },
380 { "vdots", nullptr, 0, 0 },
381 { "vec", nullptr, 1, 0 },
382 { "vee", nullptr, 0, 0 },
383 { "vert", nullptr, 0, 0 },
384 { "wedge", nullptr, 0, 0 },
385 { "wp", nullptr, 0, 0 },
386 { "xi", nullptr, 0, EQ_CASE },
387 { "xor", nullptr, 0, 0 },
388 { "zeta", nullptr, 0, EQ_CASE }
391 static const hwpeq *lookup_eqn(char const *str)
393 static const int eqCount = SAL_N_ELEMENTS(eq_tbl);
394 int l = 0, r = eqCount;
395 const hwpeq *result = nullptr;
397 while( l < r ) {
398 const int m = (l + r) / 2;
399 const int k = strcmp(eq_tbl[m].key, str);
400 if( k == 0 ) {
401 result = eq_tbl + m;
402 break;
404 else if( k < 0 )
405 l = m + 1;
406 else
407 r = m;
409 return result;
412 /* If only the first character is uppercase or all characters are uppercase, change to lowercase */
413 static void make_keyword( char *keyword, const char *token)
415 char* ptr;
416 bool result = true;
417 int len = strlen(token);
418 assert(keyword);
420 if( 255 < len )
422 len = 255;
424 memcpy(keyword, token, len);
425 keyword[len] = 0;
427 if( (token[0] & 0x80) || rtl::isAsciiLowerCase(static_cast<unsigned char>(token[0])) || strlen(token) < 2 )
428 return;
430 bool capital = rtl::isAsciiUpperCase(
431 static_cast<unsigned char>(keyword[1]));
432 for( ptr = keyword + 2; *ptr && result; ptr++ )
434 if( (*ptr & 0x80) ||
435 (!capital && rtl::isAsciiUpperCase(static_cast<unsigned char>(*ptr))) ||
436 (capital && rtl::isAsciiLowerCase(static_cast<unsigned char>(*ptr))) )
438 result = false;
442 if( result )
444 ptr = keyword;
445 while( *ptr )
447 if( rtl::isAsciiUpperCase(static_cast<unsigned char>(*ptr)) )
448 *ptr = sal::static_int_cast<char>(
449 rtl::toAsciiLowerCase(static_cast<unsigned char>(*ptr)));
450 ptr++;
455 // token reading function
456 struct eq_stack {
457 MzString white;
458 MzString token;
459 istream *strm;
461 eq_stack() { strm = nullptr; };
462 bool state(istream const *s) {
463 if( strm != s) { white = nullptr; token = nullptr; }
464 return token.length() != 0;
468 static eq_stack *stk = nullptr;
470 static void push_token(MzString const &white, MzString const &token, istream *strm)
472 // one time stack
473 assert(stk->token.length() == 0);
475 stk->white = white;
476 stk->token = token;
477 stk->strm = strm;
481 * It returns the length of the read tokens.
483 * control char, control sequence, binary sequence,
484 * alphabet string, single character */
485 static int next_token(MzString &white, MzString &token, istream *strm)
487 std::istream::int_type ch = 0;
489 if( stk->state(strm) ) {
490 white = stk->white;
491 token = stk->token;
492 stk->token = nullptr;
493 stk->white = nullptr;
494 return token.length();
497 token = nullptr;
498 white = nullptr;
499 if( !strm->good() || (ch = strm->get()) == std::istream::traits_type::eof() )
500 return 0;
502 // read preceding ws
503 if( IS_WS(ch) ) {
504 do white << static_cast<char>(ch);
505 while( IS_WS(ch = strm->get()) );
508 if( ch == '\\' || ch & 0x80
509 || (ch != std::istream::traits_type::eof() && rtl::isAsciiAlpha(ch)) )
511 if( ch == '\\' ) {
512 token << static_cast<char>(ch);
513 ch = strm->get();
515 do {
516 token << static_cast<char>(ch);
517 ch = strm->get();
518 } while( ch != std::istream::traits_type::eof()
519 && (ch & 0x80 || rtl::isAsciiAlpha(ch)) ) ;
520 strm->putback(static_cast<char>(ch));
521 /* special treatment of sub, sub, over, atop
522 The reason for this is that affect next_state().
524 if( !STRICMP("sub", token) || !STRICMP("from", token) ||
525 !STRICMP("sup", token) || !STRICMP("to", token) ||
526 !STRICMP("over", token) || !STRICMP("atop", token) ||
527 !STRICMP("left", token) || !STRICMP("right", token) )
529 char buf[256];
530 make_keyword(buf, token);
531 token = buf;
533 if( !token.compare("sub") || !token.compare("from") )
534 token = "_";
535 if( !token.compare("sup") || !token.compare("to") )
536 token = "^";
538 else if( IS_BINARY(ch) ) {
539 do token << static_cast<char>(ch);
540 while( IS_BINARY(ch = strm->get()) );
541 strm->putback(static_cast<char>(ch));
543 else if( ch != std::istream::traits_type::eof() && rtl::isAsciiDigit(ch) ) {
544 do {
545 token << static_cast<char>(ch);
546 ch = strm->get();
547 } while( ch != std::istream::traits_type::eof() && rtl::isAsciiDigit(ch) );
548 strm->putback(static_cast<char>(ch));
550 else
551 token << static_cast<char>(ch);
553 return token.length();
556 static std::istream::int_type read_white_space(MzString& outs, istream *strm)
558 std::istream::int_type result;
560 if( stk->state(strm) ) {
561 outs << stk->white;
562 stk->white = nullptr;
563 result = std::istream::traits_type::to_int_type(stk->token[0]);
565 else {
566 std::istream::int_type ch;
567 while( IS_WS(ch = strm->get()) )
568 outs << static_cast<char>(ch);
569 strm->putback(static_cast<char>(ch));
570 result = ch;
572 return result;
575 /* If the argument is not required, delimiters are space and brace for each items.
576 sqrt {ab} c = sqrt {ab} c
577 (,} are for grouping
578 ^ ,_ are for combination
580 Sorting of formats with arguments, such as sqrt
581 sqrt a -> sqrt {a}
582 sqrt {a} -> sqrt {a}
583 If there is more than one argument, it eliminates backslash between arguments.
584 \frac a b -> frac {a} {b}
585 Clean the form of over
586 a over b -> {a} over {b}
589 static int eq_word(MzString& outs, istream *strm, int status)
591 MzString token, white, state;
592 int result;
593 char keyword[256];
594 const hwpeq *eq;
596 next_token(white, token, strm);
597 if (token.length() <= 0)
598 return 0;
599 result = token[0];
601 if( token.compare("{") == 0 ) {
602 state << white << token;
603 eq_sentence(state, strm, "}");
605 else if( token.compare("left") == 0 ) {
606 state << white << token;
607 next_token(white, token, strm);
608 state << white << token;
610 eq_sentence(state, strm, "right");
612 next_token(white, token, strm);
613 state << white << token;
615 else {
616 /* Normal token */
617 int script_status = SCRIPT_NONE;
618 while( true ) {
619 state << white << token;
620 make_keyword(keyword, token);
621 if( token[0] == '^' )
622 script_status |= SCRIPT_SUP;
623 else if( token[0] == '_' )
624 script_status |= SCRIPT_SUB;
625 else
626 script_status = SCRIPT_NONE;
628 if( nullptr != (eq = lookup_eqn(keyword)) ) {
629 int nargs = eq->nargs;
630 while( nargs-- ) {
631 const std::istream::int_type ch = read_white_space(state, strm);
632 if( ch != '{' ) state << '{';
633 eq_word(state, strm, script_status);
634 if( ch != '{' ) state << '}';
638 if( !next_token(white, token, strm) )
639 break;
640 // end loop and restart with this
641 if( (token[0] == '^' && status && !(status & SCRIPT_SUP)) ||
642 (token[0] == '_' && status && !(status & SCRIPT_SUB)) ||
643 strcmp("over", token) == 0 || strcmp("atop", token) == 0 ||
644 strchr("{}#&`", token[0]) ||
645 (!strchr("^_", token[0]) && white.length()) )
647 push_token(white, token, strm);
648 break;
652 outs << state;
654 return result;
657 static bool eq_sentence(MzString& outs, istream *strm, const char *end)
659 MzString state;
660 MzString white, token;
661 bool multiline = false;
663 read_white_space(outs, strm);
664 while( eq_word(state, strm) ) {
665 if( !next_token(white, token, strm) ||
666 (end && strcmp(token.c_str(), end) == 0) )
668 state << white << token;
669 break;
671 push_token(white, token, strm);
672 if( !token.compare("atop") || !token.compare("over") )
673 outs << '{' << state << '}';
674 else {
675 if( !token.compare("#") )
676 multiline = true;
677 outs << state;
679 state = nullptr;
680 read_white_space(outs, strm);
682 outs << state;
683 return multiline;
686 static char eq2ltxconv(MzString& sstr, istream *strm, const char *sentinel)
688 MzString white, token;
689 char key[256];
690 std::istream::int_type ch;
691 int result;
693 while( 0 != (result = next_token(white, token, strm)) ) {
694 if( sentinel && (result == 1) && strchr(sentinel, token[0]) )
695 break;
696 make_keyword(key, token);
697 const hwpeq *eq = nullptr;
698 if( (eq = lookup_eqn(key)) != nullptr ) {
699 if( eq->latex )
700 strcpy(key, eq->latex);
701 else {
702 key[0] = '\\';
703 strcpy(key + 1, eq->key);
705 if( (eq->flag & EQ_CASE)
706 && rtl::isAsciiUpperCase(static_cast<unsigned char>(token[0])) )
707 key[1] = sal::static_int_cast<char>(
708 rtl::toAsciiUpperCase(static_cast<unsigned char>(key[1])));
709 token = key;
712 if( token[0] == '{' ) { // grouping
713 sstr << white << token;
714 eq2ltxconv(sstr, strm, "}");
715 sstr << '}';
717 else if( eq && (eq->flag & EQ_ENV) ) {
718 next_token(white, token, strm);
719 if( token[0] != '{' )
720 return 0;
721 sstr << "\\begin" << "{" << eq->key << "}" << ENDL ;
722 eq2ltxconv(sstr, strm, "}");
723 if( sstr[sstr.length() - 1] != '\n' )
724 sstr << ENDL ;
725 sstr << "\\end" << "{" << eq->key << "}" << ENDL ;
727 else if( eq && (eq->flag & EQ_ATOP) ) {
728 if( sstr.length() == 0 )
729 sstr << '{';
730 else {
731 int pos = sstr.rfind('}');
732 if( 0 < pos)
733 sstr.replace(pos, ' ');
735 sstr << token;
736 while( (ch = strm->get()) != std::istream::traits_type::eof()
737 && IS_WS(ch) )
738 sstr << static_cast<char>(ch);
739 if( ch != '{' )
740 sstr << "{}";
741 else {
742 eq2ltxconv(sstr, strm, "}");
743 sstr << '}';
746 else
747 sstr << white << token;
749 return token[0];
752 void eq2latex(MzString& outs, char const *s)
754 assert(s);
755 if( stk == nullptr )
756 stk = new eq_stack;
758 MzString tstr;
760 istringstream tstrm(s);
761 bool eqnarray = eq_sentence(tstr, &tstrm);
762 istringstream strm(tstr.c_str());
764 if( eqnarray )
765 outs << "\\begin{array}{rllll}" << ENDL;
766 eq2ltxconv(outs, &strm, nullptr);
767 outs << ENDL;
768 if( eqnarray )
769 outs << "\\end{array}" << ENDL;
770 delete stk;
771 stk = nullptr;
774 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */