hwpfilter/source/hwpeq.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <assert.h>
  21 #include <stdio.h>
  22 #include <string.h>
  23
  24 // DVO: always use standard headers:
  25 #include <istream>
  26 #include <sstream>
  27 using namespace std;
  28
  29 #include "mzstring.h"
  30 #include "hwpeq.h"
  31 #include <sal/types.h>
  32 #include <sal/macros.h>
  33
  34 #include <rtl/character.hxx>
  35
  36 /* @Man: change the hwp formula to LaTeX */
  37 #ifdef _WIN32
  38 # define ENDL  "\r\n"
  39 #else /* !_WIN32 */
  40 # define ENDL  "\n"
  41 #endif
  42
  43 #define EQ_CASE 0x01    // case sensitive cmd
  44 #define EQ_ENV  0x02    // equiv to latex environment
  45 #define EQ_ATOP 0x04    // must revert order
  46
  47 static bool IS_WS(std::istream::int_type ch) {
  48     return ch != std::istream::traits_type::eof()
  49         && rtl::isAsciiWhiteSpace(
  50             static_cast<unsigned char>(
  51                 std::istream::traits_type::to_char_type(ch)));
  52 }
  53
  54 static bool IS_BINARY(std::istream::int_type ch) {
  55     return ch != std::istream::traits_type::eof()
  56         && strchr("+-<=>", std::istream::traits_type::to_char_type(ch));
  57 }
  58
  59 #ifdef _WIN32
  60 #define STRICMP stricmp
  61 #else
  62 #define STRICMP strcasecmp
  63 #endif
  64
  65 // sub and sup script status
  66 enum { SCRIPT_NONE, SCRIPT_SUB, SCRIPT_SUP, SCRIPT_ALL};
  67
  68 static int  eq_word(MzString& outs, istream *strm, int script = SCRIPT_NONE);
  69 static bool eq_sentence(MzString& outs, istream *strm, const char *end = nullptr);
  70
  71 namespace {
  72
  73 struct hwpeq {
  74   const char    *key;       // hwp math keyword
  75   const char    *latex;     // corresponding latex keyword
  76   int           nargs;      // # of argument
  77   unsigned char flag;       // case sensitive?
  78 };
  79
  80 }
  81
  82 const hwpeq eq_tbl[] = {
  83   { "!=",         "\\equiv ", 0,  0   },
  84   { "#",          "\\\\",     0,  0   },
  85   { "+-",         "\\pm ",    0,  0   },
  86   { "-+",         "\\mp ",    0,  0   },
  87   { "<=",         "\\leq ",   0,  0   },
  88   { "==",         "\\equiv ", 0,  0   },
  89   { ">=",         "\\geq ",   0,  0   },
  90   { "Pr",         nullptr,       0,  0   },
  91   { "^",          "^",        1,  0   },
  92   { "_",          "_",        1,  0   },
  93   { "`",          "\\;",      0,  0   },
  94   { "acute",      nullptr,       1,  0   },
  95   { "aleph",      nullptr,       0,  0   },
  96   { "alpha",      nullptr,       0,  EQ_CASE },
  97   { "amalg",      nullptr,       0,  0   },
  98   { "and",        nullptr,       0,  0   },
  99   { "angle",      nullptr,       0,  0   },
 100   { "angstrom",   nullptr,       0,  0   },
 101   { "approx",     nullptr,       0,  0   },
 102   { "arc",        nullptr,       0,  0   },
 103   { "arccos",     nullptr,       0,  0   },
 104   { "arch",       nullptr,       0,  0   },
 105   { "arcsin",     nullptr,       0,  0   },
 106   { "arctan",     nullptr,       0,  0   },
 107   { "arg",        nullptr,       0,  0   },
 108   { "assert",     "\\vdash",  0,  0   },
 109   { "ast",        nullptr,       0,  0   },
 110   { "asymp",      nullptr,       0,  0   },
 111   { "atop",       nullptr,       1,  EQ_ATOP },
 112   { "backslash",  nullptr,       0,  0   },
 113   { "bar",        nullptr,       1,  0   },
 114   { "because",    nullptr,       0,  0   },
 115   { "beta",       nullptr,       0,  EQ_CASE },
 116   { "big",        nullptr,       0,  EQ_CASE },
 117   { "bigcap",     nullptr,       0,  0   },
 118   { "bigcirc",    nullptr,       0,  0   },
 119   { "bigcup",     nullptr,       0,  0   },
 120   { "bigg",       nullptr,       0,  EQ_CASE },
 121   { "bigodiv",    nullptr,       0,  0   },
 122   { "bigodot",    nullptr,       0,  0   },
 123   { "bigominus",  nullptr,       0,  0   },
 124   { "bigoplus",   nullptr,       0,  0   },
 125   { "bigotimes",  nullptr,       0,  0   },
 126   { "bigsqcap",   nullptr,       0,  0   },
 127   { "bigsqcup",   nullptr,       0,  0   },
 128   { "biguplus",   nullptr,       0,  0   },
 129   { "bigvee",     nullptr,       0,  0   },
 130   { "bigwedge",   nullptr,       0,  0   },
 131   { "binom",      nullptr,       2,  0   },
 132   { "bmatrix",    nullptr,       0,  EQ_ENV  },
 133   { "bold",       nullptr,       0,  0   },
 134   { "bot",        nullptr,       0,  0   },
 135   { "breve",      nullptr,       1,  0   },
 136   { "buildrel",   nullptr,       0,  0   }, // LATER
 137   { "bullet",     nullptr,       0,  0   },
 138   { "cap",        nullptr,       0,  0   },
 139   { "cases",      nullptr,       0,  EQ_ENV  },
 140   { "ccol",       nullptr,       0,  0   }, /* Center vertically */
 141   { "cdot",       nullptr,       0,  0   },
 142   { "cdots",      nullptr,       0,  0   },
 143   { "check",      nullptr,       1,  0   },
 144   { "chi",        nullptr,       0,  EQ_CASE },
 145   { "choose",     nullptr,       0,  EQ_ATOP },
 146   { "circ",       nullptr,       0,  0   },
 147   { "col",        nullptr,       0,  0   }, // LATER
 148   { "cong",       nullptr,       0,  0   },
 149   { "coprod",     nullptr,       0,  0   },
 150   { "cos",        nullptr,       0,  0   },
 151   { "cosec",      nullptr,       0,  0   },
 152   { "cosh",       nullptr,       0,  0   },
 153   { "cot",        nullptr,       0,  0   },
 154   { "coth",       nullptr,       0,  0   },
 155   { "cpile",      nullptr,       0,  0   }, // LATER
 156   { "csc",        nullptr,       0,  0   },
 157   { "cup",        nullptr,       0,  0   },
 158   { "dagger",     nullptr,       0,  0   },
 159   { "dashv",      nullptr,       0,  0   },
 160   { "ddagger",    nullptr,       0,  0   },
 161   { "ddot",       nullptr,       1,  0   },
 162   { "ddots",      nullptr,       0,  0   },
 163   { "def",        nullptr,       0,  0   },
 164   { "deg",        nullptr,       0,  0   },
 165   { "del",        nullptr,       0,  0   },
 166   { "delta",      nullptr,       0,  EQ_CASE },
 167   { "diamond",    nullptr,       0,  0   },
 168   { "dim",        nullptr,       0,  0   },
 169   { "div",        nullptr,       0,  0   },
 170   { "divide",     nullptr,       0,  0   },
 171   { "dline",      nullptr,       0,  0   },
 172   { "dmatrix",    nullptr,       0,  EQ_ENV  },
 173   { "dot",        nullptr,       1,  0   },
 174   { "doteq",      nullptr,       0,  0   },
 175   { "dotsaxis",   nullptr,       0,  0   },
 176   { "dotsdiag",   nullptr,       0,  0   },
 177   { "dotslow",    "\\ldots",  0,  0   },
 178   { "dotsvert",   "\\vdots",  0,  0   },
 179   { "downarrow",  nullptr,       0,  EQ_CASE },
 180   { "dsum",       "+",        0,  0   },
 181   { "dyad",       nullptr,       0,  0   }, // LATER
 182   { "ell",        nullptr,       0,  0   },
 183   { "emptyset",   nullptr,       0,  0   },
 184   { "epsilon",    nullptr,       0,  EQ_CASE },
 185   { "eqalign",    nullptr,       0,  EQ_ENV  },
 186   { "equiv",      nullptr,       0,  0   },
 187   { "eta",        nullptr,       0,  EQ_CASE },
 188   { "exarrow",    nullptr,       0,  0   },
 189   { "exist",      "\\exists", 0,  0   },
 190   { "exists",     nullptr,       0,  0   },
 191   { "exp",        nullptr,       0,  EQ_CASE },
 192   { "for",        nullptr,       0,  0   },
 193   { "forall",     nullptr,       0,  0   },
 194   { "from",       "_",        1,  0   },
 195   { "gamma",      nullptr,       0,  EQ_CASE },
 196   { "gcd",        nullptr,       0,  0   },
 197   { "ge",         "\\geq",    0,  0   },
 198   { "geq",        nullptr,       0,  0   },
 199   { "ggg",        nullptr,       0,  0   },
 200   { "grad",       nullptr,       0,  0   },
 201   { "grave",      nullptr,       1,  0   },
 202   { "hat",        "\\widehat",    1,  0   },
 203   { "hbar",       nullptr,       0,  0   },
 204   { "hom",        nullptr,       0,  0   },
 205   { "hookleft",   nullptr,       0,  0   },
 206   { "hookright",  nullptr,       0,  0   },
 207   { "identical",  nullptr,       0,  0   }, // LATER
 208   { "if",         nullptr,       0,  0   },
 209   { "imag",       nullptr,       0,  0   },
 210   { "image",      nullptr,       0,  0   },
 211   { "imath",      nullptr,       0,  0   },
 212   { "in",         nullptr,       0,  0   },
 213   { "inf",        "\\infty",  0,  0   },
 214   { "infinity",   "\\infty",  0,  0   },
 215   { "infty",      nullptr,       0,  0   },
 216   { "int",        nullptr,       0,  0   },
 217   { "integral",   "\\int",    0,  0   },
 218   { "inter",      "\\bigcap", 0,  0   },
 219   { "iota",       nullptr,       0,  EQ_CASE },
 220   { "iso",        nullptr,       0,  0   }, // ams
 221   { "it",         nullptr,       0,  0   },
 222   { "jmath",      nullptr,       0,  0   },
 223   { "kappa",      nullptr,       0,  EQ_CASE },
 224   { "ker",        nullptr,       0,  0   },
 225   { "lambda",     nullptr,       0,  EQ_CASE },
 226   { "land",       nullptr,       0,  0   }, // LATER
 227   { "langle",     nullptr,       0,  0   },
 228   { "larrow",     "\\leftarrow",  0,  EQ_CASE },
 229   { "lbrace",     nullptr,       0,  0   },
 230   { "lbrack",     "[",        0,  0   },
 231   { "lceil",      nullptr,       0,  0   },
 232   { "lcol",       nullptr,       0,  0   }, // LATER
 233   { "ldots",      nullptr,       0,  0   },
 234   { "le",         nullptr,       0,  0   },
 235   { "left",       nullptr,       0,  0   },
 236   { "leftarrow",  nullptr,       0,  EQ_CASE },
 237   { "leq",        nullptr,       0,  0   },
 238   { "lfloor",     nullptr,       0,  0   },
 239   { "lg",         nullptr,       0,  0   },
 240   { "lim",        nullptr,       0,  EQ_CASE },
 241   { "line",       "\\vert",   0,  0   },
 242   { "liter",      "\\ell",    0,  0   },
 243   { "lll",        nullptr,       0,  0   }, // ams
 244   { "ln",         nullptr,       0,  0   },
 245   { "log",        nullptr,       0,  0   },
 246   { "lor",        "\\vee",    0,  0   },
 247   { "lparen",     "(",        0,  0   },
 248   { "lpile",      nullptr,       0,  0   }, // LATER
 249   { "lrarrow",    "\\leftrightarrow",   0,  EQ_CASE },
 250   { "lrharpoons", "\\leftrightharpoons",0,  0   },
 251   { "mapsto",     nullptr,       0,  0   },
 252   { "massert",    "\\dashv",  0,  0   },
 253   { "matrix",     nullptr,       0,  EQ_ENV  },
 254   { "max",        nullptr,       0,  0   },
 255   { "mho",        nullptr,       0,  0   }, // ams
 256   { "min",        nullptr,       0,  0   },
 257   { "minusplus",  nullptr,       0,  0   },
 258   { "mit",        "",     0,  0   }, // font
 259   { "mod",        "\\bmod",   0,  0   },
 260   { "models",     nullptr,       0,  0   },
 261   { "msangle",    nullptr,       0,  0   }, // LATER
 262   { "mu",         nullptr,       0,  EQ_CASE },
 263   { "nabla",      nullptr,       0,  0   },
 264   { "ne",         nullptr,       0,  0   },
 265   { "nearrow",    nullptr,       0,  0   },
 266   { "neg",        nullptr,       0,  0   },
 267   { "neq",        nullptr,       0,  0   },
 268   { "nequiv",     nullptr,       0,  0   },
 269   { "ni",         nullptr,       0,  0   },
 270   { "not",        nullptr,       0,  0   },
 271   { "notin",      nullptr,       0,  0   },
 272   { "nu",         nullptr,       0,  EQ_CASE },
 273   { "nwarrow",    nullptr,       0,  0   },
 274   { "odiv",       nullptr,       0,  0   },
 275   { "odot",       nullptr,       0,  0   },
 276   { "oint",       nullptr,       0,  0   },
 277   { "omega",      nullptr,       0,  EQ_CASE },
 278   { "omicron",    nullptr,       0,  EQ_CASE },
 279   { "ominus",     nullptr,       0,  0   },
 280   { "oplus",      nullptr,       0,  0   },
 281   { "or ",        nullptr,       0,  0   },
 282   { "oslash",     nullptr,       0,  0   },
 283   { "otimes",     nullptr,       0,  0   },
 284   { "over",       nullptr,       1,  EQ_ATOP },
 285   { "overline",   nullptr,       1,  0   },
 286   { "owns",       "\\ni",     0,  0   },
 287   { "parallel",   nullptr,       0,  0   },
 288   { "partial",    nullptr,       0,  0   },
 289   { "phantom",    nullptr,       0,  0   },
 290   { "phi",        nullptr,       0,  EQ_CASE },
 291   { "pi",         nullptr,       0,  EQ_CASE },
 292   { "pile",       nullptr,       0,  0   }, // LATER
 293   { "plusminus",  "\\pm",     0,  0   },
 294   { "pmatrix",    nullptr,       0,  EQ_ENV  },
 295   { "prec",       nullptr,       0,  0   },
 296   { "prep",       nullptr,       0,  0   },
 297   { "prime",      nullptr,       0,  0   },
 298   { "prod",       nullptr,       0,  0   },
 299   { "propto",     nullptr,       0,  0   },
 300   { "psi",        nullptr,       0,  EQ_CASE },
 301   { "rangle",     nullptr,       0,  0   },
 302   { "rarrow",     "\\rightarrow", 0,  EQ_CASE },
 303   { "rbrace",     "]",        0,  0   },
 304   { "rbrace",     nullptr,       0,  0   },
 305   { "rceil",      nullptr,       0,  0   },
 306   { "rcol",       nullptr,       0,  0   }, // LATER
 307   { "real",       "\\Re",     0,  0   },
 308   { "reimage",    nullptr,       0,  0   },
 309   { "rel",        nullptr,       0,  0   },
 310   { "rfloor",     nullptr,       0,  0   },
 311   { "rho",        nullptr,       0,  EQ_CASE },
 312   { "right",      nullptr,       0,  0   },
 313   { "rightarrow", nullptr,       0,  EQ_CASE },
 314   { "rlharpoons", nullptr,       0,  0   },
 315   { "rm",         nullptr,       0,  0   },
 316   { "root",       "\\sqrt",   1,  0   },
 317   { "rparen",     ")",        0,  0   },
 318   { "rpile",      nullptr,       0,  0   }, // LATER
 319   { "rtangle",    nullptr,       0,  0   },
 320   { "sangle",     nullptr,       0,  0   },
 321   { "scale",      nullptr,       0,  0   },
 322   { "searrow",    nullptr,       0,  0   },
 323   { "sec",        nullptr,       0,  0   },
 324   { "sigma",      nullptr,       0,  EQ_CASE },
 325   { "sim",        nullptr,       0,  0   },
 326   { "simeq",      nullptr,       0,  0   },
 327   { "sin",        nullptr,       0,  0   },
 328   { "sinh",       nullptr,       0,  0   },
 329   { "slash",      nullptr,       0,  0   },
 330   { "smallint",   nullptr,       0,  0   },
 331   { "smallinter", nullptr,       0,  0   },
 332   { "smalloint",  nullptr,       0,  0   },
 333   { "smallprod",  nullptr,       0,  0   },
 334   { "smallsum",   nullptr,       0,  0   },
 335   { "smallunion", nullptr,       0,  0   },
 336   { "smcoprod",   nullptr,       0,  0   },
 337   { "sqcap",      nullptr,       0,  0   },
 338   { "sqcup",      nullptr,       0,  0   },
 339   { "sqrt",       nullptr,       1,  0   },
 340   { "sqsubset",   nullptr,       0,  0   },
 341   { "sqsubseteq", nullptr,       0,  0   },
 342   { "sqsupset",   nullptr,       0,  0   },
 343   { "sqsupseteq", nullptr,       0,  0   },
 344   { "star",       nullptr,       0,  0   },
 345   { "sub",        "_",        0,  0   },
 346   { "subset",     nullptr,       0,  0   },
 347   { "subseteq",   nullptr,       0,  0   },
 348   { "succ",       nullptr,       0,  0   },
 349   { "sum",        nullptr,       0,  0   },
 350   { "sup",        "^",        0,  0   },
 351   { "superset",   nullptr,       0,  0   },
 352   { "supset",     nullptr,       0,  0   },
 353   { "supseteq",   nullptr,       0,  0   },
 354   { "swarrow",    nullptr,       0,  0   },
 355   { "tan",        nullptr,       0,  0   },
 356   { "tanh",       nullptr,       0,  0   },
 357   { "tau",        nullptr,       0,  EQ_CASE },
 358   { "therefore",  nullptr,       0,  0   },
 359   { "theta",      nullptr,       0,  EQ_CASE },
 360   { "tilde",      "\\widetilde",  1,  0   },
 361   { "times",      nullptr,       0,  0   },
 362   { "to",         "^",        1,  0   },
 363   { "top",        nullptr,       0,  0   },
 364   { "triangle",   nullptr,       0,  0   },
 365   { "triangled",  nullptr,       0,  0   },
 366   { "trianglel",  nullptr,       0,  0   },
 367   { "triangler",  nullptr,       0,  0   },
 368   { "triangleu",  nullptr,       0,  0   },
 369   { "udarrow",    "\\updownarrow",0,  EQ_CASE },
 370   { "under",      "\\underline",  1,  0   },
 371   { "underline",  "\\underline",  1,  0   },
 372   { "union",      "\\bigcup", 0,  0   },
 373   { "uparrow",    nullptr,       0,  EQ_CASE },
 374   { "uplus",      nullptr,       0,  0   },
 375   { "upsilon",    nullptr,       0,  EQ_CASE },
 376   { "varepsilon", nullptr,       0,  0   },
 377   { "varphi",     nullptr,       0,  0   },
 378   { "varpi",      nullptr,       0,  0   },
 379   { "varrho",     nullptr,       0,  0   },
 380   { "varsigma",   nullptr,       0,  0   },
 381   { "vartheta",   nullptr,       0,  0   },
 382   { "varupsilon", nullptr,       0,  0   },
 383   { "vdash",      nullptr,       0,  0   },
 384   { "vdots",      nullptr,       0,  0   },
 385   { "vec",        nullptr,       1,  0   },
 386   { "vee",        nullptr,       0,  0   },
 387   { "vert",       nullptr,       0,  0   },
 388   { "wedge",      nullptr,       0,  0   },
 389   { "wp",         nullptr,       0,  0   },
 390   { "xi",         nullptr,       0,  EQ_CASE },
 391   { "xor",        nullptr,       0,  0   },
 392   { "zeta",       nullptr,       0,  EQ_CASE }
 393 };
 394
 395 static const hwpeq *lookup_eqn(char const *str)
 396 {
 397   static const int eqCount = SAL_N_ELEMENTS(eq_tbl);
 398   int l = 0, r = eqCount;
 399   const hwpeq *result = nullptr;
 400
 401   while( l < r ) {
 402     const int m = (l + r) / 2;
 403     const int k = strcmp(eq_tbl[m].key, str);
 404     if( k == 0 ) {
 405       result = eq_tbl + m;
 406       break;
 407     }
 408     else if( k < 0 )
 409       l = m + 1;
 410     else
 411       r = m;
 412   }
 413   return result;
 414 }
 415
 416 /* If only the first character is uppercase or all characters are uppercase, change to lowercase */
 417 static void make_keyword( char *keyword, const char *token)
 418 {
 419     char* ptr;
 420     bool result = true;
 421     int len = strlen(token);
 422     assert(keyword);
 423
 424     if( 255 < len )
 425     {
 426         len = 255;
 427     }
 428     memcpy(keyword, token, len);
 429     keyword[len] = 0;
 430
 431     if( (token[0] & 0x80) || rtl::isAsciiLowerCase(static_cast<unsigned char>(token[0])) || strlen(token) < 2 )
 432         return;
 433
 434     bool capital = rtl::isAsciiUpperCase(
 435         static_cast<unsigned char>(keyword[1]));
 436     for( ptr = keyword + 2; *ptr && result; ptr++ )
 437     {
 438         if( (*ptr & 0x80) ||
 439             (!capital && rtl::isAsciiUpperCase(static_cast<unsigned char>(*ptr))) ||
 440             (capital && rtl::isAsciiLowerCase(static_cast<unsigned char>(*ptr))) )
 441         {
 442             result = false;
 443         }
 444     }
 445
 446     if( result )
 447     {
 448         ptr = keyword;
 449         while( *ptr )
 450         {
 451             if( rtl::isAsciiUpperCase(static_cast<unsigned char>(*ptr)) )
 452                 *ptr = sal::static_int_cast<char>(
 453                     rtl::toAsciiLowerCase(static_cast<unsigned char>(*ptr)));
 454             ptr++;
 455         }
 456     }
 457 }
 458
 459 namespace {
 460
 461 // token reading function
 462 struct eq_stack {
 463   MzString  white;
 464   MzString  token;
 465   istream   *strm;
 466
 467   eq_stack() { strm = nullptr; };
 468   bool state(istream const *s) {
 469     if( strm != s) { white = nullptr; token = nullptr; }
 470     return token.length() != 0;
 471   }
 472 };
 473
 474 }
 475
 476 static eq_stack *stk = nullptr;
 477
 478 static void push_token(MzString const &white, MzString const &token, istream *strm)
 479 {
 480   // one time stack
 481   assert(stk->token.length() == 0);
 482
 483   stk->white = white;
 484   stk->token = token;
 485   stk->strm = strm;
 486 }
 487
 488 /*
 489  * It returns the length of the read tokens.
 490  *
 491  * control char, control sequence, binary sequence,
 492  * alphabet string, single character */
 493 static int next_token(MzString &white, MzString &token, istream *strm)
 494 {
 495   std::istream::int_type ch = 0;
 496
 497   if( stk->state(strm) ) {
 498     white = stk->white;
 499     token = stk->token;
 500     stk->token = nullptr;
 501     stk->white = nullptr;
 502     return token.length();
 503   }
 504
 505   token = nullptr;
 506   white = nullptr;
 507   if( !strm->good() )
 508     return 0;
 509   ch = strm->get();
 510   if( ch == std::istream::traits_type::eof() )
 511     return 0;
 512
 513   // read preceding ws
 514   if( IS_WS(ch) ) {
 515     do
 516     {
 517         white << static_cast<char>(ch);
 518         ch = strm->get();
 519     } while (IS_WS(ch));
 520   }
 521
 522   if( ch == '\\' || ch & 0x80
 523       || (ch != std::istream::traits_type::eof() && rtl::isAsciiAlpha(ch)) )
 524   {
 525     if( ch == '\\' ) {
 526       token << static_cast<char>(ch);
 527       ch = strm->get();
 528     }
 529     do {
 530       token << static_cast<char>(ch);
 531       ch = strm->get();
 532     } while( ch != std::istream::traits_type::eof()
 533              && (ch & 0x80 || rtl::isAsciiAlpha(ch)) ) ;
 534     strm->putback(static_cast<char>(ch));
 535     /* special treatment of sub, sub, over, atop
 536        The reason for this is that affect next_state().
 537      */
 538     if( !STRICMP("sub", token) || !STRICMP("from", token) ||
 539     !STRICMP("sup", token) || !STRICMP("to", token) ||
 540     !STRICMP("over", token) || !STRICMP("atop", token) ||
 541     !STRICMP("left", token) || !STRICMP("right", token) )
 542     {
 543       char buf[256];
 544       make_keyword(buf, token);
 545       token = buf;
 546     }
 547     if( !token.compare("sub") || !token.compare("from") )
 548       token = "_";
 549     if( !token.compare("sup") || !token.compare("to") )
 550       token = "^";
 551   }
 552   else if( IS_BINARY(ch) ) {
 553     do
 554     {
 555         token << static_cast<char>(ch);
 556         ch = strm->get();
 557     }
 558     while( IS_BINARY(ch) );
 559     strm->putback(static_cast<char>(ch));
 560   }
 561   else if( ch != std::istream::traits_type::eof() && rtl::isAsciiDigit(ch) ) {
 562     do {
 563         token << static_cast<char>(ch);
 564         ch = strm->get();
 565     } while( ch != std::istream::traits_type::eof() && rtl::isAsciiDigit(ch) );
 566     strm->putback(static_cast<char>(ch));
 567   }
 568   else
 569     token << static_cast<char>(ch);
 570
 571   return token.length();
 572 }
 573
 574 static std::istream::int_type read_white_space(MzString& outs, istream *strm)
 575 {
 576   std::istream::int_type result;
 577
 578   if( stk->state(strm) ) {
 579     outs << stk->white;
 580     stk->white = nullptr;
 581     result = std::istream::traits_type::to_int_type(stk->token[0]);
 582   }
 583   else {
 584     std::istream::int_type ch;
 585     for (;;)
 586     {
 587         ch = strm->get();
 588         if (!IS_WS(ch))
 589             break;
 590         outs << static_cast<char>(ch);
 591     }
 592     strm->putback(static_cast<char>(ch));
 593     result = ch;
 594   }
 595   return result;
 596 }
 597
/* When no argument is required, items are delimited by spaces and braces:
      sqrt {ab} c = sqrt {ab} c
   {,} are for grouping
   ^ ,_ are for combination

   Normalising keywords that take arguments, such as sqrt:
      sqrt a -> sqrt {a}
      sqrt {a} -> sqrt {a}
   If there is more than one argument, the backslash between the arguments is eliminated:
      \frac a b -> frac {a} {b}
   Cleaning up the form of over:
      a over b -> {a} over {b}
 */
 611
 612 static int eq_word(MzString& outs, istream *strm, int status)
 613 {
 614   MzString  token, white, state;
 615   int       result;
 616   char      keyword[256];
 617   const hwpeq *eq;
 618
 619   next_token(white, token, strm);
 620   if (token.length() <= 0)
 621       return 0;
 622   result = token[0];
 623
 624   if( token.compare("{") == 0 ) {
 625     state << white << token;
 626     eq_sentence(state, strm, "}");
 627   }
 628   else if( token.compare("left") == 0 ) {
 629     state << white << token;
 630     next_token(white, token, strm);
 631     state << white << token;
 632
 633     eq_sentence(state, strm, "right");
 634
 635     next_token(white, token, strm);
 636     state << white << token;
 637   }
 638   else {
 639     /* Normal token */
 640     int script_status = SCRIPT_NONE;
 641     while( true ) {
 642       state << white << token;
 643       make_keyword(keyword, token);
 644       if( token[0] == '^' )
 645         script_status |= SCRIPT_SUP;
 646       else if( token[0] == '_' )
 647         script_status |= SCRIPT_SUB;
 648       else
 649         script_status = SCRIPT_NONE;
 650
 651       if( nullptr != (eq = lookup_eqn(keyword)) ) {
 652         int nargs = eq->nargs;
 653         while( nargs-- ) {
 654           const std::istream::int_type ch = read_white_space(state, strm);
 655           if( ch != '{' ) state << '{';
 656           eq_word(state, strm, script_status);
 657           if( ch != '{' ) state << '}';
 658         }
 659       }
 660
 661       if( !next_token(white, token, strm) )
 662         break;
 663       // end loop and restart with this
 664       if( (token[0] == '^' && status && !(status & SCRIPT_SUP)) ||
 665           (token[0] == '_' && status && !(status & SCRIPT_SUB)) ||
 666           strcmp("over", token) == 0 || strcmp("atop", token) == 0 ||
 667           strchr("{}#&`", token[0]) ||
 668           (!strchr("^_", token[0]) && white.length()) )
 669       {
 670         push_token(white, token, strm);
 671         break;
 672       }
 673     }
 674   }
 675   outs << state;
 676
 677   return result;
 678 }
 679
 680 static bool eq_sentence(MzString& outs, istream *strm, const char *end)
 681 {
 682   MzString  state;
 683   MzString  white, token;
 684   bool      multiline = false;
 685
 686   read_white_space(outs, strm);
 687   while( eq_word(state, strm) ) {
 688     if( !next_token(white, token, strm) ||
 689     (end && strcmp(token.c_str(), end) == 0) )
 690     {
 691       state << white << token;
 692       break;
 693     }
 694     push_token(white, token, strm);
 695     if( !token.compare("atop") || !token.compare("over") )
 696       outs << '{' << state << '}';
 697     else {
 698       if( !token.compare("#") )
 699         multiline = true;
 700       outs << state;
 701     }
 702     state =  nullptr;
 703     read_white_space(outs, strm);
 704   }
 705   outs << state;
 706   return multiline;
 707 }
 708
 709 static char eq2ltxconv(MzString& sstr, istream *strm, const char *sentinel)
 710 {
 711   MzString  white, token;
 712   char      key[256];
 713   std::istream::int_type ch;
 714   int       result;
 715
 716   while( 0 != (result = next_token(white, token, strm)) ) {
 717     if( sentinel && (result == 1) && strchr(sentinel, token[0]) )
 718       break;
 719     make_keyword(key, token);
 720     const hwpeq *eq = nullptr;
 721     if( (eq = lookup_eqn(key)) != nullptr ) {
 722       if( eq->latex )
 723         strcpy(key, eq->latex);
 724       else {
 725         key[0] = '\\';
 726         strcpy(key + 1, eq->key);
 727       }
 728       if( (eq->flag & EQ_CASE)
 729           && rtl::isAsciiUpperCase(static_cast<unsigned char>(token[0])) )
 730         key[1] = sal::static_int_cast<char>(
 731             rtl::toAsciiUpperCase(static_cast<unsigned char>(key[1])));
 732       token = key;
 733     }
 734
 735     if( token[0] == '{' ) { // grouping
 736       sstr << white << token;
 737       eq2ltxconv(sstr, strm, "}");
 738       sstr << '}';
 739     }
 740     else if( eq && (eq->flag & EQ_ENV) ) {
 741       next_token(white, token, strm);
 742       if( token[0] != '{' )
 743         return 0;
 744       sstr << "\\begin" << "{" << eq->key << "}" << ENDL ;
 745       eq2ltxconv(sstr, strm, "}");
 746       if( sstr[sstr.length() - 1] != '\n' )
 747         sstr << ENDL ;
 748       sstr << "\\end" << "{" << eq->key << "}" << ENDL ;
 749     }
 750     else if( eq && (eq->flag & EQ_ATOP) ) {
 751       if( sstr.length() == 0 )
 752         sstr << '{';
 753       else {
 754         int pos  = sstr.rfind('}');
 755         if( 0 < pos)
 756           sstr.replace(pos, ' ');
 757       }
 758       sstr << token;
 759       for (;;)
 760       {
 761         ch = strm->get();
 762         if ( ch == std::istream::traits_type::eof() || !IS_WS(ch) )
 763             break;
 764         sstr << static_cast<char>(ch);
 765       }
 766       if( ch != '{' )
 767         sstr << "{}";
 768       else {
 769         eq2ltxconv(sstr, strm, "}");
 770         sstr << '}';
 771       }
 772     }
 773     else
 774       sstr << white << token;
 775   }
 776   return token[0];
 777 }
 778
 779 void eq2latex(MzString& outs, char const *s)
 780 {
 781   assert(s);
 782   if( stk == nullptr )
 783     stk = new eq_stack;
 784
 785   MzString  tstr;
 786
 787   istringstream tstrm(s);
 788   bool eqnarray = eq_sentence(tstr, &tstrm);
 789   istringstream strm(tstr.c_str());
 790
 791   if( eqnarray )
 792     outs << "\\begin{array}{rllll}" << ENDL;
 793   eq2ltxconv(outs, &strm, nullptr);
 794   outs << ENDL;
 795   if( eqnarray )
 796     outs << "\\end{array}" << ENDL;
 797   delete stk;
 798   stk = nullptr;
 799 }
 800
 801 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */