hwpfilter/source/hwpeq.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <assert.h>
  21 #include <stdio.h>
  22 #include <string.h>
  23
  24 // DVO: always use standard headers:
  25 #include <istream>
  26 #include <sstream>
  27 using namespace std;
  28
  29 #include "mzstring.h"
  30 #include "hwpeq.h"
  31 #include <sal/types.h>
  32 #include <sal/macros.h>
  33
  34 #include <rtl/character.hxx>
  35
/* @Man: change the hwp formula to LaTeX */
/* ENDL is the line terminator appended to the generated LaTeX text. */
#ifdef _WIN32
# define ENDL  "\r\n"
#else /* !_WIN32 */
# define ENDL  "\n"
#endif

/* Bit flags stored in hwpeq::flag (may be OR-ed together). */
#define EQ_CASE 0x01    // case sensitive cmd: an upper-case first letter is kept in the output
#define EQ_ENV  0x02    // equiv to latex environment: emitted as \begin{key} ... \end{key}
#define EQ_ATOP 0x04    // must revert order: infix command (atop, choose, over)
  46
  47 static bool IS_WS(std::istream::int_type ch) {
  48     return ch != std::istream::traits_type::eof()
  49         && rtl::isAsciiWhiteSpace(
  50             static_cast<unsigned char>(
  51                 std::istream::traits_type::to_char_type(ch)));
  52 }
  53
  54 static bool IS_BINARY(std::istream::int_type ch) {
  55     return ch != std::istream::traits_type::eof()
  56         && strchr("+-<=>", std::istream::traits_type::to_char_type(ch));
  57 }
  58
/* STRICMP: case-insensitive C-string comparison, spelled per platform. */
#ifdef _WIN32
#define STRICMP stricmp
#else
#define STRICMP strcasecmp
#endif
  64
// sub and sup script status
// SCRIPT_SUB and SCRIPT_SUP are OR-ed together by eq_word(); SCRIPT_ALL is
// the value that results when both are set.
enum { SCRIPT_NONE, SCRIPT_SUB, SCRIPT_SUP, SCRIPT_ALL};

// Forward declarations: eq_word() and eq_sentence() call each other.
static int  eq_word(MzString& outs, istream *strm, int script = SCRIPT_NONE);
static bool eq_sentence(MzString& outs, istream *strm, const char *end = nullptr);
  70
/* One row of the keyword table: describes how an hwp keyword maps to LaTeX. */
struct hwpeq {
  const char    *key;       // hwp math keyword (compared with strcmp by lookup_eqn)
  const char    *latex;     // corresponding latex keyword; nullptr means "\" followed by key
  int           nargs;      // # of argument
  unsigned char flag;       // bitwise OR of EQ_CASE / EQ_ENV / EQ_ATOP, or 0
};
  77
  78 static const hwpeq eq_tbl[] = {
  79   { "!=",         "\\equiv ", 0,  0   },
  80   { "#",          "\\\\",     0,  0   },
  81   { "+-",         "\\pm ",    0,  0   },
  82   { "-+",         "\\mp ",    0,  0   },
  83   { "<=",         "\\leq ",   0,  0   },
  84   { "==",         "\\equiv ", 0,  0   },
  85   { ">=",         "\\geq ",   0,  0   },
  86   { "Pr",         nullptr,       0,  0   },
  87   { "^",          "^",        1,  0   },
  88   { "_",          "_",        1,  0   },
  89   { "`",          "\\;",      0,  0   },
  90   { "acute",      nullptr,       1,  0   },
  91   { "aleph",      nullptr,       0,  0   },
  92   { "alpha",      nullptr,       0,  EQ_CASE },
  93   { "amalg",      nullptr,       0,  0   },
  94   { "and",        nullptr,       0,  0   },
  95   { "angle",      nullptr,       0,  0   },
  96   { "angstrom",   nullptr,       0,  0   },
  97   { "approx",     nullptr,       0,  0   },
  98   { "arc",        nullptr,       0,  0   },
  99   { "arccos",     nullptr,       0,  0   },
 100   { "arch",       nullptr,       0,  0   },
 101   { "arcsin",     nullptr,       0,  0   },
 102   { "arctan",     nullptr,       0,  0   },
 103   { "arg",        nullptr,       0,  0   },
 104   { "assert",     "\\vdash",  0,  0   },
 105   { "ast",        nullptr,       0,  0   },
 106   { "asymp",      nullptr,       0,  0   },
 107   { "atop",       nullptr,       1,  EQ_ATOP },
 108   { "backslash",  nullptr,       0,  0   },
 109   { "bar",        nullptr,       1,  0   },
 110   { "because",    nullptr,       0,  0   },
 111   { "beta",       nullptr,       0,  EQ_CASE },
 112   { "big",        nullptr,       0,  EQ_CASE },
 113   { "bigcap",     nullptr,       0,  0   },
 114   { "bigcirc",    nullptr,       0,  0   },
 115   { "bigcup",     nullptr,       0,  0   },
 116   { "bigg",       nullptr,       0,  EQ_CASE },
 117   { "bigodiv",    nullptr,       0,  0   },
 118   { "bigodot",    nullptr,       0,  0   },
 119   { "bigominus",  nullptr,       0,  0   },
 120   { "bigoplus",   nullptr,       0,  0   },
 121   { "bigotimes",  nullptr,       0,  0   },
 122   { "bigsqcap",   nullptr,       0,  0   },
 123   { "bigsqcup",   nullptr,       0,  0   },
 124   { "biguplus",   nullptr,       0,  0   },
 125   { "bigvee",     nullptr,       0,  0   },
 126   { "bigwedge",   nullptr,       0,  0   },
 127   { "binom",      nullptr,       2,  0   },
 128   { "bmatrix",    nullptr,       0,  EQ_ENV  },
 129   { "bold",       nullptr,       0,  0   },
 130   { "bot",        nullptr,       0,  0   },
 131   { "breve",      nullptr,       1,  0   },
 132   { "buildrel",   nullptr,       0,  0   }, // LATER
 133   { "bullet",     nullptr,       0,  0   },
 134   { "cap",        nullptr,       0,  0   },
 135   { "cases",      nullptr,       0,  EQ_ENV  },
 136   { "ccol",       nullptr,       0,  0   }, /* Center vertically */
 137   { "cdot",       nullptr,       0,  0   },
 138   { "cdots",      nullptr,       0,  0   },
 139   { "check",      nullptr,       1,  0   },
 140   { "chi",        nullptr,       0,  EQ_CASE },
 141   { "choose",     nullptr,       0,  EQ_ATOP },
 142   { "circ",       nullptr,       0,  0   },
 143   { "col",        nullptr,       0,  0   }, // LATER
 144   { "cong",       nullptr,       0,  0   },
 145   { "coprod",     nullptr,       0,  0   },
 146   { "cos",        nullptr,       0,  0   },
 147   { "cosec",      nullptr,       0,  0   },
 148   { "cosh",       nullptr,       0,  0   },
 149   { "cot",        nullptr,       0,  0   },
 150   { "coth",       nullptr,       0,  0   },
 151   { "cpile",      nullptr,       0,  0   }, // LATER
 152   { "csc",        nullptr,       0,  0   },
 153   { "cup",        nullptr,       0,  0   },
 154   { "dagger",     nullptr,       0,  0   },
 155   { "dashv",      nullptr,       0,  0   },
 156   { "ddagger",    nullptr,       0,  0   },
 157   { "ddot",       nullptr,       1,  0   },
 158   { "ddots",      nullptr,       0,  0   },
 159   { "def",        nullptr,       0,  0   },
 160   { "deg",        nullptr,       0,  0   },
 161   { "del",        nullptr,       0,  0   },
 162   { "delta",      nullptr,       0,  EQ_CASE },
 163   { "diamond",    nullptr,       0,  0   },
 164   { "dim",        nullptr,       0,  0   },
 165   { "div",        nullptr,       0,  0   },
 166   { "divide",     nullptr,       0,  0   },
 167   { "dline",      nullptr,       0,  0   },
 168   { "dmatrix",    nullptr,       0,  EQ_ENV  },
 169   { "dot",        nullptr,       1,  0   },
 170   { "doteq",      nullptr,       0,  0   },
 171   { "dotsaxis",   nullptr,       0,  0   },
 172   { "dotsdiag",   nullptr,       0,  0   },
 173   { "dotslow",    "\\ldots",  0,  0   },
 174   { "dotsvert",   "\\vdots",  0,  0   },
 175   { "downarrow",  nullptr,       0,  EQ_CASE },
 176   { "dsum",       "+",        0,  0   },
 177   { "dyad",       nullptr,       0,  0   }, // LATER
 178   { "ell",        nullptr,       0,  0   },
 179   { "emptyset",   nullptr,       0,  0   },
 180   { "epsilon",    nullptr,       0,  EQ_CASE },
 181   { "eqalign",    nullptr,       0,  EQ_ENV  },
 182   { "equiv",      nullptr,       0,  0   },
 183   { "eta",        nullptr,       0,  EQ_CASE },
 184   { "exarrow",    nullptr,       0,  0   },
 185   { "exist",      "\\exists", 0,  0   },
 186   { "exists",     nullptr,       0,  0   },
 187   { "exp",        nullptr,       0,  EQ_CASE },
 188   { "for",        nullptr,       0,  0   },
 189   { "forall",     nullptr,       0,  0   },
 190   { "from",       "_",        1,  0   },
 191   { "gamma",      nullptr,       0,  EQ_CASE },
 192   { "gcd",        nullptr,       0,  0   },
 193   { "ge",         "\\geq",    0,  0   },
 194   { "geq",        nullptr,       0,  0   },
 195   { "ggg",        nullptr,       0,  0   },
 196   { "grad",       nullptr,       0,  0   },
 197   { "grave",      nullptr,       1,  0   },
 198   { "hat",        "\\widehat",    1,  0   },
 199   { "hbar",       nullptr,       0,  0   },
 200   { "hom",        nullptr,       0,  0   },
 201   { "hookleft",   nullptr,       0,  0   },
 202   { "hookright",  nullptr,       0,  0   },
 203   { "identical",  nullptr,       0,  0   }, // LATER
 204   { "if",         nullptr,       0,  0   },
 205   { "imag",       nullptr,       0,  0   },
 206   { "image",      nullptr,       0,  0   },
 207   { "imath",      nullptr,       0,  0   },
 208   { "in",         nullptr,       0,  0   },
 209   { "inf",        "\\infty",  0,  0   },
 210   { "infinity",   "\\infty",  0,  0   },
 211   { "infty",      nullptr,       0,  0   },
 212   { "int",        nullptr,       0,  0   },
 213   { "integral",   "\\int",    0,  0   },
 214   { "inter",      "\\bigcap", 0,  0   },
 215   { "iota",       nullptr,       0,  EQ_CASE },
 216   { "iso",        nullptr,       0,  0   }, // ams
 217   { "it",         nullptr,       0,  0   },
 218   { "jmath",      nullptr,       0,  0   },
 219   { "kappa",      nullptr,       0,  EQ_CASE },
 220   { "ker",        nullptr,       0,  0   },
 221   { "lambda",     nullptr,       0,  EQ_CASE },
 222   { "land",       nullptr,       0,  0   }, // LATER
 223   { "langle",     nullptr,       0,  0   },
 224   { "larrow",     "\\leftarrow",  0,  EQ_CASE },
 225   { "lbrace",     nullptr,       0,  0   },
 226   { "lbrack",     "[",        0,  0   },
 227   { "lceil",      nullptr,       0,  0   },
 228   { "lcol",       nullptr,       0,  0   }, // LATER
 229   { "ldots",      nullptr,       0,  0   },
 230   { "le",         nullptr,       0,  0   },
 231   { "left",       nullptr,       0,  0   },
 232   { "leftarrow",  nullptr,       0,  EQ_CASE },
 233   { "leq",        nullptr,       0,  0   },
 234   { "lfloor",     nullptr,       0,  0   },
 235   { "lg",         nullptr,       0,  0   },
 236   { "lim",        nullptr,       0,  EQ_CASE },
 237   { "line",       "\\vert",   0,  0   },
 238   { "liter",      "\\ell",    0,  0   },
 239   { "lll",        nullptr,       0,  0   }, // ams
 240   { "ln",         nullptr,       0,  0   },
 241   { "log",        nullptr,       0,  0   },
 242   { "lor",        "\\vee",    0,  0   },
 243   { "lparen",     "(",        0,  0   },
 244   { "lpile",      nullptr,       0,  0   }, // LATER
 245   { "lrarrow",    "\\leftrightarrow",   0,  EQ_CASE },
 246   { "lrharpoons", "\\leftrightharpoons",0,  0   },
 247   { "mapsto",     nullptr,       0,  0   },
 248   { "massert",    "\\dashv",  0,  0   },
 249   { "matrix",     nullptr,       0,  EQ_ENV  },
 250   { "max",        nullptr,       0,  0   },
 251   { "mho",        nullptr,       0,  0   }, // ams
 252   { "min",        nullptr,       0,  0   },
 253   { "minusplus",  nullptr,       0,  0   },
 254   { "mit",        "",     0,  0   }, // font
 255   { "mod",        "\\bmod",   0,  0   },
 256   { "models",     nullptr,       0,  0   },
 257   { "msangle",    nullptr,       0,  0   }, // LATER
 258   { "mu",         nullptr,       0,  EQ_CASE },
 259   { "nabla",      nullptr,       0,  0   },
 260   { "ne",         nullptr,       0,  0   },
 261   { "nearrow",    nullptr,       0,  0   },
 262   { "neg",        nullptr,       0,  0   },
 263   { "neq",        nullptr,       0,  0   },
 264   { "nequiv",     nullptr,       0,  0   },
 265   { "ni",         nullptr,       0,  0   },
 266   { "not",        nullptr,       0,  0   },
 267   { "notin",      nullptr,       0,  0   },
 268   { "nu",         nullptr,       0,  EQ_CASE },
 269   { "nwarrow",    nullptr,       0,  0   },
 270   { "odiv",       nullptr,       0,  0   },
 271   { "odot",       nullptr,       0,  0   },
 272   { "oint",       nullptr,       0,  0   },
 273   { "omega",      nullptr,       0,  EQ_CASE },
 274   { "omicron",    nullptr,       0,  EQ_CASE },
 275   { "ominus",     nullptr,       0,  0   },
 276   { "oplus",      nullptr,       0,  0   },
 277   { "or ",        nullptr,       0,  0   },
 278   { "oslash",     nullptr,       0,  0   },
 279   { "otimes",     nullptr,       0,  0   },
 280   { "over",       nullptr,       1,  EQ_ATOP },
 281   { "overline",   nullptr,       1,  0   },
 282   { "owns",       "\\ni",     0,  0   },
 283   { "parallel",   nullptr,       0,  0   },
 284   { "partial",    nullptr,       0,  0   },
 285   { "phantom",    nullptr,       0,  0   },
 286   { "phi",        nullptr,       0,  EQ_CASE },
 287   { "pi",         nullptr,       0,  EQ_CASE },
 288   { "pile",       nullptr,       0,  0   }, // LATER
 289   { "plusminus",  "\\pm",     0,  0   },
 290   { "pmatrix",    nullptr,       0,  EQ_ENV  },
 291   { "prec",       nullptr,       0,  0   },
 292   { "prep",       nullptr,       0,  0   },
 293   { "prime",      nullptr,       0,  0   },
 294   { "prod",       nullptr,       0,  0   },
 295   { "propto",     nullptr,       0,  0   },
 296   { "psi",        nullptr,       0,  EQ_CASE },
 297   { "rangle",     nullptr,       0,  0   },
 298   { "rarrow",     "\\rightarrow", 0,  EQ_CASE },
 299   { "rbrace",     "]",        0,  0   },
 300   { "rbrace",     nullptr,       0,  0   },
 301   { "rceil",      nullptr,       0,  0   },
 302   { "rcol",       nullptr,       0,  0   }, // LATER
 303   { "real",       "\\Re",     0,  0   },
 304   { "reimage",    nullptr,       0,  0   },
 305   { "rel",        nullptr,       0,  0   },
 306   { "rfloor",     nullptr,       0,  0   },
 307   { "rho",        nullptr,       0,  EQ_CASE },
 308   { "right",      nullptr,       0,  0   },
 309   { "rightarrow", nullptr,       0,  EQ_CASE },
 310   { "rlharpoons", nullptr,       0,  0   },
 311   { "rm",         nullptr,       0,  0   },
 312   { "root",       "\\sqrt",   1,  0   },
 313   { "rparen",     ")",        0,  0   },
 314   { "rpile",      nullptr,       0,  0   }, // LATER
 315   { "rtangle",    nullptr,       0,  0   },
 316   { "sangle",     nullptr,       0,  0   },
 317   { "scale",      nullptr,       0,  0   },
 318   { "searrow",    nullptr,       0,  0   },
 319   { "sec",        nullptr,       0,  0   },
 320   { "sigma",      nullptr,       0,  EQ_CASE },
 321   { "sim",        nullptr,       0,  0   },
 322   { "simeq",      nullptr,       0,  0   },
 323   { "sin",        nullptr,       0,  0   },
 324   { "sinh",       nullptr,       0,  0   },
 325   { "slash",      nullptr,       0,  0   },
 326   { "smallint",   nullptr,       0,  0   },
 327   { "smallinter", nullptr,       0,  0   },
 328   { "smalloint",  nullptr,       0,  0   },
 329   { "smallprod",  nullptr,       0,  0   },
 330   { "smallsum",   nullptr,       0,  0   },
 331   { "smallunion", nullptr,       0,  0   },
 332   { "smcoprod",   nullptr,       0,  0   },
 333   { "sqcap",      nullptr,       0,  0   },
 334   { "sqcup",      nullptr,       0,  0   },
 335   { "sqrt",       nullptr,       1,  0   },
 336   { "sqsubset",   nullptr,       0,  0   },
 337   { "sqsubseteq", nullptr,       0,  0   },
 338   { "sqsupset",   nullptr,       0,  0   },
 339   { "sqsupseteq", nullptr,       0,  0   },
 340   { "star",       nullptr,       0,  0   },
 341   { "sub",        "_",        0,  0   },
 342   { "subset",     nullptr,       0,  0   },
 343   { "subseteq",   nullptr,       0,  0   },
 344   { "succ",       nullptr,       0,  0   },
 345   { "sum",        nullptr,       0,  0   },
 346   { "sup",        "^",        0,  0   },
 347   { "superset",   nullptr,       0,  0   },
 348   { "supset",     nullptr,       0,  0   },
 349   { "supseteq",   nullptr,       0,  0   },
 350   { "swarrow",    nullptr,       0,  0   },
 351   { "tan",        nullptr,       0,  0   },
 352   { "tanh",       nullptr,       0,  0   },
 353   { "tau",        nullptr,       0,  EQ_CASE },
 354   { "therefore",  nullptr,       0,  0   },
 355   { "theta",      nullptr,       0,  EQ_CASE },
 356   { "tilde",      "\\widetilde",  1,  0   },
 357   { "times",      nullptr,       0,  0   },
 358   { "to",         "^",        1,  0   },
 359   { "top",        nullptr,       0,  0   },
 360   { "triangle",   nullptr,       0,  0   },
 361   { "triangled",  nullptr,       0,  0   },
 362   { "trianglel",  nullptr,       0,  0   },
 363   { "triangler",  nullptr,       0,  0   },
 364   { "triangleu",  nullptr,       0,  0   },
 365   { "udarrow",    "\\updownarrow",0,  EQ_CASE },
 366   { "under",      "\\underline",  1,  0   },
 367   { "underline",  "\\underline",  1,  0   },
 368   { "union",      "\\bigcup", 0,  0   },
 369   { "uparrow",    nullptr,       0,  EQ_CASE },
 370   { "uplus",      nullptr,       0,  0   },
 371   { "upsilon",    nullptr,       0,  EQ_CASE },
 372   { "varepsilon", nullptr,       0,  0   },
 373   { "varphi",     nullptr,       0,  0   },
 374   { "varpi",      nullptr,       0,  0   },
 375   { "varrho",     nullptr,       0,  0   },
 376   { "varsigma",   nullptr,       0,  0   },
 377   { "vartheta",   nullptr,       0,  0   },
 378   { "varupsilon", nullptr,       0,  0   },
 379   { "vdash",      nullptr,       0,  0   },
 380   { "vdots",      nullptr,       0,  0   },
 381   { "vec",        nullptr,       1,  0   },
 382   { "vee",        nullptr,       0,  0   },
 383   { "vert",       nullptr,       0,  0   },
 384   { "wedge",      nullptr,       0,  0   },
 385   { "wp",         nullptr,       0,  0   },
 386   { "xi",         nullptr,       0,  EQ_CASE },
 387   { "xor",        nullptr,       0,  0   },
 388   { "zeta",       nullptr,       0,  EQ_CASE }
 389 };
 390
 391 static const hwpeq *lookup_eqn(char const *str)
 392 {
 393   static const int eqCount = SAL_N_ELEMENTS(eq_tbl);
 394   int l = 0, r = eqCount;
 395   const hwpeq *result = nullptr;
 396
 397   while( l < r ) {
 398     const int m = (l + r) / 2;
 399     const int k = strcmp(eq_tbl[m].key, str);
 400     if( k == 0 ) {
 401       result = eq_tbl + m;
 402       break;
 403     }
 404     else if( k < 0 )
 405       l = m + 1;
 406     else
 407       r = m;
 408   }
 409   return result;
 410 }
 411
 412 /* If only the first character is uppercase or all characters are uppercase, change to lowercase */
 413 static void make_keyword( char *keyword, const char *token)
 414 {
 415     char* ptr;
 416     bool result = true;
 417     int len = strlen(token);
 418     assert(keyword);
 419
 420     if( 255 < len )
 421     {
 422         len = 255;
 423     }
 424     memcpy(keyword, token, len);
 425     keyword[len] = 0;
 426
 427     if( (token[0] & 0x80) || rtl::isAsciiLowerCase(static_cast<unsigned char>(token[0])) || strlen(token) < 2 )
 428         return;
 429
 430     bool capital = rtl::isAsciiUpperCase(
 431         static_cast<unsigned char>(keyword[1]));
 432     for( ptr = keyword + 2; *ptr && result; ptr++ )
 433     {
 434         if( (*ptr & 0x80) ||
 435             (!capital && rtl::isAsciiUpperCase(static_cast<unsigned char>(*ptr))) ||
 436             (capital && rtl::isAsciiLowerCase(static_cast<unsigned char>(*ptr))) )
 437         {
 438             result = false;
 439         }
 440     }
 441
 442     if( result )
 443     {
 444         ptr = keyword;
 445         while( *ptr )
 446         {
 447             if( rtl::isAsciiUpperCase(static_cast<unsigned char>(*ptr)) )
 448                 *ptr = sal::static_int_cast<char>(
 449                     rtl::toAsciiLowerCase(static_cast<unsigned char>(*ptr)));
 450             ptr++;
 451         }
 452     }
 453 }
 454
 455 // token reading function
 456 struct eq_stack {
 457   MzString  white;
 458   MzString  token;
 459   istream   *strm;
 460
 461   eq_stack() { strm = nullptr; };
 462   bool state(istream const *s) {
 463     if( strm != s) { white = nullptr; token = nullptr; }
 464     return token.length() != 0;
 465   }
 466 };
 467
// Pushback slot shared by the tokenizer helpers.  It is only set up inside
// eq2latex(); the helpers dereference it without a null check.
static eq_stack *stk = nullptr;
 469
 470 static void push_token(MzString const &white, MzString const &token, istream *strm)
 471 {
 472   // one time stack
 473   assert(stk->token.length() == 0);
 474
 475   stk->white = white;
 476   stk->token = token;
 477   stk->strm = strm;
 478 }
 479
 480 /*
 481  * It returns the length of the read tokens.
 482  *
 483  * control char, control sequence, binary sequence,
 484  * alphabet string, single character */
 485 static int next_token(MzString &white, MzString &token, istream *strm)
 486 {
 487   std::istream::int_type ch = 0;
 488
 489   if( stk->state(strm) ) {
 490     white = stk->white;
 491     token = stk->token;
 492     stk->token = nullptr;
 493     stk->white = nullptr;
 494     return token.length();
 495   }
 496
 497   token = nullptr;
 498   white = nullptr;
 499   if( !strm->good() || (ch = strm->get()) == std::istream::traits_type::eof() )
 500     return 0;
 501
 502   // read preceding ws
 503   if( IS_WS(ch) ) {
 504     do white << static_cast<char>(ch);
 505     while( IS_WS(ch = strm->get()) );
 506   }
 507
 508   if( ch == '\\' || ch & 0x80
 509       || (ch != std::istream::traits_type::eof() && rtl::isAsciiAlpha(ch)) )
 510   {
 511     if( ch == '\\' ) {
 512       token << static_cast<char>(ch);
 513       ch = strm->get();
 514     }
 515     do {
 516       token << static_cast<char>(ch);
 517       ch = strm->get();
 518     } while( ch != std::istream::traits_type::eof()
 519              && (ch & 0x80 || rtl::isAsciiAlpha(ch)) ) ;
 520     strm->putback(static_cast<char>(ch));
 521     /* special treatment of sub, sub, over, atop
 522        The reason for this is that affect next_state().
 523      */
 524     if( !STRICMP("sub", token) || !STRICMP("from", token) ||
 525     !STRICMP("sup", token) || !STRICMP("to", token) ||
 526     !STRICMP("over", token) || !STRICMP("atop", token) ||
 527     !STRICMP("left", token) || !STRICMP("right", token) )
 528     {
 529       char buf[256];
 530       make_keyword(buf, token);
 531       token = buf;
 532     }
 533     if( !token.compare("sub") || !token.compare("from") )
 534       token = "_";
 535     if( !token.compare("sup") || !token.compare("to") )
 536       token = "^";
 537   }
 538   else if( IS_BINARY(ch) ) {
 539     do token << static_cast<char>(ch);
 540     while( IS_BINARY(ch = strm->get()) );
 541     strm->putback(static_cast<char>(ch));
 542   }
 543   else if( ch != std::istream::traits_type::eof() && rtl::isAsciiDigit(ch) ) {
 544     do {
 545         token << static_cast<char>(ch);
 546         ch = strm->get();
 547     } while( ch != std::istream::traits_type::eof() && rtl::isAsciiDigit(ch) );
 548     strm->putback(static_cast<char>(ch));
 549   }
 550   else
 551     token << static_cast<char>(ch);
 552
 553   return token.length();
 554 }
 555
 556 static std::istream::int_type read_white_space(MzString& outs, istream *strm)
 557 {
 558   std::istream::int_type result;
 559
 560   if( stk->state(strm) ) {
 561     outs << stk->white;
 562     stk->white = nullptr;
 563     result = std::istream::traits_type::to_int_type(stk->token[0]);
 564   }
 565   else {
 566     std::istream::int_type ch;
 567     while( IS_WS(ch = strm->get()) )
 568       outs << static_cast<char>(ch);
 569     strm->putback(static_cast<char>(ch));
 570     result = ch;
 571   }
 572   return result;
 573 }
 574
/* If the argument is not required, delimiters are space and brace for each items.
   sqrt {ab} c = sqrt {ab} c
   {,} are for grouping
   ^ ,_ are for combination

   Sorting of formats with arguments, such as sqrt
      sqrt a -> sqrt {a}
      sqrt {a} -> sqrt {a}
   If there is more than one argument, it eliminates backslash between arguments.
      \frac a b -> frac {a} {b}
   Clean the form of over
      a over b -> {a} over {b}
 */

/*
 * Read one "word" from `strm` and append its normalized text to `outs`.
 *
 * A word is a brace group, a left ... right group, or a run of tokens that
 * belong together.  Keywords found in eq_tbl get each of their `nargs`
 * arguments read recursively and wrapped in braces when the argument does
 * not already start with '{'.
 *
 * status - script state of the enclosing word (SCRIPT_* bits); a '^' or '_'
 *          that is not covered by it ends this word
 *
 * Returns the first character of the first token read, or 0 when no token
 * was available.
 */
static int eq_word(MzString& outs, istream *strm, int status)
{
  MzString  token, white, state;
  int       result;
  char      keyword[256];
  const hwpeq *eq;

  next_token(white, token, strm);
  if (token.length() <= 0)
      return 0;
  result = token[0];

  if( token.compare("{") == 0 ) {
    // brace group: copy everything up to the matching "}"
    state << white << token;
    eq_sentence(state, strm, "}");
  }
  else if( token.compare("left") == 0 ) {
    // "left" <token> ... "right" <token>
    state << white << token;
    next_token(white, token, strm);
    state << white << token;

    eq_sentence(state, strm, "right");

    next_token(white, token, strm);
    state << white << token;
  }
  else {
    /* Normal token */
    int script_status = SCRIPT_NONE;
    while( true ) {
      state << white << token;
      make_keyword(keyword, token);
      // consecutive '^' / '_' accumulate; any other token resets the state
      if( token[0] == '^' )
        script_status |= SCRIPT_SUP;
      else if( token[0] == '_' )
        script_status |= SCRIPT_SUB;
      else
        script_status = SCRIPT_NONE;

      if( nullptr != (eq = lookup_eqn(keyword)) ) {
        int nargs = eq->nargs;
        while( nargs-- ) {
          // add braces around the argument unless it already has them
          const std::istream::int_type ch = read_white_space(state, strm);
          if( ch != '{' ) state << '{';
          eq_word(state, strm, script_status);
          if( ch != '{' ) state << '}';
        }
      }

      if( !next_token(white, token, strm) )
        break;
      // end loop and restart with this
      if( (token[0] == '^' && status && !(status & SCRIPT_SUP)) ||
          (token[0] == '_' && status && !(status & SCRIPT_SUB)) ||
          strcmp("over", token) == 0 || strcmp("atop", token) == 0 ||
          strchr("{}#&`", token[0]) ||
          (!strchr("^_", token[0]) && white.length()) )
      {
        // the token belongs to the next word: hand it back to the reader
        push_token(white, token, strm);
        break;
      }
    }
  }
  outs << state;

  return result;
}
 656
 657 static bool eq_sentence(MzString& outs, istream *strm, const char *end)
 658 {
 659   MzString  state;
 660   MzString  white, token;
 661   bool      multiline = false;
 662
 663   read_white_space(outs, strm);
 664   while( eq_word(state, strm) ) {
 665     if( !next_token(white, token, strm) ||
 666     (end && strcmp(token.c_str(), end) == 0) )
 667     {
 668       state << white << token;
 669       break;
 670     }
 671     push_token(white, token, strm);
 672     if( !token.compare("atop") || !token.compare("over") )
 673       outs << '{' << state << '}';
 674     else {
 675       if( !token.compare("#") )
 676         multiline = true;
 677       outs << state;
 678     }
 679     state =  nullptr;
 680     read_white_space(outs, strm);
 681   }
 682   outs << state;
 683   return multiline;
 684 }
 685
/*
 * Translate tokens from `strm` to LaTeX and append them to `sstr`.
 *
 * sentinel - optional set of characters; a one-character token contained in
 *            it stops the conversion (the token itself is not written)
 *
 * Tokens found in eq_tbl are replaced by their `latex` text, or by a
 * backslash followed by the key when `latex` is nullptr.
 *
 * Returns the first character of the last token read, or 0 when an EQ_ENV
 * keyword is not followed by '{'.
 */
static char eq2ltxconv(MzString& sstr, istream *strm, const char *sentinel)
{
  MzString  white, token;
  char      key[256];
  std::istream::int_type ch;
  int       result;

  while( 0 != (result = next_token(white, token, strm)) ) {
    if( sentinel && (result == 1) && strchr(sentinel, token[0]) )
      break;
    make_keyword(key, token);
    const hwpeq *eq = nullptr;
    if( (eq = lookup_eqn(key)) != nullptr ) {
      // key is reused as the output buffer for the LaTeX spelling
      if( eq->latex )
        strcpy(key, eq->latex);
      else {
        key[0] = '\\';
        strcpy(key + 1, eq->key);
      }
      // keep an upper-case first letter, e.g. Alpha -> \Alpha
      if( (eq->flag & EQ_CASE)
          && rtl::isAsciiUpperCase(static_cast<unsigned char>(token[0])) )
        key[1] = sal::static_int_cast<char>(
            rtl::toAsciiUpperCase(static_cast<unsigned char>(key[1])));
      token = key;
    }

    if( token[0] == '{' ) { // grouping
      sstr << white << token;
      eq2ltxconv(sstr, strm, "}");
      sstr << '}';
    }
    else if( eq && (eq->flag & EQ_ENV) ) {
      // environment: key { body } -> \begin{key} body \end{key}
      next_token(white, token, strm);
      if( token[0] != '{' )
        return 0;
      sstr << "\\begin" << "{" << eq->key << "}" << ENDL ;
      eq2ltxconv(sstr, strm, "}");
      if( sstr[sstr.length() - 1] != '\n' )
        sstr << ENDL ;
      sstr << "\\end" << "{" << eq->key << "}" << ENDL ;
    }
    else if( eq && (eq->flag & EQ_ATOP) ) {
      // infix form: reopen the group written last so that the result reads
      // "{a \over b}"
      if( sstr.length() == 0 )
        sstr << '{';
      else {
        int pos  = sstr.rfind('}');
        if( 0 < pos)
          sstr.replace(pos, ' ');
      }
      sstr << token;
      while( (ch = strm->get()) != std::istream::traits_type::eof()
             && IS_WS(ch) )
        sstr << static_cast<char>(ch);
      // NOTE(review): when ch is neither '{' nor EOF it has been consumed and
      // is not put back, so that character appears to be dropped - confirm.
      if( ch != '{' )
        sstr << "{}";
      else {
        // the opening '{' is intentionally not written; only the closing one is
        eq2ltxconv(sstr, strm, "}");
        sstr << '}';
      }
    }
    else
      sstr << white << token;
  }
  return token[0];
}
 751
 752 void eq2latex(MzString& outs, char const *s)
 753 {
 754   assert(s);
 755   if( stk == nullptr )
 756     stk = new eq_stack;
 757
 758   MzString  tstr;
 759
 760   istringstream tstrm(s);
 761   bool eqnarray = eq_sentence(tstr, &tstrm);
 762   istringstream strm(tstr.c_str());
 763
 764   if( eqnarray )
 765     outs << "\\begin{array}{rllll}" << ENDL;
 766   eq2ltxconv(outs, &strm, nullptr);
 767   outs << ENDL;
 768   if( eqnarray )
 769     outs << "\\end{array}" << ENDL;
 770   delete stk;
 771   stk = nullptr;
 772 }
 773
 774 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */