hwpfilter/source/hwpeq.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <sal/config.h>
  21
  22 #include <assert.h>
  23 #include <stdio.h>
  24 #include <string.h>
  25
  26 // DVO: always use standard headers:
  27 #include <istream>
  28 #include <sstream>
  29
  30 #include "hwpeq.h"
  31 #include <sal/types.h>
  32 #include <sal/macros.h>
  33
  34 #include <rtl/character.hxx>
  35
  36 /* @Man: change the hwp formula to LaTeX */
  37
  38 #define EQ_CASE 0x01    // case sensitive cmd
  39 #define EQ_ENV  0x02    // equiv to latex environment
  40 #define EQ_ATOP 0x04    // must revert order
  41
  42 static bool IS_WS(std::istream::int_type ch) {
  43     return ch != std::istream::traits_type::eof()
  44         && rtl::isAsciiWhiteSpace(
  45             static_cast<unsigned char>(
  46                 std::istream::traits_type::to_char_type(ch)));
  47 }
  48
  49 static bool IS_BINARY(std::istream::int_type ch) {
  50     return ch != std::istream::traits_type::eof()
  51         && strchr("+-<=>", std::istream::traits_type::to_char_type(ch));
  52 }
  53
  54 // sub and sup script status
  55 enum { SCRIPT_NONE, SCRIPT_SUB, SCRIPT_SUP, SCRIPT_ALL};
  56
// Forward declarations: eq_word() and eq_sentence() call each other.
// eq_word() appends one normalized "word" of the formula to outs and returns
// the first character of the token it started with (0 when nothing was read);
// script carries the SCRIPT_* status of the enclosing sub/superscript.
static int  eq_word(OString& outs, std::istream *strm, int script = SCRIPT_NONE);
// eq_sentence() appends a sequence of words to outs, stopping at the token
// equal to end (if given) or at the end of the stream; it returns true when a
// "#" token was seen.
static bool eq_sentence(OString& outs, std::istream *strm, const char *end = nullptr);
  59
  60 namespace {
  61
  62 struct hwpeq {
  63   const char    *key;       // hwp math keyword
  64   const char    *latex;     // corresponding latex keyword
  65   int           nargs;      // # of argument
  66   unsigned char flag;       // case sensitive?
  67 };
  68
  69 }
  70
  71 const hwpeq eq_tbl[] = {
  72   { "!=",         "\\equiv ", 0,  0   },
  73   { "#",          "\\\\",     0,  0   },
  74   { "+-",         "\\pm ",    0,  0   },
  75   { "-+",         "\\mp ",    0,  0   },
  76   { "<=",         "\\leq ",   0,  0   },
  77   { "==",         "\\equiv ", 0,  0   },
  78   { ">=",         "\\geq ",   0,  0   },
  79   { "Pr",         nullptr,       0,  0   },
  80   { "^",          "^",        1,  0   },
  81   { "_",          "_",        1,  0   },
  82   { "`",          "\\;",      0,  0   },
  83   { "acute",      nullptr,       1,  0   },
  84   { "aleph",      nullptr,       0,  0   },
  85   { "alpha",      nullptr,       0,  EQ_CASE },
  86   { "amalg",      nullptr,       0,  0   },
  87   { "and",        nullptr,       0,  0   },
  88   { "angle",      nullptr,       0,  0   },
  89   { "angstrom",   nullptr,       0,  0   },
  90   { "approx",     nullptr,       0,  0   },
  91   { "arc",        nullptr,       0,  0   },
  92   { "arccos",     nullptr,       0,  0   },
  93   { "arch",       nullptr,       0,  0   },
  94   { "arcsin",     nullptr,       0,  0   },
  95   { "arctan",     nullptr,       0,  0   },
  96   { "arg",        nullptr,       0,  0   },
  97   { "assert",     "\\vdash",  0,  0   },
  98   { "ast",        nullptr,       0,  0   },
  99   { "asymp",      nullptr,       0,  0   },
 100   { "atop",       nullptr,       1,  EQ_ATOP },
 101   { "backslash",  nullptr,       0,  0   },
 102   { "bar",        nullptr,       1,  0   },
 103   { "because",    nullptr,       0,  0   },
 104   { "beta",       nullptr,       0,  EQ_CASE },
 105   { "big",        nullptr,       0,  EQ_CASE },
 106   { "bigcap",     nullptr,       0,  0   },
 107   { "bigcirc",    nullptr,       0,  0   },
 108   { "bigcup",     nullptr,       0,  0   },
 109   { "bigg",       nullptr,       0,  EQ_CASE },
 110   { "bigodiv",    nullptr,       0,  0   },
 111   { "bigodot",    nullptr,       0,  0   },
 112   { "bigominus",  nullptr,       0,  0   },
 113   { "bigoplus",   nullptr,       0,  0   },
 114   { "bigotimes",  nullptr,       0,  0   },
 115   { "bigsqcap",   nullptr,       0,  0   },
 116   { "bigsqcup",   nullptr,       0,  0   },
 117   { "biguplus",   nullptr,       0,  0   },
 118   { "bigvee",     nullptr,       0,  0   },
 119   { "bigwedge",   nullptr,       0,  0   },
 120   { "binom",      nullptr,       2,  0   },
 121   { "bmatrix",    nullptr,       0,  EQ_ENV  },
 122   { "bold",       nullptr,       0,  0   },
 123   { "bot",        nullptr,       0,  0   },
 124   { "breve",      nullptr,       1,  0   },
 125   { "buildrel",   nullptr,       0,  0   }, // LATER
 126   { "bullet",     nullptr,       0,  0   },
 127   { "cap",        nullptr,       0,  0   },
 128   { "cases",      nullptr,       0,  EQ_ENV  },
 129   { "ccol",       nullptr,       0,  0   }, /* Center vertically */
 130   { "cdot",       nullptr,       0,  0   },
 131   { "cdots",      nullptr,       0,  0   },
 132   { "check",      nullptr,       1,  0   },
 133   { "chi",        nullptr,       0,  EQ_CASE },
 134   { "choose",     nullptr,       0,  EQ_ATOP },
 135   { "circ",       nullptr,       0,  0   },
 136   { "col",        nullptr,       0,  0   }, // LATER
 137   { "cong",       nullptr,       0,  0   },
 138   { "coprod",     nullptr,       0,  0   },
 139   { "cos",        nullptr,       0,  0   },
 140   { "cosec",      nullptr,       0,  0   },
 141   { "cosh",       nullptr,       0,  0   },
 142   { "cot",        nullptr,       0,  0   },
 143   { "coth",       nullptr,       0,  0   },
 144   { "cpile",      nullptr,       0,  0   }, // LATER
 145   { "csc",        nullptr,       0,  0   },
 146   { "cup",        nullptr,       0,  0   },
 147   { "dagger",     nullptr,       0,  0   },
 148   { "dashv",      nullptr,       0,  0   },
 149   { "ddagger",    nullptr,       0,  0   },
 150   { "ddot",       nullptr,       1,  0   },
 151   { "ddots",      nullptr,       0,  0   },
 152   { "def",        nullptr,       0,  0   },
 153   { "deg",        nullptr,       0,  0   },
 154   { "del",        nullptr,       0,  0   },
 155   { "delta",      nullptr,       0,  EQ_CASE },
 156   { "diamond",    nullptr,       0,  0   },
 157   { "dim",        nullptr,       0,  0   },
 158   { "div",        nullptr,       0,  0   },
 159   { "divide",     nullptr,       0,  0   },
 160   { "dline",      nullptr,       0,  0   },
 161   { "dmatrix",    nullptr,       0,  EQ_ENV  },
 162   { "dot",        nullptr,       1,  0   },
 163   { "doteq",      nullptr,       0,  0   },
 164   { "dotsaxis",   nullptr,       0,  0   },
 165   { "dotsdiag",   nullptr,       0,  0   },
 166   { "dotslow",    "\\ldots",  0,  0   },
 167   { "dotsvert",   "\\vdots",  0,  0   },
 168   { "downarrow",  nullptr,       0,  EQ_CASE },
 169   { "dsum",       "+",        0,  0   },
 170   { "dyad",       nullptr,       0,  0   }, // LATER
 171   { "ell",        nullptr,       0,  0   },
 172   { "emptyset",   nullptr,       0,  0   },
 173   { "epsilon",    nullptr,       0,  EQ_CASE },
 174   { "eqalign",    nullptr,       0,  EQ_ENV  },
 175   { "equiv",      nullptr,       0,  0   },
 176   { "eta",        nullptr,       0,  EQ_CASE },
 177   { "exarrow",    nullptr,       0,  0   },
 178   { "exist",      "\\exists", 0,  0   },
 179   { "exists",     nullptr,       0,  0   },
 180   { "exp",        nullptr,       0,  EQ_CASE },
 181   { "for",        nullptr,       0,  0   },
 182   { "forall",     nullptr,       0,  0   },
 183   { "from",       "_",        1,  0   },
 184   { "gamma",      nullptr,       0,  EQ_CASE },
 185   { "gcd",        nullptr,       0,  0   },
 186   { "ge",         "\\geq",    0,  0   },
 187   { "geq",        nullptr,       0,  0   },
 188   { "ggg",        nullptr,       0,  0   },
 189   { "grad",       nullptr,       0,  0   },
 190   { "grave",      nullptr,       1,  0   },
 191   { "hat",        "\\widehat",    1,  0   },
 192   { "hbar",       nullptr,       0,  0   },
 193   { "hom",        nullptr,       0,  0   },
 194   { "hookleft",   nullptr,       0,  0   },
 195   { "hookright",  nullptr,       0,  0   },
 196   { "identical",  nullptr,       0,  0   }, // LATER
 197   { "if",         nullptr,       0,  0   },
 198   { "imag",       nullptr,       0,  0   },
 199   { "image",      nullptr,       0,  0   },
 200   { "imath",      nullptr,       0,  0   },
 201   { "in",         nullptr,       0,  0   },
 202   { "inf",        "\\infty",  0,  0   },
 203   { "infinity",   "\\infty",  0,  0   },
 204   { "infty",      nullptr,       0,  0   },
 205   { "int",        nullptr,       0,  0   },
 206   { "integral",   "\\int",    0,  0   },
 207   { "inter",      "\\bigcap", 0,  0   },
 208   { "iota",       nullptr,       0,  EQ_CASE },
 209   { "iso",        nullptr,       0,  0   }, // ams
 210   { "it",         nullptr,       0,  0   },
 211   { "jmath",      nullptr,       0,  0   },
 212   { "kappa",      nullptr,       0,  EQ_CASE },
 213   { "ker",        nullptr,       0,  0   },
 214   { "lambda",     nullptr,       0,  EQ_CASE },
 215   { "land",       nullptr,       0,  0   }, // LATER
 216   { "langle",     nullptr,       0,  0   },
 217   { "larrow",     "\\leftarrow",  0,  EQ_CASE },
 218   { "lbrace",     nullptr,       0,  0   },
 219   { "lbrack",     "[",        0,  0   },
 220   { "lceil",      nullptr,       0,  0   },
 221   { "lcol",       nullptr,       0,  0   }, // LATER
 222   { "ldots",      nullptr,       0,  0   },
 223   { "le",         nullptr,       0,  0   },
 224   { "left",       nullptr,       0,  0   },
 225   { "leftarrow",  nullptr,       0,  EQ_CASE },
 226   { "leq",        nullptr,       0,  0   },
 227   { "lfloor",     nullptr,       0,  0   },
 228   { "lg",         nullptr,       0,  0   },
 229   { "lim",        nullptr,       0,  EQ_CASE },
 230   { "line",       "\\vert",   0,  0   },
 231   { "liter",      "\\ell",    0,  0   },
 232   { "lll",        nullptr,       0,  0   }, // ams
 233   { "ln",         nullptr,       0,  0   },
 234   { "log",        nullptr,       0,  0   },
 235   { "lor",        "\\vee",    0,  0   },
 236   { "lparen",     "(",        0,  0   },
 237   { "lpile",      nullptr,       0,  0   }, // LATER
 238   { "lrarrow",    "\\leftrightarrow",   0,  EQ_CASE },
 239   { "lrharpoons", "\\leftrightharpoons",0,  0   },
 240   { "mapsto",     nullptr,       0,  0   },
 241   { "massert",    "\\dashv",  0,  0   },
 242   { "matrix",     nullptr,       0,  EQ_ENV  },
 243   { "max",        nullptr,       0,  0   },
 244   { "mho",        nullptr,       0,  0   }, // ams
 245   { "min",        nullptr,       0,  0   },
 246   { "minusplus",  nullptr,       0,  0   },
 247   { "mit",        "",     0,  0   }, // font
 248   { "mod",        "\\bmod",   0,  0   },
 249   { "models",     nullptr,       0,  0   },
 250   { "msangle",    nullptr,       0,  0   }, // LATER
 251   { "mu",         nullptr,       0,  EQ_CASE },
 252   { "nabla",      nullptr,       0,  0   },
 253   { "ne",         nullptr,       0,  0   },
 254   { "nearrow",    nullptr,       0,  0   },
 255   { "neg",        nullptr,       0,  0   },
 256   { "neq",        nullptr,       0,  0   },
 257   { "nequiv",     nullptr,       0,  0   },
 258   { "ni",         nullptr,       0,  0   },
 259   { "not",        nullptr,       0,  0   },
 260   { "notin",      nullptr,       0,  0   },
 261   { "nu",         nullptr,       0,  EQ_CASE },
 262   { "nwarrow",    nullptr,       0,  0   },
 263   { "odiv",       nullptr,       0,  0   },
 264   { "odot",       nullptr,       0,  0   },
 265   { "oint",       nullptr,       0,  0   },
 266   { "omega",      nullptr,       0,  EQ_CASE },
 267   { "omicron",    nullptr,       0,  EQ_CASE },
 268   { "ominus",     nullptr,       0,  0   },
 269   { "oplus",      nullptr,       0,  0   },
 270   { "or ",        nullptr,       0,  0   },
 271   { "oslash",     nullptr,       0,  0   },
 272   { "otimes",     nullptr,       0,  0   },
 273   { "over",       nullptr,       1,  EQ_ATOP },
 274   { "overline",   nullptr,       1,  0   },
 275   { "owns",       "\\ni",     0,  0   },
 276   { "parallel",   nullptr,       0,  0   },
 277   { "partial",    nullptr,       0,  0   },
 278   { "phantom",    nullptr,       0,  0   },
 279   { "phi",        nullptr,       0,  EQ_CASE },
 280   { "pi",         nullptr,       0,  EQ_CASE },
 281   { "pile",       nullptr,       0,  0   }, // LATER
 282   { "plusminus",  "\\pm",     0,  0   },
 283   { "pmatrix",    nullptr,       0,  EQ_ENV  },
 284   { "prec",       nullptr,       0,  0   },
 285   { "prep",       nullptr,       0,  0   },
 286   { "prime",      nullptr,       0,  0   },
 287   { "prod",       nullptr,       0,  0   },
 288   { "propto",     nullptr,       0,  0   },
 289   { "psi",        nullptr,       0,  EQ_CASE },
 290   { "rangle",     nullptr,       0,  0   },
 291   { "rarrow",     "\\rightarrow", 0,  EQ_CASE },
 292   { "rbrace",     "]",        0,  0   },
 293   { "rbrace",     nullptr,       0,  0   },
 294   { "rceil",      nullptr,       0,  0   },
 295   { "rcol",       nullptr,       0,  0   }, // LATER
 296   { "real",       "\\Re",     0,  0   },
 297   { "reimage",    nullptr,       0,  0   },
 298   { "rel",        nullptr,       0,  0   },
 299   { "rfloor",     nullptr,       0,  0   },
 300   { "rho",        nullptr,       0,  EQ_CASE },
 301   { "right",      nullptr,       0,  0   },
 302   { "rightarrow", nullptr,       0,  EQ_CASE },
 303   { "rlharpoons", nullptr,       0,  0   },
 304   { "rm",         nullptr,       0,  0   },
 305   { "root",       "\\sqrt",   1,  0   },
 306   { "rparen",     ")",        0,  0   },
 307   { "rpile",      nullptr,       0,  0   }, // LATER
 308   { "rtangle",    nullptr,       0,  0   },
 309   { "sangle",     nullptr,       0,  0   },
 310   { "scale",      nullptr,       0,  0   },
 311   { "searrow",    nullptr,       0,  0   },
 312   { "sec",        nullptr,       0,  0   },
 313   { "sigma",      nullptr,       0,  EQ_CASE },
 314   { "sim",        nullptr,       0,  0   },
 315   { "simeq",      nullptr,       0,  0   },
 316   { "sin",        nullptr,       0,  0   },
 317   { "sinh",       nullptr,       0,  0   },
 318   { "slash",      nullptr,       0,  0   },
 319   { "smallint",   nullptr,       0,  0   },
 320   { "smallinter", nullptr,       0,  0   },
 321   { "smalloint",  nullptr,       0,  0   },
 322   { "smallprod",  nullptr,       0,  0   },
 323   { "smallsum",   nullptr,       0,  0   },
 324   { "smallunion", nullptr,       0,  0   },
 325   { "smcoprod",   nullptr,       0,  0   },
 326   { "sqcap",      nullptr,       0,  0   },
 327   { "sqcup",      nullptr,       0,  0   },
 328   { "sqrt",       nullptr,       1,  0   },
 329   { "sqsubset",   nullptr,       0,  0   },
 330   { "sqsubseteq", nullptr,       0,  0   },
 331   { "sqsupset",   nullptr,       0,  0   },
 332   { "sqsupseteq", nullptr,       0,  0   },
 333   { "star",       nullptr,       0,  0   },
 334   { "sub",        "_",        0,  0   },
 335   { "subset",     nullptr,       0,  0   },
 336   { "subseteq",   nullptr,       0,  0   },
 337   { "succ",       nullptr,       0,  0   },
 338   { "sum",        nullptr,       0,  0   },
 339   { "sup",        "^",        0,  0   },
 340   { "superset",   nullptr,       0,  0   },
 341   { "supset",     nullptr,       0,  0   },
 342   { "supseteq",   nullptr,       0,  0   },
 343   { "swarrow",    nullptr,       0,  0   },
 344   { "tan",        nullptr,       0,  0   },
 345   { "tanh",       nullptr,       0,  0   },
 346   { "tau",        nullptr,       0,  EQ_CASE },
 347   { "therefore",  nullptr,       0,  0   },
 348   { "theta",      nullptr,       0,  EQ_CASE },
 349   { "tilde",      "\\widetilde",  1,  0   },
 350   { "times",      nullptr,       0,  0   },
 351   { "to",         "^",        1,  0   },
 352   { "top",        nullptr,       0,  0   },
 353   { "triangle",   nullptr,       0,  0   },
 354   { "triangled",  nullptr,       0,  0   },
 355   { "trianglel",  nullptr,       0,  0   },
 356   { "triangler",  nullptr,       0,  0   },
 357   { "triangleu",  nullptr,       0,  0   },
 358   { "udarrow",    "\\updownarrow",0,  EQ_CASE },
 359   { "under",      "\\underline",  1,  0   },
 360   { "underline",  "\\underline",  1,  0   },
 361   { "union",      "\\bigcup", 0,  0   },
 362   { "uparrow",    nullptr,       0,  EQ_CASE },
 363   { "uplus",      nullptr,       0,  0   },
 364   { "upsilon",    nullptr,       0,  EQ_CASE },
 365   { "varepsilon", nullptr,       0,  0   },
 366   { "varphi",     nullptr,       0,  0   },
 367   { "varpi",      nullptr,       0,  0   },
 368   { "varrho",     nullptr,       0,  0   },
 369   { "varsigma",   nullptr,       0,  0   },
 370   { "vartheta",   nullptr,       0,  0   },
 371   { "varupsilon", nullptr,       0,  0   },
 372   { "vdash",      nullptr,       0,  0   },
 373   { "vdots",      nullptr,       0,  0   },
 374   { "vec",        nullptr,       1,  0   },
 375   { "vee",        nullptr,       0,  0   },
 376   { "vert",       nullptr,       0,  0   },
 377   { "wedge",      nullptr,       0,  0   },
 378   { "wp",         nullptr,       0,  0   },
 379   { "xi",         nullptr,       0,  EQ_CASE },
 380   { "xor",        nullptr,       0,  0   },
 381   { "zeta",       nullptr,       0,  EQ_CASE }
 382 };
 383
 384 static const hwpeq *lookup_eqn(char const *str)
 385 {
 386   static const int eqCount = SAL_N_ELEMENTS(eq_tbl);
 387   int l = 0, r = eqCount;
 388   const hwpeq *result = nullptr;
 389
 390   while( l < r ) {
 391     const int m = (l + r) / 2;
 392     const int k = strcmp(eq_tbl[m].key, str);
 393     if( k == 0 ) {
 394       result = eq_tbl + m;
 395       break;
 396     }
 397     else if( k < 0 )
 398       l = m + 1;
 399     else
 400       r = m;
 401   }
 402   return result;
 403 }
 404
 405 /* If only the first character is uppercase or all characters are uppercase, change to lowercase */
 406 static void make_keyword( char *keyword, std::string_view token)
 407 {
 408     char* ptr;
 409     bool result = true;
 410     int len = token.length();
 411     assert(keyword);
 412
 413     if( 255 < len )
 414     {
 415         len = 255;
 416     }
 417     memcpy(keyword, token.data(), len);
 418     keyword[len] = 0;
 419
 420     if( (token[0] & 0x80) || rtl::isAsciiLowerCase(static_cast<unsigned char>(token[0])) || token.length() < 2 )
 421         return;
 422
 423     bool capital = rtl::isAsciiUpperCase(
 424         static_cast<unsigned char>(keyword[1]));
 425     for( ptr = keyword + 2; *ptr && result; ptr++ )
 426     {
 427         if( (*ptr & 0x80) ||
 428             (!capital && rtl::isAsciiUpperCase(static_cast<unsigned char>(*ptr))) ||
 429             (capital && rtl::isAsciiLowerCase(static_cast<unsigned char>(*ptr))) )
 430         {
 431             result = false;
 432         }
 433     }
 434
 435     if( result )
 436     {
 437         ptr = keyword;
 438         while( *ptr )
 439         {
 440             if( rtl::isAsciiUpperCase(static_cast<unsigned char>(*ptr)) )
 441                 *ptr = sal::static_int_cast<char>(
 442                     rtl::toAsciiLowerCase(static_cast<unsigned char>(*ptr)));
 443             ptr++;
 444         }
 445     }
 446 }
 447
 448 namespace {
 449
 450 // token reading function
 451 struct eq_stack {
 452   OString  white;
 453   OString  token;
 454   std::istream   *strm;
 455
 456   eq_stack() { strm = nullptr; };
 457   bool state(std::istream const *s) {
 458     if( strm != s) { white.clear(); token.clear(); }
 459     return token.getLength() != 0;
 460   }
 461 };
 462
 463 }
 464
 465 static eq_stack *stk = nullptr;
 466
 467 static void push_token(OString const &white, OString const &token, std::istream *strm)
 468 {
 469   // one time stack
 470   assert(stk->token.getLength() == 0);
 471
 472   stk->white = white;
 473   stk->token = token;
 474   stk->strm = strm;
 475 }
 476
 477 /*
 478  * It returns the length of the read tokens.
 479  *
 480  * control char, control sequence, binary sequence,
 481  * alphabet string, single character */
 482 static int next_token(OString &white, OString &token, std::istream *strm)
 483 {
 484   std::istream::int_type ch = 0;
 485
 486   if( stk->state(strm) ) {
 487     white = stk->white;
 488     token = stk->token;
 489     stk->token.clear();
 490     stk->white.clear();
 491     return token.getLength();
 492   }
 493
 494   token.clear();
 495   white.clear();
 496   if( !strm->good() )
 497     return 0;
 498   ch = strm->get();
 499   if( ch == std::istream::traits_type::eof() )
 500     return 0;
 501
 502   // read preceding ws
 503   if( IS_WS(ch) ) {
 504     do
 505     {
 506         white += OStringChar(static_cast<char>(ch));
 507         ch = strm->get();
 508     } while (IS_WS(ch));
 509   }
 510
 511   if( ch == '\\' || ch & 0x80
 512       || (ch != std::istream::traits_type::eof() && rtl::isAsciiAlpha(ch)) )
 513   {
 514     if( ch == '\\' ) {
 515       token += OStringChar(static_cast<char>(ch));
 516       ch = strm->get();
 517     }
 518     do {
 519       token += OStringChar(static_cast<char>(ch));
 520       ch = strm->get();
 521     } while( ch != std::istream::traits_type::eof()
 522              && (ch & 0x80 || rtl::isAsciiAlpha(ch)) ) ;
 523     strm->putback(static_cast<char>(ch));
 524     /* special treatment of sub, sub, over, atop
 525        The reason for this is that affect next_state().
 526      */
 527     if( token.equalsIgnoreAsciiCase("sub") || token.equalsIgnoreAsciiCase("from") ||
 528     token.equalsIgnoreAsciiCase("sup") || token.equalsIgnoreAsciiCase("to") ||
 529     token.equalsIgnoreAsciiCase("over") || token.equalsIgnoreAsciiCase("atop") ||
 530     token.equalsIgnoreAsciiCase("left") || token.equalsIgnoreAsciiCase("right") )
 531     {
 532       char buf[256];
 533       make_keyword(buf, token);
 534       token = buf;
 535     }
 536     if( token == "sub" || token == "from" )
 537       token = "_"_ostr;
 538     if( token == "sup" || token == "to" )
 539       token = "^"_ostr;
 540   }
 541   else if( IS_BINARY(ch) ) {
 542     do
 543     {
 544         token += OStringChar(static_cast<char>(ch));
 545         ch = strm->get();
 546     }
 547     while( IS_BINARY(ch) );
 548     strm->putback(static_cast<char>(ch));
 549   }
 550   else if( ch != std::istream::traits_type::eof() && rtl::isAsciiDigit(ch) ) {
 551     do {
 552         token += OStringChar(static_cast<char>(ch));
 553         ch = strm->get();
 554     } while( ch != std::istream::traits_type::eof() && rtl::isAsciiDigit(ch) );
 555     strm->putback(static_cast<char>(ch));
 556   }
 557   else
 558     token += OStringChar(static_cast<char>(ch));
 559
 560   return token.getLength();
 561 }
 562
 563 static std::istream::int_type read_white_space(OString& outs, std::istream *strm)
 564 {
 565   std::istream::int_type result;
 566
 567   if( stk->state(strm) ) {
 568     outs += stk->white;
 569     stk->white.clear();
 570     result = std::istream::traits_type::to_int_type(stk->token[0]);
 571   }
 572   else {
 573     std::istream::int_type ch;
 574     for (;;)
 575     {
 576         ch = strm->get();
 577         if (!IS_WS(ch))
 578             break;
 579         outs += OStringChar(static_cast<char>(ch));
 580     }
 581     strm->putback(static_cast<char>(ch));
 582     result = ch;
 583   }
 584   return result;
 585 }
 586
 587 /* If the argument is not required, delimiters are space and brace for each items.
 588    sqrt {ab} c = sqrt {ab} c
 589    (,} are for grouping
 590    ^ ,_ are for combination
 591
 592    Sorting of formats with arguments, such as sqrt
 593       sqrt a -> sqrt {a}
 594       sqrt {a} -> sqrt {a}
 595    If there is more than one argument, it eliminates backslash between arguments.
 596       \frac a b -> frac {a} {b}
 597    Clean the form of over
 598       a over b -> {a} over {b}
 599  */
 600
 601 static int eq_word(OString& outs, std::istream *strm, int status)
 602 {
 603   OString  token, white, state;
 604   int       result;
 605   char      keyword[256];
 606   const hwpeq *eq;
 607
 608   next_token(white, token, strm);
 609   if (token.getLength() <= 0)
 610       return 0;
 611   result = token[0];
 612
 613   if( token == "{" ) {
 614     state += white + token;
 615     eq_sentence(state, strm, "}");
 616   }
 617   else if( token == "left" ) {
 618     state += white + token;
 619     next_token(white, token, strm);
 620     state += white + token;
 621
 622     eq_sentence(state, strm, "right");
 623
 624     next_token(white, token, strm);
 625     state += white + token;
 626   }
 627   else {
 628     /* Normal token */
 629     int script_status = SCRIPT_NONE;
 630     while( true ) {
 631       state += white + token;
 632       make_keyword(keyword, token);
 633       if( token[0] == '^' )
 634         script_status |= SCRIPT_SUP;
 635       else if( token[0] == '_' )
 636         script_status |= SCRIPT_SUB;
 637       else
 638         script_status = SCRIPT_NONE;
 639
 640       if( nullptr != (eq = lookup_eqn(keyword)) ) {
 641         int nargs = eq->nargs;
 642         while( nargs-- ) {
 643           const std::istream::int_type ch = read_white_space(state, strm);
 644           if( ch != '{' ) state += OStringChar('{');
 645           eq_word(state, strm, script_status);
 646           if( ch != '{' ) state += OStringChar('}');
 647         }
 648       }
 649
 650       if( !next_token(white, token, strm) )
 651         break;
 652       // end loop and restart with this
 653       if( (token[0] == '^' && status && !(status & SCRIPT_SUP)) ||
 654           (token[0] == '_' && status && !(status & SCRIPT_SUB)) ||
 655           "over" == token || "atop" == token ||
 656           strchr("{}#&`", token[0]) ||
 657           (!strchr("^_", token[0]) && white.getLength()) )
 658       {
 659         push_token(white, token, strm);
 660         break;
 661       }
 662     }
 663   }
 664   outs += state;
 665
 666   return result;
 667 }
 668
 669 static bool eq_sentence(OString& outs, std::istream *strm, const char *end)
 670 {
 671   OString  state;
 672   OString  white, token;
 673   bool      multiline = false;
 674
 675   read_white_space(outs, strm);
 676   while( eq_word(state, strm) ) {
 677     if( !next_token(white, token, strm) ||
 678     (end && token == end) )
 679     {
 680       state += white + token;
 681       break;
 682     }
 683     push_token(white, token, strm);
 684     if( token == "atop" || token == "over" )
 685       outs += OStringChar('{') + state + OStringChar('}');
 686     else {
 687       if( token == "#" )
 688         multiline = true;
 689       outs += state;
 690     }
 691     state.clear();
 692     read_white_space(outs, strm);
 693   }
 694   outs += state;
 695   return multiline;
 696 }
 697
 698 static char eq2ltxconv(OString& sstr, std::istream *strm, const char *sentinel)
 699 {
 700   OString  white, token;
 701   char      key[256];
 702   std::istream::int_type ch;
 703   int       result;
 704
 705   while( 0 != (result = next_token(white, token, strm)) ) {
 706     if( sentinel && (result == 1) && strchr(sentinel, token[0]) )
 707       break;
 708     make_keyword(key, token);
 709     const hwpeq *eq = nullptr;
 710     if( (eq = lookup_eqn(key)) != nullptr ) {
 711       const bool bUpperFollowingChar = ( (eq->flag & EQ_CASE)
 712           && rtl::isAsciiUpperCase(static_cast<unsigned char>(token[0])) );
 713
 714       if( eq->latex )
 715         token = eq->latex;
 716       else {
 717         token = OString::Concat("\\") + eq->key;
 718       }
 719
 720       if (bUpperFollowingChar)
 721         token = token.replaceAt(1, 1, token.copy(1, 1).toAsciiUpperCase());
 722     }
 723
 724     if( token[0] == '{' ) { // grouping
 725       sstr += white + token;
 726       eq2ltxconv(sstr, strm, "}");
 727       sstr += OStringChar('}');
 728     }
 729     else if( eq && (eq->flag & EQ_ENV) ) {
 730       next_token(white, token, strm);
 731       if( token[0] != '{' )
 732         return 0;
 733       sstr += OString::Concat("\\begin{") + eq->key + "}" SAL_NEWLINE_STRING ;
 734       eq2ltxconv(sstr, strm, "}");
 735       if( sstr[sstr.getLength() - 1] != '\n' )
 736         sstr += SAL_NEWLINE_STRING ;
 737       sstr += OString::Concat("\\end{") + eq->key + "}" SAL_NEWLINE_STRING ;
 738     }
 739     else if( eq && (eq->flag & EQ_ATOP) ) {
 740       if( sstr.getLength() == 0 )
 741         sstr += OStringChar('{');
 742       else {
 743         int pos  = sstr.lastIndexOf('}');
 744         if( 0 < pos)
 745           sstr = sstr.replaceAt(pos, 1, " ");
 746       }
 747       sstr += token;
 748       for (;;)
 749       {
 750         ch = strm->get();
 751         if ( ch == std::istream::traits_type::eof() || !IS_WS(ch) )
 752             break;
 753         sstr += OStringChar(static_cast<char>(ch));
 754       }
 755       if( ch != '{' )
 756         sstr += "{}";
 757       else {
 758         eq2ltxconv(sstr, strm, "}");
 759         sstr += OStringChar('}');
 760       }
 761     }
 762     else
 763       sstr += white + token;
 764   }
 765   return token[0];
 766 }
 767
 768 void eq2latex(OString& outs, char const *s)
 769 {
 770   assert(s);
 771   if( stk == nullptr )
 772     stk = new eq_stack;
 773
 774   OString  tstr;
 775
 776   std::istringstream tstrm(s);
 777   bool eqnarray = eq_sentence(tstr, &tstrm);
 778   std::istringstream strm((std::string(tstr)));
 779
 780   if( eqnarray )
 781     outs += "\\begin{array}{rllll}" SAL_NEWLINE_STRING;
 782   eq2ltxconv(outs, &strm, nullptr);
 783   outs += SAL_NEWLINE_STRING;
 784   if( eqnarray )
 785     outs += "\\end{array}" SAL_NEWLINE_STRING;
 786   delete stk;
 787   stk = nullptr;
 788 }
 789
 790 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */