winsup/cygwin/strfuncs.cc

   1 /* strfuncs.cc: string functions
   2
   3 This file is part of Cygwin.
   4
   5 This software is a copyrighted work licensed under the terms of the
   6 Cygwin license.  Please consult the file "CYGWIN_LICENSE" for
   7 details. */
   8
   9 #include "winsup.h"
  10 #include <stdlib.h>
  11 #include <sys/param.h>
  12 #include <wchar.h>
  13 #include <uchar.h>
  14 #include <ntdll.h>
  15 #include "path.h"
  16 #include "fhandler.h"
  17 #include "dtable.h"
  18 #include "cygheap.h"
  19
  20 /* Transform characters invalid for Windows filenames to the Unicode private
  21    use area in the U+f0XX range.  The affected characters are all control
  22    chars 1 <= c <= 31, as well as the characters " * : < > ? |.  The backslash
  23    is affected as well, but we can't transform it as long as we accept Win32
  24    paths as input. */
  25 static const WCHAR tfx_chars[] = {
  26  0xf000 |   0, 0xf000 |   1, 0xf000 |   2, 0xf000 |   3,
  27  0xf000 |   4, 0xf000 |   5, 0xf000 |   6, 0xf000 |   7,
  28  0xf000 |   8, 0xf000 |   9, 0xf000 |  10, 0xf000 |  11,
  29  0xf000 |  12, 0xf000 |  13, 0xf000 |  14, 0xf000 |  15,
  30  0xf000 |  16, 0xf000 |  17, 0xf000 |  18, 0xf000 |  19,
  31  0xf000 |  20, 0xf000 |  21, 0xf000 |  22, 0xf000 |  23,
  32  0xf000 |  24, 0xf000 |  25, 0xf000 |  26, 0xf000 |  27,
  33  0xf000 |  28, 0xf000 |  29, 0xf000 |  30, 0xf000 |  31,
  34           ' ',          '!', 0xf000 | '"',          '#',
  35           '$',          '%',          '&',           39,
  36           '(',          ')', 0xf000 | '*',          '+',
  37           ',',          '-',          '.',          '\\',
  38           '0',          '1',          '2',          '3',
  39           '4',          '5',          '6',          '7',
  40           '8',          '9', 0xf000 | ':',          ';',
  41  0xf000 | '<',          '=', 0xf000 | '>', 0xf000 | '?',
  42           '@',          'A',          'B',          'C',
  43           'D',          'E',          'F',          'G',
  44           'H',          'I',          'J',          'K',
  45           'L',          'M',          'N',          'O',
  46           'P',          'Q',          'R',          'S',
  47           'T',          'U',          'V',          'W',
  48           'X',          'Y',          'Z',          '[',
  49           '\\',          ']',          '^',          '_',
  50           '`',          'a',          'b',          'c',
  51           'd',          'e',          'f',          'g',
  52           'h',          'i',          'j',          'k',
  53           'l',          'm',          'n',          'o',
  54           'p',          'q',          'r',          's',
  55           't',          'u',          'v',          'w',
  56           'x',          'y',          'z',          '{',
  57  0xf000 | '|',          '}',          '~',          127
  58 };
  59
  60 /* This is the table for the reverse functionality in sys_wcstombs.
  61    It differs deliberately in two code places (space and dot) to allow
  62    converting back space and dot on filesystems only supporting DOS
  63    filenames. */
  64 static const WCHAR tfx_rev_chars[] = {
  65  0xf000 |   0, 0xf000 |   1, 0xf000 |   2, 0xf000 |   3,
  66  0xf000 |   4, 0xf000 |   5, 0xf000 |   6, 0xf000 |   7,
  67  0xf000 |   8, 0xf000 |   9, 0xf000 |  10, 0xf000 |  11,
  68  0xf000 |  12, 0xf000 |  13, 0xf000 |  14, 0xf000 |  15,
  69  0xf000 |  16, 0xf000 |  17, 0xf000 |  18, 0xf000 |  19,
  70  0xf000 |  20, 0xf000 |  21, 0xf000 |  22, 0xf000 |  23,
  71  0xf000 |  24, 0xf000 |  25, 0xf000 |  26, 0xf000 |  27,
  72  0xf000 |  28, 0xf000 |  29, 0xf000 |  30, 0xf000 |  31,
  73  0xf000 | ' ',          '!', 0xf000 | '"',          '#',
  74           '$',          '%',          '&',           39,
  75           '(',          ')', 0xf000 | '*',          '+',
  76           ',',          '-', 0xf000 | '.',          '\\',
  77           '0',          '1',          '2',          '3',
  78           '4',          '5',          '6',          '7',
  79           '8',          '9', 0xf000 | ':',          ';',
  80  0xf000 | '<',          '=', 0xf000 | '>', 0xf000 | '?',
  81           '@',          'A',          'B',          'C',
  82           'D',          'E',          'F',          'G',
  83           'H',          'I',          'J',          'K',
  84           'L',          'M',          'N',          'O',
  85           'P',          'Q',          'R',          'S',
  86           'T',          'U',          'V',          'W',
  87           'X',          'Y',          'Z',          '[',
  88           '\\',          ']',          '^',          '_',
  89           '`',          'a',          'b',          'c',
  90           'd',          'e',          'f',          'g',
  91           'h',          'i',          'j',          'k',
  92           'l',          'm',          'n',          'o',
  93           'p',          'q',          'r',          's',
  94           't',          'u',          'v',          'w',
  95           'x',          'y',          'z',          '{',
  96  0xf000 | '|',          '}',          '~',          127
  97 };
  98
  99 void
 100 transform_chars (PWCHAR path, PWCHAR path_end)
 101 {
 102   for (; path <= path_end; ++path)
 103     if (*path < 128)
 104       *path = tfx_chars[*path];
 105 }
 106
 107 PWCHAR
 108 transform_chars_af_unix (PWCHAR out, const char *path, __socklen_t len)
 109 {
 110   len -= sizeof (__sa_family_t);
 111   for (const unsigned char *p = (const unsigned char *) path; len-- > 0; ++p)
 112     *out++ = (*p <= 0x7f) ? tfx_chars[*p] : *p;
 113   return out;
 114 }
 115
 116 /* convert wint_t string to wchar_t string.  Make sure dest
 117    has room for at least twice as much characters to account
 118    for surrogate pairs, plus a wchar_t NUL. */
 119 extern "C" void
 120 wcintowcs (wchar_t *dest, wint_t *src, size_t len)
 121 {
 122   while (*src && len-- > 0)
 123     if (*src > 0xffff)
 124       {
 125         *dest++ = ((*src - 0x10000) >> 10) + 0xd800;
 126         *dest++ = ((*src++ - 0x10000) & 0x3ff) + 0xdc00;
 127       }
 128     else
 129         *dest++ = *src++;
 130   *dest = '\0';
 131 }
 132
 133 /* replacement function for wcrtomb, converting a UTF-32 char to a
 134    multibyte string. */
 135 extern "C" size_t
 136 c32rtomb (char *s, char32_t wc, mbstate_t *ps)
 137 {
 138   if (ps == NULL)
 139     {
 140       _REENT_CHECK_MISC(_REENT);
 141       ps = &(_REENT_C32RTOMB_STATE(_REENT));
 142     }
 143
 144     /* If s is NULL, behave as if s pointed to an internal buffer and wc
 145        was a null wide character (L'').  wcrtomb will do that for us*/
 146     if (wc <= 0xffff || !s)
 147       return wcrtomb (s, (wchar_t) wc, ps);
 148
 149     wchar_t wc_arr[2];
 150     const wchar_t *wcp = wc_arr;
 151
 152     wc -= 0x10000;
 153     wc_arr[0] = (wc >> 10) + 0xd800;
 154     wc_arr[1] = (wc & 0x3ff) + 0xdc00;
 155     return wcsnrtombs (s, &wcp, 2, SIZE_MAX, ps);
 156 }
 157
 158 extern "C" size_t
 159 c16rtomb (char *s, char16_t wc, mbstate_t *ps)
 160 {
 161   if (ps == NULL)
 162     {
 163       _REENT_CHECK_MISC(_REENT);
 164       ps = &(_REENT_C16RTOMB_STATE(_REENT));
 165     }
 166
 167   return wcrtomb (s, (wchar_t) wc, ps);
 168 }
 169
 170 extern "C" size_t
 171 c8rtomb (char *s, char8_t c8, mbstate_t *ps)
 172 {
 173   struct _reent *reent = _REENT;
 174   char32_t wc;
 175
 176   if (ps == NULL)
 177     {
 178       _REENT_CHECK_MISC(reent);
 179       ps = &(_REENT_C8RTOMB_STATE(reent));
 180     }
 181
 182   if (s == NULL)
 183     {
 184       ps->__count = 0;
 185       return 1;
 186     }
 187   if ((ps->__count & 0xff00) != 0xc800)
 188     {
 189       switch (c8)
 190         {
 191         case 0 ... 0x7f:        /* single octet */
 192           ps->__count = 0;
 193           wc = c8;
 194           break;
 195         case 0xc2 ... 0xf4:     /* valid lead byte */
 196           ps->__count = 0xc801;
 197           ps->__value.__wchb[0] = c8;
 198           return 0;
 199         default:
 200           goto ilseq;
 201         }
 202     }
 203   else
 204     {
 205       /* We already collected something... */
 206       int idx = ps->__count & 0x3;
 207       char8_t &c1 = ps->__value.__wchb[0];
 208       char8_t &c2 = ps->__value.__wchb[1];
 209       char8_t &c3 = ps->__value.__wchb[2];
 210
 211       switch (idx)
 212         {
 213           case 1:
 214             /* Annoyingly complex check for validity for 2nd octet. */
 215             if (c8 <= 0x7f || c8 >= 0xc0)
 216               goto ilseq;
 217             if (c1 == 0xe0 && c8 <= 0x9f)
 218               goto ilseq;
 219             if (c1 == 0xed && c8 >= 0xa0)
 220               goto ilseq;
 221             if (c1 == 0xf0 && c8 <= 0x8f)
 222               goto ilseq;
 223             if (c1 == 0xf4 && c8 >= 0x90)
 224               goto ilseq;
 225             if (c1 >= 0xe0)
 226               {
 227                 ps->__count = 0xc802;
 228                 c2 = c8;
 229                 return 0;
 230               }
 231             wc =   ((c1 & 0x1f) << 6)
 232                  |  (c8 & 0x3f);
 233             break;
 234           case 2:
 235             if (c8 <= 0x7f || c8 >= 0xc0)
 236               goto ilseq;
 237             if (c1 >= 0xf0)
 238               {
 239                 ps->__count = 0xc803;
 240                 c3 = c8;
 241                 return 0;
 242               }
 243             wc =   ((c1 & 0x0f) << 12)
 244                  | ((c2 & 0x3f) <<  6)
 245                  |  (c8 & 0x3f);
 246             break;
 247           case 3:
 248             if (c8 <= 0x7f || c8 >= 0xc0)
 249               goto ilseq;
 250             wc =   ((c1 & 0x07) << 18)
 251                  | ((c2 & 0x3f) << 12)
 252                  | ((c3 & 0x3f) <<  6)
 253                  |  (c8 & 0x3f);
 254             break;
 255           default: /* Shouldn't happen */
 256             goto ilseq;
 257         }
 258     }
 259   ps->__count = 0;
 260   return c32rtomb (s, wc, ps);
 261 ilseq:
 262   ps->__count = 0;
 263   _REENT_ERRNO(reent) = EILSEQ;
 264   return (size_t)(-1);
 265 }
 266
 267 extern "C" size_t
 268 mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
 269 {
 270   size_t len, len2;
 271   wchar_t w1, w2;
 272
 273   if (ps == NULL)
 274     {
 275       _REENT_CHECK_MISC(_REENT);
 276       ps = &(_REENT_MBRTOC32_STATE(_REENT));
 277     }
 278
 279   len = mbrtowc (&w1, s, n, ps);
 280   if (len == (size_t) -1 || len == (size_t) -2)
 281     return len;
 282   if (pwc && s)
 283     *pwc = w1;
 284   /* Convert surrogate pair to wint_t value */
 285   if (len > 0 && w1 >= 0xd800 && w1 <= 0xdbff)
 286     {
 287       s += len;
 288       n -= len;
 289       len2 = mbrtowc (&w2, s, n, ps);
 290       if (len2 > 0 && w2 >= 0xdc00 && w2 <= 0xdfff)
 291         {
 292           len += len2;
 293           if (pwc && s)
 294             *pwc = (((w1 & 0x3ff) << 10) | (w2 & 0x3ff)) + 0x10000;
 295         }
 296       else
 297         {
 298           len = (size_t) -1;
 299           errno = EILSEQ;
 300         }
 301     }
 302   return len;
 303 }
 304
 305 /* Like mbrtowc, but we already defined how to return a surrogate, and
 306    the definition of mbrtoc16 differes from that.
 307    Return the high surrogate with a return value representing the length
 308    of the entire multibyte sequence, and in the next call return the low
 309    surrogate with a return value of -3. */
 310 extern "C" size_t
 311 mbrtoc16 (char16_t *pwc, const char *s, size_t n, mbstate_t *ps)
 312 {
 313   int retval = 0;
 314   struct _reent *reent = _REENT;
 315   wchar_t wc;
 316
 317   if (ps == NULL)
 318     {
 319       _REENT_CHECK_MISC(reent);
 320       ps = &(_REENT_MBRTOC16_STATE(reent));
 321     }
 322
 323   if (s == NULL)
 324     retval = __MBTOWC (reent, NULL, "", 1, ps);
 325   else if (ps->__count == 0xdc00)
 326     {
 327       /* Return stored second half of the surrogate. */
 328       if (pwc)
 329         *pwc = ps->__value.__wch;
 330       ps->__count = 0;
 331       return -3;
 332     }
 333   else
 334     retval = __MBTOWC (reent, &wc, s, n, ps);
 335
 336   if (retval == -1)
 337     goto ilseq;
 338
 339   if (pwc)
 340     *pwc = wc;
 341   /* Did we catch the first half of a surrogate? */
 342   if (wc >= 0xd800 && wc <= 0xdbff)
 343     {
 344       if (n <= (size_t) retval)
 345         goto ilseq;
 346       int r2 = __MBTOWC (reent, &wc, s + retval, n, ps);
 347       if (r2 == -1)
 348         goto ilseq;
 349       /* Store second half of the surrogate in state, and return the
 350          length of the entire multibyte sequence. */
 351       ps->__count = 0xdc00;
 352       ps->__value.__wch = wc;
 353       retval += r2;
 354     }
 355   return (size_t)retval;
 356
 357 ilseq:
 358   ps->__count = 0;
 359   _REENT_ERRNO(reent) = EILSEQ;
 360   return (size_t)(-1);
 361 }
 362
 363 extern "C" size_t
 364 mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps)
 365 {
 366   struct _reent *reent = _REENT;
 367   size_t len;
 368   char32_t wc;
 369
 370   if (ps == NULL)
 371     {
 372       _REENT_CHECK_MISC(reent);
 373       ps = &(_REENT_MBRTOC8_STATE(reent));
 374     }
 375
 376   if (s == NULL)
 377     {
 378       if (ps)
 379         ps->__count = 0;
 380       return 1;
 381     }
 382   else if ((ps->__count & 0xff00) == 0xc800)
 383     {
 384       /* Return next utf-8 octet in line. */
 385       int idx = ps->__count & 0x3;
 386
 387       if (pc8)
 388         *pc8 = ps->__value.__wchb[--idx];
 389       if (idx == 0)
 390         ps->__count = 0;
 391       return -3;
 392     }
 393   len = mbrtoc32 (&wc, s, n, ps);
 394   if (len > 0)
 395     {
 396       /* octets stored back to front for easier indexing */
 397       switch (wc)
 398         {
 399         case 0 ... 0x7f:
 400           ps->__value.__wchb[0] = wc;
 401           ps->__count = 0;
 402           break;
 403         case 0x80 ... 0x7ff:
 404           ps->__value.__wchb[1] = 0xc0 | ((wc & 0x7c0) >> 6);
 405           ps->__value.__wchb[0] = 0x80 |  (wc &  0x3f);
 406           ps->__count = 0xc800 | 1;
 407           break;
 408         case 0x800 ... 0xffff:
 409           ps->__value.__wchb[2] = 0xe0 | ((wc & 0xf000) >> 12);
 410           ps->__value.__wchb[1] = 0x80 | ((wc &  0xfc0) >> 6);
 411           ps->__value.__wchb[0] = 0x80 |  (wc &   0x3f);
 412           ps->__count = 0xc800 | 2;
 413           break;
 414         case 0x10000 ... 0x10ffff:
 415           ps->__value.__wchb[3] = 0xf0 | ((wc & 0x1c0000) >> 18);
 416           ps->__value.__wchb[2] = 0x80 | ((wc &  0x3f000) >> 12);
 417           ps->__value.__wchb[1] = 0x80 | ((wc &    0xfc0) >> 6);
 418           ps->__value.__wchb[0] = 0x80 |  (wc &     0x3f);
 419           ps->__count = 0xc800 | 3;
 420           break;
 421         default:
 422           ps->__count = 0;
 423           _REENT_ERRNO(reent) = EILSEQ;
 424           return (size_t)(-1);
 425         }
 426       if (pc8)
 427         *pc8 = ps->__value.__wchb[ps->__count & 0x3];
 428     }
 429   return len;
 430 }
 431
 432 extern "C" size_t
 433 mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
 434 {
 435   wint_t *ptr = dst;
 436   const char *tmp_src;
 437   size_t max;
 438   size_t count = 0;
 439   size_t bytes;
 440
 441   if (dst == NULL)
 442     {
 443       /* Ignore original len value and do not alter src pointer if the
 444          dst pointer is NULL.  */
 445       len = (size_t)-1;
 446       tmp_src = *src;
 447       src = &tmp_src;
 448     }
 449   max = len;
 450   while (len > 0)
 451     {
 452       bytes = mbrtowi (ptr, *src, MB_CUR_MAX, ps);
 453       if (bytes > 0)
 454         {
 455           *src += bytes;
 456           nms -= bytes;
 457           ++count;
 458           ptr = (dst == NULL) ? NULL : ptr + 1;
 459           --len;
 460         }
 461       else if (bytes == 0)
 462         {
 463           *src = NULL;
 464           return count;
 465         }
 466       else
 467         {
 468           /* Deviation from standard: If the input is broken, the output
 469              will be broken.  I. e., we just copy the current byte over
 470              into the wint_t destination and try to pick up on the next
 471              byte.  This is in line with the way fnmatch works. */
 472           ps->__count = 0;
 473           if (dst)
 474             {
 475               *ptr++ = (const wint_t) *(*src)++;
 476               ++count;
 477               --nms;
 478               --len;
 479             }
 480         }
 481     }
 482   return (size_t) max;
 483 }
 484
/* The SJIS, JIS and eucJP conversion in newlib does not use UTF as
   wchar_t character representation.  That's unfortunate for us since
   we require UTF for the OS.  What we do here is to have our own
   implementation of the base functions for the conversion using
   the MultiByteToWideChar/WideCharToMultiByte functions. */
 490
/* FIXME: We can't support JIS (ISO-2022-JP) at all right now.  It's a
   stateful charset encoding.  The translation from mbtowc to
   MultiByteToWideChar is quite complex.  Given that we support SJIS and
   eucJP, the two most used Japanese charset encodings, this shouldn't
   be such a big problem. */
 496
 497 /* GBK, GB18030, eucKR, and Big5 conversions are not available so far
 498    in newlib. */
 499
 500 static int
 501 __db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp)
 502 {
 503   if (s == NULL)
 504     return 0;
 505
 506   if (wchar < 0x80)
 507     {
 508       *s = (char) wchar;
 509       return 1;
 510     }
 511
 512   BOOL def_used = false;
 513   int ret = WideCharToMultiByte (cp, WC_NO_BEST_FIT_CHARS, &wchar, 1, s,
 514                                  2, NULL, &def_used);
 515   if (ret > 0 && !def_used)
 516     return ret;
 517
 518   _REENT_ERRNO(r) = EILSEQ;
 519   return -1;
 520 }
 521
 522 extern "C" int
 523 __sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 524 {
 525   return __db_wctomb (r,s, wchar, 932);
 526 }
 527
 528 extern "C" int
 529 __eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 530 {
 531   /* Unfortunately, the Windows eucJP codepage 20932 is not really 100%
 532      compatible to eucJP.  It's a cute approximation which makes it a
 533      doublebyte codepage.
 534      The JIS-X-0212 three byte codes (0x8f,0xa1-0xfe,0xa1-0xfe) are folded
 535      into two byte codes as follows: The 0x8f is stripped, the next byte is
 536      taken as is, the third byte is mapped into the lower 7-bit area by
 537      masking it with 0x7f.  So, for instance, the eucJP code 0x8f,0xdd,0xf8
 538      becomes 0xdd,0x78 in CP 20932.
 539
 540      To be really eucJP compatible, we have to map the JIS-X-0212 characters
 541      between CP 20932 and eucJP ourselves. */
 542   if (s == NULL)
 543     return 0;
 544
 545   if (wchar < 0x80)
 546     {
 547       *s = (char) wchar;
 548       return 1;
 549     }
 550
 551   BOOL def_used = false;
 552   int ret = WideCharToMultiByte (20932, WC_NO_BEST_FIT_CHARS, &wchar, 1, s,
 553                                  3, NULL, &def_used);
 554   if (ret > 0 && !def_used)
 555     {
 556       /* CP20932 representation of JIS-X-0212 character? */
 557       if (ret == 2 && (unsigned char) s[1] <= 0x7f)
 558         {
 559           /* Yes, convert to eucJP three byte sequence */
 560           s[2] = s[1] | 0x80;
 561           s[1] = s[0];
 562           s[0] = 0x8f;
 563           ++ret;
 564         }
 565       return ret;
 566     }
 567
 568   _REENT_ERRNO(r) = EILSEQ;
 569   return -1;
 570 }
 571
 572 extern "C" int
 573 __gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 574 {
 575   return __db_wctomb (r,s, wchar, 936);
 576 }
 577
 578 extern "C" int
 579 __gb18030_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 580 {
 581   int ret;
 582   wchar_t wres[2];
 583
 584   if (s == NULL)
 585     return 0;
 586
 587   if (state->__count == 0)
 588     {
 589       if (wchar <= 0x7f)
 590         {
 591           *s = (char) wchar;
 592           return 1;
 593         }
 594
 595       if (wchar >= 0xd800 && wchar <= 0xdbff)
 596         {
 597           /* First half of a surrogate pair */
 598           state->__count = 18030;
 599           state->__value.__wch = wchar;
 600           return 0;
 601         }
 602       ret = WideCharToMultiByte (54936, WC_ERR_INVALID_CHARS, &wchar, 1, s,
 603                                  4, NULL, NULL);
 604       if (ret > 0)
 605         return ret;
 606       goto ilseq;
 607     }
 608   else if (state->__count == 18030 && state->__value.__wch >= 0xd800
 609            && state->__value.__wch <= 0xdbff)
 610     {
 611       if (wchar >= 0xdc00 && wchar <= 0xdfff)
 612         {
 613           /* Create multibyte sequence from full surrogate pair. */
 614           wres[0] = state->__value.__wch;
 615           wres[1] = wchar;
 616           ret = WideCharToMultiByte (54936, WC_ERR_INVALID_CHARS, wres, 2, s, 4,
 617                                      NULL, NULL);
 618           if (ret > 0)
 619             {
 620               state->__count = 0;
 621               return ret;
 622             }
 623         }
 624 ilseq:
 625       _REENT_ERRNO(r) = EILSEQ;
 626       return -1;
 627     }
 628   _REENT_ERRNO(r) = EINVAL;
 629   return -1;
 630 }
 631
 632 extern "C" int
 633 __kr_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 634 {
 635   return __db_wctomb (r,s, wchar, 949);
 636 }
 637
 638 extern "C" int
 639 __big5_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
 640 {
 641   return __db_wctomb (r,s, wchar, 950);
 642 }
 643
 644 static int
 645 __db_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, UINT cp,
 646              mbstate_t *state)
 647 {
 648   wchar_t dummy;
 649   int ret;
 650
 651   if (s == NULL)
 652     return 0;  /* not state-dependent */
 653
 654   if (n == 0)
 655     return -2;
 656
 657   if (pwc == NULL)
 658     pwc = &dummy;
 659
 660   if (state->__count == 0)
 661     {
 662       if (*(unsigned char *) s < 0x80)
 663         {
 664           *pwc = *(unsigned char *) s;
 665           return *s ? 1 : 0;
 666         }
 667       size_t cnt = MIN (n, 2);
 668       ret = MultiByteToWideChar (cp, MB_ERR_INVALID_CHARS, s, cnt, pwc, 1);
 669       if (ret)
 670         return cnt;
 671       if (n == 1)
 672         {
 673           state->__count = n;
 674           state->__value.__wchb[0] = *s;
 675           return -2;
 676         }
 677       /* These Win32 functions are really crappy.  Assuming n is 2 but the
 678          first byte is a singlebyte charcode, the function does not convert
 679          that byte and return 1, rather it just returns 0.  So, what we do
 680          here is to check if the first byte returns a valid value... */
 681       else if (MultiByteToWideChar (cp, MB_ERR_INVALID_CHARS, s, 1, pwc, 1))
 682         return 1;
 683       _REENT_ERRNO(r) = EILSEQ;
 684       return -1;
 685     }
 686   state->__value.__wchb[state->__count] = *s;
 687   ret = MultiByteToWideChar (cp, MB_ERR_INVALID_CHARS,
 688                              (const char *) state->__value.__wchb, 2, pwc, 1);
 689   if (!ret)
 690     {
 691       _REENT_ERRNO(r) = EILSEQ;
 692       return -1;
 693     }
 694   state->__count = 0;
 695   return 1;
 696 }
 697
 698 extern "C" int
 699 __sjis_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
 700                mbstate_t *state)
 701 {
 702   return __db_mbtowc (r, pwc, s, n, 932, state);
 703 }
 704
 705 extern "C" int
 706 __eucjp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
 707                 mbstate_t *state)
 708 {
 709   /* See comment in __eucjp_wctomb above. */
 710   wchar_t dummy;
 711   int ret = 0;
 712
 713   if (s == NULL)
 714     return 0;  /* not state-dependent */
 715
 716   if (n == 0)
 717     return -2;
 718
 719   if (pwc == NULL)
 720     pwc = &dummy;
 721
 722   if (state->__count == 0)
 723     {
 724       if (*(unsigned char *) s < 0x80)
 725         {
 726           *pwc = *(unsigned char *) s;
 727           return *s ? 1 : 0;
 728         }
 729       if (*(unsigned char *) s == 0x8f) /* JIS-X-0212 lead byte? */
 730         {
 731           /* Yes.  Store sequence in mbstate and handle in the __count != 0
 732              case at the end of the function. */
 733           size_t i;
 734           for (i = 0; i < 3 && i < n; i++)
 735             state->__value.__wchb[i] = s[i];
 736           if ((state->__count = i) < 3) /* Incomplete sequence? */
 737             return -2;
 738           ret = 3;
 739           goto jis_x_0212;
 740         }
 741       size_t cnt = MIN (n, 2);
 742       if (MultiByteToWideChar (20932, MB_ERR_INVALID_CHARS, s, cnt, pwc, 1))
 743         return cnt;
 744       if (n == 1)
 745         {
 746           state->__count = 1;
 747           state->__value.__wchb[0] = *s;
 748           return -2;
 749         }
 750       else if (MultiByteToWideChar (20932, MB_ERR_INVALID_CHARS, s, 1, pwc, 1))
 751         return 1;
 752       _REENT_ERRNO(r) = EILSEQ;
 753       return -1;
 754     }
 755   state->__value.__wchb[state->__count++] = *s;
 756   ret = 1;
 757 jis_x_0212:
 758   if (state->__value.__wchb[0] == 0x8f)
 759     {
 760       if (state->__count == 2)
 761         {
 762           if (n == 1)
 763             return -2;
 764           state->__value.__wchb[state->__count] = s[1];
 765           ret = 2;
 766         }
 767       /* Ok, we have a full JIS-X-0212 sequence in mbstate.  Convert it
 768          to the CP 20932 representation and feed it to MultiByteToWideChar. */
 769       state->__value.__wchb[0] = state->__value.__wchb[1];
 770       state->__value.__wchb[1] = state->__value.__wchb[2] & 0x7f;
 771     }
 772   if (!MultiByteToWideChar (20932, MB_ERR_INVALID_CHARS,
 773                             (const char *) state->__value.__wchb, 2, pwc, 1))
 774     {
 775       _REENT_ERRNO(r) = EILSEQ;
 776       return -1;
 777     }
 778   state->__count = 0;
 779   return ret;
 780 }
 781
 782 extern "C" int
 783 __gbk_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
 784               mbstate_t *state)
 785 {
 786   return __db_mbtowc (r, pwc, s, n, 936, state);
 787 }
 788
 789 extern "C" int
 790 __gb18030_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
 791                   mbstate_t *state)
 792 {
 793   wchar_t wres[2], dummy;
 794   unsigned char ch;
 795   int ret, len, ocount;
 796   size_t ncopy;
 797
 798   if (state->__count < 0 || (state->__count > (int) sizeof state->__value.__wchb
 799                              && state->__count != 18030))
 800     {
 801       errno = EINVAL;
 802       return -1;
 803     }
 804
 805   if (s == NULL)
 806     {
 807       s = "";
 808       n = 1;
 809       pwc = NULL;
 810     }
 811
 812   if (state->__count == 18030)
 813     {
 814       /* Return second half of the surrogate pair */
 815       *pwc = state->__value.__wch;
 816       state->__count = 0;
 817       return 1;
 818     }
 819
 820   ncopy = MIN (MIN (n, MB_CUR_MAX),
 821                sizeof state->__value.__wchb - state->__count);
 822   memcpy (state->__value.__wchb + state->__count, s, ncopy);
 823   ocount = state->__count;
 824   state->__count += ncopy;
 825   s = (char *) state->__value.__wchb;
 826   n = state->__count;
 827
 828   if (n == 0) /* Incomplete multibyte sequence */
 829     return -2;
 830
 831   if (!pwc)
 832     pwc = &dummy;
 833
 834   /* Check if input is a valid GB18030 char (per FreeBSD):
 835    * Single byte:         [00-7f]
 836    * Two byte:            [81-fe][40-7e,80-fe]
 837    * Four byte:           [81-fe][30-39][81-fe][30-39]
 838    */
 839   ch = *(unsigned char *) s;
 840   if (ch <= 0x7f)
 841     {
 842       *pwc = ch;
 843       state->__count = 0;
 844       return ch ? 1 : 0;
 845     }
 846   if (ch >= 0x81 && ch <= 0xfe)
 847     {
 848       if (n < 2)
 849         return -2;
 850       ch = (unsigned char) s[1];
 851       if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe))
 852         len = 2;
 853       else if (ch >= 0x30 && ch <= 0x39)
 854         {
 855           if (n < 3)
 856             return -2;
 857           ch = (unsigned char) s[2];
 858           if (ch < 0x81 || ch > 0xfe)
 859             goto ilseq;
 860           if (n < 4)
 861             return -2;
 862           ch = (unsigned char) s[3];
 863           if (ch < 0x30 || ch > 0x39)
 864             goto ilseq;
 865           len = 4;
 866         }
 867       else
 868         goto ilseq;
 869     }
 870   else
 871     goto ilseq;
 872   ret = MultiByteToWideChar (54936, MB_ERR_INVALID_CHARS, s, len, wres, 2);
 873   if (ret)
 874     {
 875       *pwc = wres[0];
 876       if (ret == 2)
 877         {
 878           /* Surrogate pair. Store second half for later and return
 879              first half. Return real count - 1, return 1 when the second
 880              half of the pair is returned in the next run. */
 881           state->__count = 18030;
 882           state->__value.__wch = wres[1];
 883           --len;
 884         }
 885       else
 886         state->__count = 0;
 887       return len - ocount;
 888     }
 889 ilseq:
 890   _REENT_ERRNO(r) = EILSEQ;
 891   return -1;
 892 }
 893
 894 extern "C" int
 895 __kr_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
 896              mbstate_t *state)
 897 {
 898   return __db_mbtowc (r, pwc, s, n, 949, state);
 899 }
 900
 901 extern "C" int
 902 __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
 903                mbstate_t *state)
 904 {
 905   return __db_mbtowc (r, pwc, s, n, 950, state);
 906 }
 907
 908 /* Our own sys_wcstombs/sys_mbstowcs functions differ from the
 909    wcstombs/mbstowcs API in three ways:
 910
 911    - The UNICODE private use area is used in filenames to specify
 912      characters not allowed in Windows filenames ('*', '?', etc).
 913      The sys_wcstombs converts characters in the private use area
 914      back to the corresponding ASCII chars.
 915
   - If a wide character in a filename has no representation in the current
     multibyte charset, then usually you wouldn't be able to access the
     file.  To fix this problem, sys_wcstombs creates a replacement multibyte
     sequence for the non-representable wide char.  The sequence starts with
     an ASCII CAN (0x18, Ctrl-X), followed by the UTF-8 representation of the
     character.  The sys_(cp_)mbstowcs function detects ASCII CAN characters
     in the input multibyte string and converts the following multibyte
     sequence by treating it as a UTF-8 char.  If that fails, the ASCII
     CAN was probably standalone and it just gets copied over as ASCII CAN.
 925
 926    - Three cases have to be distinguished for the return value:
 927
 928      - dst == NULL; len is ignored, the return value is the number of bytes
 929        required for the string without the trailing NUL, just like the return
 930        value of the wcstombs function.
 931
 932      - dst != NULL, len == (size_t) -1; the return value is the size in bytes
 933        of the destination string without the trailing NUL.  If the incoming
 934        wide char string was not NUL-terminated, the target string won't be
 935        NUL-terminated either.
 936
 937      - dst != NULL; len != (size_t) -1; the return value is the size in bytes
 938        of the destination string without the trailing NUL.  The target string
 939        will be NUL-terminated, no matter what.  If the result is truncated due
 940        to buffer size, it's a bug in Cygwin and the buffer in the calling
 941        function should be raised.
 942 */
 943 size_t
 944 _sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
 945                bool is_path)
 946 {
 947   char buf[10];
 948   char *ptr = dst;
 949   wchar_t *pwcs = (wchar_t *) src;
 950   size_t n = 0;
 951   mbstate_t ps;
 952   save_errno save;
 953   wctomb_p f_wctomb = __WCTOMB;
 954
 955   if (f_wctomb == __ascii_wctomb)
 956     f_wctomb = __utf8_wctomb;
 957   memset (&ps, 0, sizeof ps);
 958   if (dst == NULL)
 959     len = (size_t) -1;
 960   while (n < len && nwc-- > 0)
 961     {
 962       wchar_t pw = *pwcs;
 963       int bytes;
 964       unsigned char cwc;
 965
 966       /* Convert UNICODE private use area.  Reverse functionality for the
 967          ASCII area <= 0x7f (only for path names) is transform_chars above.
 968          Reverse functionality for invalid bytes in a multibyte sequence is
 969          in _sys_mbstowcs below. */
 970       if (is_path && (pw & 0xff00) == 0xf000
 971           && (((cwc = (pw & 0xff)) <= 0x7f && tfx_rev_chars[cwc] >= 0xf000)
 972               || (cwc >= 0x80 && MB_CUR_MAX > 1)))
 973         {
 974           buf[0] = (char) cwc;
 975           bytes = 1;
 976         }
 977       else
 978         {
 979           bytes = f_wctomb (_REENT, buf, pw, &ps);
 980           if (bytes == -1 && f_wctomb != __utf8_wctomb)
 981             {
 982               /* Convert chars invalid in the current codepage to a sequence
 983                  ASCII CAN; UTF-8 representation of invalid char. */
 984               buf[0] = 0x18; /* ASCII CAN */
 985               bytes = __utf8_wctomb (_REENT, buf + 1, pw, &ps);
 986               if (bytes == -1)
 987                 {
 988                   ++pwcs;
 989                   ps.__count = 0;
 990                   continue;
 991                 }
 992               ++bytes; /* Add the ASCII CAN to the byte count. */
 993               if (ps.__count == -4 && nwc > 0)
 994                 {
 995                   /* First half of a surrogate pair. */
 996                   ++pwcs;
 997                   if ((*pwcs & 0xfc00) != 0xdc00) /* Invalid second half. */
 998                     {
 999                       ++pwcs;
1000                       ps.__count = 0;
1001                       continue;
1002                     }
1003                   bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, &ps);
1004                   nwc--;
1005                 }
1006             }
1007         }
1008       if (n + bytes <= len)
1009         {
1010           if (dst)
1011             {
1012               for (int i = 0; i < bytes; ++i)
1013                 *ptr++ = buf[i];
1014             }
1015           if (*pwcs++ == 0x00)
1016             break;
1017           n += bytes;
1018         }
1019       else
1020         break;
1021     }
1022   if (n && dst && len != (size_t) -1)
1023     {
1024       n = (n < len) ? n : len - 1;
1025       dst[n] = '\0';
1026     }
1027
1028   return n;
1029 }
1030
1031 /* Allocate a buffer big enough for the string, always including the
1032    terminating '\0'.  The buffer pointer is returned in *dst_p, the return
1033    value is the number of bytes written to the buffer, as usual.
1034    The "type" argument determines where the resulting buffer is stored.
1035    It's either one of the cygheap_types values, or it's "HEAP_NOTHEAP".
1036    In the latter case the allocation uses simple calloc.
1037
1038    Note that this code is shared by cygserver (which requires it via
1039    __small_vsprintf) and so when built there plain calloc is the
1040    only choice.  */
1041 size_t
1042 _sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc,
1043                 bool is_path)
1044 {
1045   size_t ret;
1046
1047   ret = _sys_wcstombs (NULL, (size_t) -1, src, nwc, is_path);
1048   if (ret > 0)
1049     {
1050       size_t dlen = ret + 1;
1051
1052       if (type == HEAP_NOTHEAP)
1053         *dst_p = (char *) calloc (dlen, sizeof (char));
1054       else
1055         *dst_p = (char *) ccalloc ((cygheap_types) type, dlen, sizeof (char));
1056       if (!*dst_p)
1057         return 0;
1058       ret = _sys_wcstombs (*dst_p, dlen, src, nwc, is_path);
1059     }
1060   return ret;
1061 }
1062
1063 /* _sys_mbstowcs is actually most of the time called as sys_mbstowcs with
1064    a 0 codepage.  If cp is not 0, the codepage is evaluated and used for the
1065    conversion.  This is so that fhandler_console can switch to an alternate
1066    charset, which is the charset returned by GetConsoleCP ().  Most of the
1067    time this is used for box and line drawing characters.

        NOTE(review): the signature below takes a conversion function
        (f_mbtowc) rather than a codepage, so the "cp" wording above looks
        like it predates the current interface -- confirm and reword.

        Parameters:

          f_mbtowc  Conversion function used for everything except the
                    CAN-prefixed UTF-8 sequences, which always go through
                    __utf8_mbtowc.
          dst       Destination buffer, or NULL to only count wide chars.
          dlen      Capacity of dst in wide chars; ignored if dst is NULL.
          src       Multibyte input string.
          nms       Maximum number of bytes to read from src.

        Return value: the number of wide chars produced.  If the conversion
        ran into a NUL byte, that NUL is included in the count, unless the
        count then reaches dlen, in which case it is clamped to dlen - 1.
        With dst != NULL and a non-zero count, dst[count] is set to L'\0'. */
1068 size_t
1069 _sys_mbstowcs (mbtowc_p f_mbtowc, wchar_t *dst, size_t dlen, const char *src,
1070                size_t nms)
1071 {
1072   wchar_t *ptr = dst;
1073   unsigned const char *pmbs = (unsigned const char *) src;
       /* Number of wide chars produced so far. */
1074   size_t count = 0;
       /* Remaining capacity of dst in wide chars. */
1075   size_t len = dlen;
1076   int bytes;
1077   mbstate_t ps;
       /* Presumably keeps errno as the caller left it while the conversion
          functions below may set it -- TODO confirm against save_errno. */
1078   save_errno save;
1079
1080   memset (&ps, 0, sizeof ps);
       /* Count-only mode: no capacity limit. */
1081   if (dst == NULL)
1082     len = (size_t)-1;
1083   while (len > 0 && nms > 0)
1084     {
1085       /* ASCII CAN handling. */
1086       if (*pmbs == 0x18)
1087         {
1088           /* Sanity check: If this is a lead CAN byte for a following UTF-8
1089              sequence, there must be at least two more bytes left, and the
1090              next byte must be a valid UTF-8 start byte.  If the charset
1091              isn't UTF-8 anyway, try to convert the following bytes as UTF-8
1092              sequence. */
1093           if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4
1094               && f_mbtowc != __utf8_mbtowc)
1095             {
1096               bytes = __utf8_mbtowc (_REENT, ptr, (const char *) pmbs + 1,
1097                                      nms - 1, &ps);
1098               if (bytes < 0)
1099                 {
1100                   /* Invalid UTF-8 sequence?  Treat the ASCII CAN character as
1101                      stand-alone ASCII CAN char. */
1102                   bytes = 1;
1103                   if (dst)
1104                     *ptr = 0x18;
1105                   memset (&ps, 0, sizeof ps);
1106                 }
1107               else
1108                 {
1109                   ++bytes; /* Count CAN byte */
                       /* Presumably __utf8_mbtowc signals a pending low
                          surrogate with __count == 4 -- TODO confirm. */
1110                   if (bytes > 1 && ps.__count == 4)
1111                     {
1112                       /* First half of a surrogate. */
1113                       wchar_t *ptr2 = dst ? ptr + 1 : NULL;
1114                       int bytes2 = __utf8_mbtowc (_REENT, ptr2,
1115                                                   (const char *) pmbs + bytes,
1116                                                   nms - bytes, &ps);
                           /* If the second half cannot be converted, only
                              the conversion state is reset; the first half
                              is still accounted for by the common code at
                              the end of the loop body. */
1117                       if (bytes2 < 0)
1118                         memset (&ps, 0, sizeof ps);
1119                       else
1120                         {
                               /* Account for the second half here; the
                                  first half is handled by the common code
                                  below. */
1121                           bytes += bytes2;
1122                           ++count;
1123                           ptr = dst ? ptr + 1 : NULL;
1124                           --len;
1125                         }
1126                     }
1127                 }
1128             }
1129           /* Otherwise it's just a simple ASCII CAN. */
1130           else
1131             {
1132               bytes = 1;
1133               if (dst)
1134                 *ptr = 0x18;
1135             }
1136         }
1137       else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms,
1138                                   &ps)) < 0)
1139         {
1140           /* The technique is based on a discussion here:
1141              http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html
1142
1143              Invalid bytes in a multibyte sequence are converted to
1144              the private use area which is already used to store ASCII
1145              chars invalid in Windows filenames.  This technique allows
1146              to store them in a symmetric way. */
1147           bytes = 1;
1148           if (dst)
1149             *ptr = L'\xf000' | *pmbs;
1150           memset (&ps, 0, sizeof ps);
1151         }
1152
           /* Common bookkeeping: consume the input bytes and advance the
              output by one wide char. */
1153       if (bytes > 0)
1154         {
1155           pmbs += bytes;
1156           nms -= bytes;
1157           ++count;
1158           ptr = dst ? ptr + 1 : NULL;
1159           --len;
1160         }
1161       else
1162         {
               /* bytes == 0 means a NUL was converted; it is counted as a
                  wide char before the loop is left. */
1163           if (bytes == 0)
1164             ++count;
1165           break;
1166         }
1167     }
1168
       /* Terminate the result.  The count is clamped to dlen - 1 so the
          terminator always stays inside a buffer of dlen wide chars;
          callers passing a dlen larger than the real buffer lose that
          guarantee. */
1169   if (count && dst)
1170     {
1171       count = (count < dlen) ? count : dlen - 1;
1172       dst[count] = L'\0';
1173     }
1174
1175   return count;
1176 }
1177
1178 /* Same as sys_wcstombs_alloc, just backwards. */
1179 size_t
1180 sys_mbstowcs_alloc (wchar_t **dst_p, int type, const char *src, size_t nms)
1181 {
1182   size_t ret;
1183
1184   ret = sys_mbstowcs (NULL, (size_t) -1, src, nms);
1185   if (ret > 0)
1186     {
1187       size_t dlen = ret + 1;
1188
1189       if (type == HEAP_NOTHEAP)
1190         *dst_p = (wchar_t *) calloc (dlen, sizeof (wchar_t));
1191       else
1192         *dst_p = (wchar_t *) ccalloc ((cygheap_types) type, dlen,
1193                                       sizeof (wchar_t));
1194       if (!*dst_p)
1195         return 0;
1196       ret = sys_mbstowcs (*dst_p, dlen, src, nms);
1197     }
1198   return ret;
1199 }
1200
/* Copy characters from *s2 to s1 until either the delimiter c or the end of
   the source string is reached.  The destination is always NUL-terminated.
   On return *s2 points at the character which stopped the copy (the
   delimiter or the source's NUL), and the return value points at the
   terminating NUL byte written to the destination.  */
char *
strccpy (char *__restrict s1, const char **__restrict s2, char c)
{
  const char *in = *s2;
  char *out = s1;

  for (; *in != '\0' && *in != c; ++in)
    *out++ = *in;
  *out = '\0';

  /* Report back how far the source has been consumed. */
  *s2 = in;
  return out;
}
1213
1214 const unsigned char case_folded_lower[] = {
       /* 256-entry lookup table indexed by byte value.  Every entry equals
          its own index, except that the ASCII uppercase letters 'A'..'Z'
          map to 'a'..'z'.  Bytes >= 128 are left unchanged.  The numeric
          entries 39 and 92 are the ASCII codes of the single quote and the
          backslash. */
1215    0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
1216   16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
1217   32, '!', '"', '#', '$', '%', '&',  39, '(', ')', '*', '+', ',', '-', '.', '/',
1218  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
1219  '@', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
1220  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[',  92, ']', '^', '_',
1221  '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
1222  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 127,
1223  128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
1224  144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
1225  160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
1226  176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
1227  192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
1228  208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
1229  224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
1230  240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
1231 };
1232
1233 const unsigned char case_folded_upper[] = {
       /* 256-entry lookup table indexed by byte value.  Every entry equals
          its own index, except that the ASCII lowercase letters 'a'..'z'
          map to 'A'..'Z'.  Bytes >= 128 are left unchanged.  The numeric
          entries 39 and 92 are the ASCII codes of the single quote and the
          backslash. */
1234    0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
1235   16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
1236   32, '!', '"', '#', '$', '%', '&',  39, '(', ')', '*', '+', ',', '-', '.', '/',
1237  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
1238  '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
1239  'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[',  92, ']', '^', '_',
1240  '`', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
1241  'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', 127,
1242  128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
1243  144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
1244  160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
1245  176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
1246  192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
1247  208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
1248  224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
1249  240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
1250 };
1251
1252 const char isalpha_array[] = {
       /* 256-entry lookup table indexed by byte value.  The entries for the
          ASCII letters 'A'..'Z' (65..90) and 'a'..'z' (97..122) are 0x20,
          all other entries, including every byte >= 128, are 0.
          NOTE(review): 0x20 is presumably chosen because it is the bit
          which differs between upper and lower case ASCII letters, so the
          entry can double as a case-toggle mask -- confirm against the
          users of this table. */
1253    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
1254    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
1255    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
1256    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
1257    0,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
1258 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,   0,   0,   0,   0,   0,
1259    0,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
1260 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,   0,   0,   0,   0,   0,
1261    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
1262    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
1263    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
1264    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
1265    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
1266    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
1267    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
1268    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0
1269 };
1270
1271 extern "C" int
1272 cygwin_wcscasecmp (const wchar_t *ws, const wchar_t *wt)
1273 {
1274   UNICODE_STRING us, ut;
1275
1276   RtlInitUnicodeString (&us, ws);
1277   RtlInitUnicodeString (&ut, wt);
1278   return RtlCompareUnicodeString (&us, &ut, TRUE);
1279 }
1280
1281 extern "C" int
1282 cygwin_wcsncasecmp (const wchar_t  *ws, const wchar_t *wt, size_t n)
1283 {
1284   UNICODE_STRING us, ut;
1285   size_t ls = 0, lt = 0;
1286
1287   while (ws[ls] && ls < n)
1288     ++ls;
1289   RtlInitCountedUnicodeString (&us, ws, ls * sizeof (WCHAR));
1290   while (wt[lt] && lt < n)
1291     ++lt;
1292   RtlInitCountedUnicodeString (&ut, wt, lt * sizeof (WCHAR));
1293   return RtlCompareUnicodeString (&us, &ut, TRUE);
1294 }
1295
1296 extern "C" int
1297 cygwin_strcasecmp (const char *cs, const char *ct)
1298 {
1299   UNICODE_STRING us, ut;
1300   ULONG len, ulen;
1301
1302   len = strlen (cs) + 1;
1303   ulen = len * sizeof (WCHAR);
1304   RtlInitEmptyUnicodeString (&us, (PWCHAR) alloca (ulen), ulen);
1305   us.Length = sys_mbstowcs (us.Buffer, len, cs) * sizeof (WCHAR);
1306
1307   len = strlen (ct) + 1;
1308   ulen = len * sizeof (WCHAR);
1309   RtlInitEmptyUnicodeString (&ut, (PWCHAR) alloca (ulen), ulen);
1310   ut.Length = sys_mbstowcs (ut.Buffer, len, ct) * sizeof (WCHAR);
1311
1312   return RtlCompareUnicodeString (&us, &ut, TRUE);
1313 }
1314
1315 extern "C" int
1316 cygwin_strncasecmp (const char *cs, const char *ct, size_t n)
1317 {
1318   UNICODE_STRING us, ut;
1319   ULONG ulen;
1320   size_t ls = 0, lt = 0;
1321
1322   while (cs[ls] && ls < n)
1323     ++ls;
1324   ulen = (ls + 1) * sizeof (WCHAR);
1325   RtlInitEmptyUnicodeString (&us, (PWCHAR) alloca (ulen), ulen);
1326   us.Length = sys_mbstowcs (us.Buffer, ls + 1, cs, ls) * sizeof (WCHAR);
1327
1328   while (ct[lt] && lt < n)
1329     ++lt;
1330   ulen = (lt + 1) * sizeof (WCHAR);
1331   RtlInitEmptyUnicodeString (&ut, (PWCHAR) alloca (ulen), ulen);
1332   ut.Length = sys_mbstowcs (ut.Buffer, lt + 1, ct, lt)  * sizeof (WCHAR);
1333
1334   return RtlCompareUnicodeString (&us, &ut, TRUE);
1335 }
1336
1337 extern "C" char *
1338 strlwr (char *string)
1339 {
1340   UNICODE_STRING us;
1341   size_t len = (strlen (string) + 1) * sizeof (WCHAR);
1342
1343   us.MaximumLength = len; us.Buffer = (PWCHAR) alloca (len);
1344   us.Length = sys_mbstowcs (us.Buffer, len, string) * sizeof (WCHAR)
1345               - sizeof (WCHAR);
1346   RtlDowncaseUnicodeString (&us, &us, FALSE);
1347   sys_wcstombs (string, len / sizeof (WCHAR), us.Buffer);
1348   return string;
1349 }
1350
1351 extern "C" char *
1352 strupr (char *string)
1353 {
1354   UNICODE_STRING us;
1355   size_t len = (strlen (string) + 1) * sizeof (WCHAR);
1356
1357   us.MaximumLength = len; us.Buffer = (PWCHAR) alloca (len);
1358   us.Length = sys_mbstowcs (us.Buffer, len, string) * sizeof (WCHAR)
1359               - sizeof (WCHAR);
1360   RtlUpcaseUnicodeString (&us, &us, FALSE);
1361   sys_wcstombs (string, len / sizeof (WCHAR), us.Buffer);
1362   return string;
1363 }
1364
1365 /* backslashify: Convert all forward slashes in src path to back slashes
1366    in dst path.  Add a trailing slash to dst when trailing_slash_p arg
1367    is set to 1. */
1368
1369 void
1370 backslashify (const char *src, char *dst, bool trailing_slash_p)
1371 {
1372   const char *start = src;
1373
1374   while (*src)
1375     {
1376       if (*src == '/')
1377         *dst++ = '\\';
1378       else
1379         *dst++ = *src;
1380       ++src;
1381     }
1382   if (trailing_slash_p
1383       && src > start
1384       && !isdirsep (src[-1]))
1385     *dst++ = '\\';
1386   *dst++ = 0;
1387 }
1388
1389 /* slashify: Convert all back slashes in src path to forward slashes
1390    in dst path.  Add a trailing slash to dst when trailing_slash_p arg
1391    is set to 1. */
1392
1393 void
1394 slashify (const char *src, char *dst, bool trailing_slash_p)
1395 {
1396   const char *start = src;
1397
1398   while (*src)
1399     {
1400       if (*src == '\\')
1401         *dst++ = '/';
1402       else
1403         *dst++ = *src;
1404       ++src;
1405     }
1406   if (trailing_slash_p
1407       && src > start
1408       && !isdirsep (src[-1]))
1409     *dst++ = '/';
1410   *dst++ = 0;
1411 }
1412
1413 static WCHAR hex_wchars[] = L"0123456789abcdef";
1414
1415 NTSTATUS
1416 RtlInt64ToHexUnicodeString (ULONGLONG value, PUNICODE_STRING dest,
1417                             BOOLEAN append)
1418 {
1419   USHORT len = append ? dest->Length : 0;
1420   if (dest->MaximumLength - len < 16 * (int) sizeof (WCHAR))
1421     return STATUS_BUFFER_OVERFLOW;
1422   wchar_t *end = (PWCHAR) ((PBYTE) dest->Buffer + len);
1423   PWCHAR p = end + 16;
1424   while (p-- > end)
1425     {
1426       *p = hex_wchars[value & 0xf];
1427       value >>= 4;
1428     }
1429   dest->Length += 16 * sizeof (WCHAR);
1430   return STATUS_SUCCESS;
1431 }