features.h: support POSIX.1-2024
[newlib-cygwin.git] / winsup / cygwin / strfuncs.cc
blob66667bdb3a3cba1f3b525b916764401cd76e5369
/* strfuncs.cc: string functions

This file is part of Cygwin.

This software is a copyrighted work licensed under the terms of the
Cygwin license.  Please consult the file "CYGWIN_LICENSE" for
details. */
9 #include "winsup.h"
10 #include <stdlib.h>
11 #include <sys/param.h>
12 #include <wchar.h>
13 #include <uchar.h>
14 #include <ntdll.h>
15 #include "path.h"
16 #include "fhandler.h"
17 #include "dtable.h"
18 #include "cygheap.h"
20 /* Transform characters invalid for Windows filenames to the Unicode private
21 use area in the U+f0XX range. The affected characters are all control
22 chars 1 <= c <= 31, as well as the characters " * : < > ? |. The backslash
23 is affected as well, but we can't transform it as long as we accept Win32
24 paths as input. */
25 static const WCHAR tfx_chars[] = {
26 0xf000 | 0, 0xf000 | 1, 0xf000 | 2, 0xf000 | 3,
27 0xf000 | 4, 0xf000 | 5, 0xf000 | 6, 0xf000 | 7,
28 0xf000 | 8, 0xf000 | 9, 0xf000 | 10, 0xf000 | 11,
29 0xf000 | 12, 0xf000 | 13, 0xf000 | 14, 0xf000 | 15,
30 0xf000 | 16, 0xf000 | 17, 0xf000 | 18, 0xf000 | 19,
31 0xf000 | 20, 0xf000 | 21, 0xf000 | 22, 0xf000 | 23,
32 0xf000 | 24, 0xf000 | 25, 0xf000 | 26, 0xf000 | 27,
33 0xf000 | 28, 0xf000 | 29, 0xf000 | 30, 0xf000 | 31,
34 ' ', '!', 0xf000 | '"', '#',
35 '$', '%', '&', 39,
36 '(', ')', 0xf000 | '*', '+',
37 ',', '-', '.', '\\',
38 '0', '1', '2', '3',
39 '4', '5', '6', '7',
40 '8', '9', 0xf000 | ':', ';',
41 0xf000 | '<', '=', 0xf000 | '>', 0xf000 | '?',
42 '@', 'A', 'B', 'C',
43 'D', 'E', 'F', 'G',
44 'H', 'I', 'J', 'K',
45 'L', 'M', 'N', 'O',
46 'P', 'Q', 'R', 'S',
47 'T', 'U', 'V', 'W',
48 'X', 'Y', 'Z', '[',
49 '\\', ']', '^', '_',
50 '`', 'a', 'b', 'c',
51 'd', 'e', 'f', 'g',
52 'h', 'i', 'j', 'k',
53 'l', 'm', 'n', 'o',
54 'p', 'q', 'r', 's',
55 't', 'u', 'v', 'w',
56 'x', 'y', 'z', '{',
57 0xf000 | '|', '}', '~', 127
60 /* This is the table for the reverse functionality in sys_wcstombs.
61 It differs deliberately in two code places (space and dot) to allow
62 converting back space and dot on filesystems only supporting DOS
63 filenames. */
64 static const WCHAR tfx_rev_chars[] = {
65 0xf000 | 0, 0xf000 | 1, 0xf000 | 2, 0xf000 | 3,
66 0xf000 | 4, 0xf000 | 5, 0xf000 | 6, 0xf000 | 7,
67 0xf000 | 8, 0xf000 | 9, 0xf000 | 10, 0xf000 | 11,
68 0xf000 | 12, 0xf000 | 13, 0xf000 | 14, 0xf000 | 15,
69 0xf000 | 16, 0xf000 | 17, 0xf000 | 18, 0xf000 | 19,
70 0xf000 | 20, 0xf000 | 21, 0xf000 | 22, 0xf000 | 23,
71 0xf000 | 24, 0xf000 | 25, 0xf000 | 26, 0xf000 | 27,
72 0xf000 | 28, 0xf000 | 29, 0xf000 | 30, 0xf000 | 31,
73 0xf000 | ' ', '!', 0xf000 | '"', '#',
74 '$', '%', '&', 39,
75 '(', ')', 0xf000 | '*', '+',
76 ',', '-', 0xf000 | '.', '\\',
77 '0', '1', '2', '3',
78 '4', '5', '6', '7',
79 '8', '9', 0xf000 | ':', ';',
80 0xf000 | '<', '=', 0xf000 | '>', 0xf000 | '?',
81 '@', 'A', 'B', 'C',
82 'D', 'E', 'F', 'G',
83 'H', 'I', 'J', 'K',
84 'L', 'M', 'N', 'O',
85 'P', 'Q', 'R', 'S',
86 'T', 'U', 'V', 'W',
87 'X', 'Y', 'Z', '[',
88 '\\', ']', '^', '_',
89 '`', 'a', 'b', 'c',
90 'd', 'e', 'f', 'g',
91 'h', 'i', 'j', 'k',
92 'l', 'm', 'n', 'o',
93 'p', 'q', 'r', 's',
94 't', 'u', 'v', 'w',
95 'x', 'y', 'z', '{',
96 0xf000 | '|', '}', '~', 127
99 void
100 transform_chars (PWCHAR path, PWCHAR path_end)
102 for (; path <= path_end; ++path)
103 if (*path < 128)
104 *path = tfx_chars[*path];
107 PWCHAR
108 transform_chars_af_unix (PWCHAR out, const char *path, __socklen_t len)
110 len -= sizeof (__sa_family_t);
111 for (const unsigned char *p = (const unsigned char *) path; len-- > 0; ++p)
112 *out++ = (*p <= 0x7f) ? tfx_chars[*p] : *p;
113 return out;
/* Convert wint_t string to wchar_t string.  Make sure dest
   has room for at least twice as many characters to account
   for surrogate pairs, plus a wchar_t NUL.

   At most LEN source characters are converted; conversion also stops at
   the first NUL in SRC.  Values above 0xffff are written as a high/low
   surrogate pair.  DEST is always NUL-terminated. */
extern "C" void
wcintowcs (wchar_t *dest, wint_t *src, size_t len)
{
  /* Check the remaining length before touching *src, so that a source
     buffer of exactly LEN characters without terminator is not read
     past its end. */
  while (len-- > 0 && *src)
    if (*src > 0xffff)
      {
        *dest++ = ((*src - 0x10000) >> 10) + 0xd800;
        *dest++ = ((*src++ - 0x10000) & 0x3ff) + 0xdc00;
      }
    else
      *dest++ = *src++;
  *dest = '\0';
}
133 /* replacement function for wcrtomb, converting a UTF-32 char to a
134 multibyte string. */
135 extern "C" size_t
136 c32rtomb (char *s, char32_t wc, mbstate_t *ps)
138 if (ps == NULL)
140 _REENT_CHECK_MISC(_REENT);
141 ps = &(_REENT_C32RTOMB_STATE(_REENT));
144 /* If s is NULL, behave as if s pointed to an internal buffer and wc
145 was a null wide character (L''). wcrtomb will do that for us*/
146 if (wc <= 0xffff || !s)
147 return wcrtomb (s, (wchar_t) wc, ps);
149 wchar_t wc_arr[2];
150 const wchar_t *wcp = wc_arr;
152 wc -= 0x10000;
153 wc_arr[0] = (wc >> 10) + 0xd800;
154 wc_arr[1] = (wc & 0x3ff) + 0xdc00;
155 return wcsnrtombs (s, &wcp, 2, SIZE_MAX, ps);
158 extern "C" size_t
159 c16rtomb (char *s, char16_t wc, mbstate_t *ps)
161 if (ps == NULL)
163 _REENT_CHECK_MISC(_REENT);
164 ps = &(_REENT_C16RTOMB_STATE(_REENT));
167 return wcrtomb (s, (wchar_t) wc, ps);
170 extern "C" size_t
171 c8rtomb (char *s, char8_t c8, mbstate_t *ps)
173 struct _reent *reent = _REENT;
174 char32_t wc;
176 if (ps == NULL)
178 _REENT_CHECK_MISC(reent);
179 ps = &(_REENT_C8RTOMB_STATE(reent));
182 if (s == NULL)
184 ps->__count = 0;
185 return 1;
187 if ((ps->__count & 0xff00) != 0xc800)
189 switch (c8)
191 case 0 ... 0x7f: /* single octet */
192 ps->__count = 0;
193 wc = c8;
194 break;
195 case 0xc2 ... 0xf4: /* valid lead byte */
196 ps->__count = 0xc801;
197 ps->__value.__wchb[0] = c8;
198 return 0;
199 default:
200 goto ilseq;
203 else
205 /* We already collected something... */
206 int idx = ps->__count & 0x3;
207 char8_t &c1 = ps->__value.__wchb[0];
208 char8_t &c2 = ps->__value.__wchb[1];
209 char8_t &c3 = ps->__value.__wchb[2];
211 switch (idx)
213 case 1:
214 /* Annoyingly complex check for validity for 2nd octet. */
215 if (c8 <= 0x7f || c8 >= 0xc0)
216 goto ilseq;
217 if (c1 == 0xe0 && c8 <= 0x9f)
218 goto ilseq;
219 if (c1 == 0xed && c8 >= 0xa0)
220 goto ilseq;
221 if (c1 == 0xf0 && c8 <= 0x8f)
222 goto ilseq;
223 if (c1 == 0xf4 && c8 >= 0x90)
224 goto ilseq;
225 if (c1 >= 0xe0)
227 ps->__count = 0xc802;
228 c2 = c8;
229 return 0;
231 wc = ((c1 & 0x1f) << 6)
232 | (c8 & 0x3f);
233 break;
234 case 2:
235 if (c8 <= 0x7f || c8 >= 0xc0)
236 goto ilseq;
237 if (c1 >= 0xf0)
239 ps->__count = 0xc803;
240 c3 = c8;
241 return 0;
243 wc = ((c1 & 0x0f) << 12)
244 | ((c2 & 0x3f) << 6)
245 | (c8 & 0x3f);
246 break;
247 case 3:
248 if (c8 <= 0x7f || c8 >= 0xc0)
249 goto ilseq;
250 wc = ((c1 & 0x07) << 18)
251 | ((c2 & 0x3f) << 12)
252 | ((c3 & 0x3f) << 6)
253 | (c8 & 0x3f);
254 break;
255 default: /* Shouldn't happen */
256 goto ilseq;
259 ps->__count = 0;
260 return c32rtomb (s, wc, ps);
261 ilseq:
262 ps->__count = 0;
263 _REENT_ERRNO(reent) = EILSEQ;
264 return (size_t)(-1);
267 extern "C" size_t
268 mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
270 size_t len, len2;
271 wchar_t w1, w2;
273 if (ps == NULL)
275 _REENT_CHECK_MISC(_REENT);
276 ps = &(_REENT_MBRTOC32_STATE(_REENT));
279 len = mbrtowc (&w1, s, n, ps);
280 if (len == (size_t) -1 || len == (size_t) -2)
281 return len;
282 if (pwc && s)
283 *pwc = w1;
284 /* Convert surrogate pair to wint_t value */
285 if (len > 0 && w1 >= 0xd800 && w1 <= 0xdbff)
287 s += len;
288 n -= len;
289 len2 = mbrtowc (&w2, s, n, ps);
290 if (len2 > 0 && w2 >= 0xdc00 && w2 <= 0xdfff)
292 len += len2;
293 if (pwc && s)
294 *pwc = (((w1 & 0x3ff) << 10) | (w2 & 0x3ff)) + 0x10000;
296 else
298 len = (size_t) -1;
299 errno = EILSEQ;
302 return len;
305 /* Like mbrtowc, but we already defined how to return a surrogate, and
306 the definition of mbrtoc16 differes from that.
307 Return the high surrogate with a return value representing the length
308 of the entire multibyte sequence, and in the next call return the low
309 surrogate with a return value of -3. */
310 extern "C" size_t
311 mbrtoc16 (char16_t *pwc, const char *s, size_t n, mbstate_t *ps)
313 int retval = 0;
314 struct _reent *reent = _REENT;
315 wchar_t wc;
317 if (ps == NULL)
319 _REENT_CHECK_MISC(reent);
320 ps = &(_REENT_MBRTOC16_STATE(reent));
323 if (s == NULL)
324 retval = __MBTOWC (reent, NULL, "", 1, ps);
325 else if (ps->__count == 0xdc00)
327 /* Return stored second half of the surrogate. */
328 if (pwc)
329 *pwc = ps->__value.__wch;
330 ps->__count = 0;
331 return -3;
333 else
334 retval = __MBTOWC (reent, &wc, s, n, ps);
336 if (retval == -1)
337 goto ilseq;
339 if (pwc)
340 *pwc = wc;
341 /* Did we catch the first half of a surrogate? */
342 if (wc >= 0xd800 && wc <= 0xdbff)
344 if (n <= (size_t) retval)
345 goto ilseq;
346 int r2 = __MBTOWC (reent, &wc, s + retval, n, ps);
347 if (r2 == -1)
348 goto ilseq;
349 /* Store second half of the surrogate in state, and return the
350 length of the entire multibyte sequence. */
351 ps->__count = 0xdc00;
352 ps->__value.__wch = wc;
353 retval += r2;
355 return (size_t)retval;
357 ilseq:
358 ps->__count = 0;
359 _REENT_ERRNO(reent) = EILSEQ;
360 return (size_t)(-1);
363 extern "C" size_t
364 mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps)
366 struct _reent *reent = _REENT;
367 size_t len;
368 char32_t wc;
370 if (ps == NULL)
372 _REENT_CHECK_MISC(reent);
373 ps = &(_REENT_MBRTOC8_STATE(reent));
376 if (s == NULL)
378 if (ps)
379 ps->__count = 0;
380 return 1;
382 else if ((ps->__count & 0xff00) == 0xc800)
384 /* Return next utf-8 octet in line. */
385 int idx = ps->__count & 0x3;
387 if (pc8)
388 *pc8 = ps->__value.__wchb[--idx];
389 if (idx == 0)
390 ps->__count = 0;
391 return -3;
393 len = mbrtoc32 (&wc, s, n, ps);
394 if (len > 0)
396 /* octets stored back to front for easier indexing */
397 switch (wc)
399 case 0 ... 0x7f:
400 ps->__value.__wchb[0] = wc;
401 ps->__count = 0;
402 break;
403 case 0x80 ... 0x7ff:
404 ps->__value.__wchb[1] = 0xc0 | ((wc & 0x7c0) >> 6);
405 ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
406 ps->__count = 0xc800 | 1;
407 break;
408 case 0x800 ... 0xffff:
409 ps->__value.__wchb[2] = 0xe0 | ((wc & 0xf000) >> 12);
410 ps->__value.__wchb[1] = 0x80 | ((wc & 0xfc0) >> 6);
411 ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
412 ps->__count = 0xc800 | 2;
413 break;
414 case 0x10000 ... 0x10ffff:
415 ps->__value.__wchb[3] = 0xf0 | ((wc & 0x1c0000) >> 18);
416 ps->__value.__wchb[2] = 0x80 | ((wc & 0x3f000) >> 12);
417 ps->__value.__wchb[1] = 0x80 | ((wc & 0xfc0) >> 6);
418 ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
419 ps->__count = 0xc800 | 3;
420 break;
421 default:
422 ps->__count = 0;
423 _REENT_ERRNO(reent) = EILSEQ;
424 return (size_t)(-1);
426 if (pc8)
427 *pc8 = ps->__value.__wchb[ps->__count & 0x3];
429 return len;
432 extern "C" size_t
433 mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
435 wint_t *ptr = dst;
436 const char *tmp_src;
437 size_t max;
438 size_t count = 0;
439 size_t bytes;
441 if (dst == NULL)
443 /* Ignore original len value and do not alter src pointer if the
444 dst pointer is NULL. */
445 len = (size_t)-1;
446 tmp_src = *src;
447 src = &tmp_src;
449 max = len;
450 while (len > 0)
452 bytes = mbrtowi (ptr, *src, MB_CUR_MAX, ps);
453 if (bytes > 0)
455 *src += bytes;
456 nms -= bytes;
457 ++count;
458 ptr = (dst == NULL) ? NULL : ptr + 1;
459 --len;
461 else if (bytes == 0)
463 *src = NULL;
464 return count;
466 else
468 /* Deviation from standard: If the input is broken, the output
469 will be broken. I. e., we just copy the current byte over
470 into the wint_t destination and try to pick up on the next
471 byte. This is in line with the way fnmatch works. */
472 ps->__count = 0;
473 if (dst)
475 *ptr++ = (const wint_t) *(*src)++;
476 ++count;
477 --nms;
478 --len;
482 return (size_t) max;
/* The SJIS, JIS and eucJP conversion in newlib does not use UTF as
   wchar_t character representation.  That's unfortunate for us since
   we require UTF for the OS.  What we do here is to have our own
   implementation of the base functions for the conversion using
   the MultiByteToWideChar/WideCharToMultiByte functions. */
/* FIXME: We can't support JIS (ISO-2022-JP) at all right now.  It's a
   stateful charset encoding.  The translation from mbtowc to
   MultiByteToWideChar is quite complex.  Given that we support SJIS and
   eucJP, the two most used Japanese charset encodings, this shouldn't
   be such a big problem. */
/* GBK, GB18030, eucKR, and Big5 conversions are not available so far
   in newlib. */
500 static int
501 __db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp)
503 if (s == NULL)
504 return 0;
506 if (wchar < 0x80)
508 *s = (char) wchar;
509 return 1;
512 BOOL def_used = false;
513 int ret = WideCharToMultiByte (cp, WC_NO_BEST_FIT_CHARS, &wchar, 1, s,
514 2, NULL, &def_used);
515 if (ret > 0 && !def_used)
516 return ret;
518 _REENT_ERRNO(r) = EILSEQ;
519 return -1;
522 extern "C" int
523 __sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
525 return __db_wctomb (r,s, wchar, 932);
528 extern "C" int
529 __eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
531 /* Unfortunately, the Windows eucJP codepage 20932 is not really 100%
532 compatible to eucJP. It's a cute approximation which makes it a
533 doublebyte codepage.
534 The JIS-X-0212 three byte codes (0x8f,0xa1-0xfe,0xa1-0xfe) are folded
535 into two byte codes as follows: The 0x8f is stripped, the next byte is
536 taken as is, the third byte is mapped into the lower 7-bit area by
537 masking it with 0x7f. So, for instance, the eucJP code 0x8f,0xdd,0xf8
538 becomes 0xdd,0x78 in CP 20932.
540 To be really eucJP compatible, we have to map the JIS-X-0212 characters
541 between CP 20932 and eucJP ourselves. */
542 if (s == NULL)
543 return 0;
545 if (wchar < 0x80)
547 *s = (char) wchar;
548 return 1;
551 BOOL def_used = false;
552 int ret = WideCharToMultiByte (20932, WC_NO_BEST_FIT_CHARS, &wchar, 1, s,
553 3, NULL, &def_used);
554 if (ret > 0 && !def_used)
556 /* CP20932 representation of JIS-X-0212 character? */
557 if (ret == 2 && (unsigned char) s[1] <= 0x7f)
559 /* Yes, convert to eucJP three byte sequence */
560 s[2] = s[1] | 0x80;
561 s[1] = s[0];
562 s[0] = 0x8f;
563 ++ret;
565 return ret;
568 _REENT_ERRNO(r) = EILSEQ;
569 return -1;
572 extern "C" int
573 __gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
575 return __db_wctomb (r,s, wchar, 936);
578 extern "C" int
579 __gb18030_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
581 int ret;
582 wchar_t wres[2];
584 if (s == NULL)
585 return 0;
587 if (state->__count == 0)
589 if (wchar <= 0x7f)
591 *s = (char) wchar;
592 return 1;
595 if (wchar >= 0xd800 && wchar <= 0xdbff)
597 /* First half of a surrogate pair */
598 state->__count = 18030;
599 state->__value.__wch = wchar;
600 return 0;
602 ret = WideCharToMultiByte (54936, WC_ERR_INVALID_CHARS, &wchar, 1, s,
603 4, NULL, NULL);
604 if (ret > 0)
605 return ret;
606 goto ilseq;
608 else if (state->__count == 18030 && state->__value.__wch >= 0xd800
609 && state->__value.__wch <= 0xdbff)
611 if (wchar >= 0xdc00 && wchar <= 0xdfff)
613 /* Create multibyte sequence from full surrogate pair. */
614 wres[0] = state->__value.__wch;
615 wres[1] = wchar;
616 ret = WideCharToMultiByte (54936, WC_ERR_INVALID_CHARS, wres, 2, s, 4,
617 NULL, NULL);
618 if (ret > 0)
620 state->__count = 0;
621 return ret;
624 ilseq:
625 _REENT_ERRNO(r) = EILSEQ;
626 return -1;
628 _REENT_ERRNO(r) = EINVAL;
629 return -1;
632 extern "C" int
633 __kr_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
635 return __db_wctomb (r,s, wchar, 949);
638 extern "C" int
639 __big5_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
641 return __db_wctomb (r,s, wchar, 950);
644 static int
645 __db_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, UINT cp,
646 mbstate_t *state)
648 wchar_t dummy;
649 int ret;
651 if (s == NULL)
652 return 0; /* not state-dependent */
654 if (n == 0)
655 return -2;
657 if (pwc == NULL)
658 pwc = &dummy;
660 if (state->__count == 0)
662 if (*(unsigned char *) s < 0x80)
664 *pwc = *(unsigned char *) s;
665 return *s ? 1 : 0;
667 size_t cnt = MIN (n, 2);
668 ret = MultiByteToWideChar (cp, MB_ERR_INVALID_CHARS, s, cnt, pwc, 1);
669 if (ret)
670 return cnt;
671 if (n == 1)
673 state->__count = n;
674 state->__value.__wchb[0] = *s;
675 return -2;
677 /* These Win32 functions are really crappy. Assuming n is 2 but the
678 first byte is a singlebyte charcode, the function does not convert
679 that byte and return 1, rather it just returns 0. So, what we do
680 here is to check if the first byte returns a valid value... */
681 else if (MultiByteToWideChar (cp, MB_ERR_INVALID_CHARS, s, 1, pwc, 1))
682 return 1;
683 _REENT_ERRNO(r) = EILSEQ;
684 return -1;
686 state->__value.__wchb[state->__count] = *s;
687 ret = MultiByteToWideChar (cp, MB_ERR_INVALID_CHARS,
688 (const char *) state->__value.__wchb, 2, pwc, 1);
689 if (!ret)
691 _REENT_ERRNO(r) = EILSEQ;
692 return -1;
694 state->__count = 0;
695 return 1;
698 extern "C" int
699 __sjis_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
700 mbstate_t *state)
702 return __db_mbtowc (r, pwc, s, n, 932, state);
705 extern "C" int
706 __eucjp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
707 mbstate_t *state)
709 /* See comment in __eucjp_wctomb above. */
710 wchar_t dummy;
711 int ret = 0;
713 if (s == NULL)
714 return 0; /* not state-dependent */
716 if (n == 0)
717 return -2;
719 if (pwc == NULL)
720 pwc = &dummy;
722 if (state->__count == 0)
724 if (*(unsigned char *) s < 0x80)
726 *pwc = *(unsigned char *) s;
727 return *s ? 1 : 0;
729 if (*(unsigned char *) s == 0x8f) /* JIS-X-0212 lead byte? */
731 /* Yes. Store sequence in mbstate and handle in the __count != 0
732 case at the end of the function. */
733 size_t i;
734 for (i = 0; i < 3 && i < n; i++)
735 state->__value.__wchb[i] = s[i];
736 if ((state->__count = i) < 3) /* Incomplete sequence? */
737 return -2;
738 ret = 3;
739 goto jis_x_0212;
741 size_t cnt = MIN (n, 2);
742 if (MultiByteToWideChar (20932, MB_ERR_INVALID_CHARS, s, cnt, pwc, 1))
743 return cnt;
744 if (n == 1)
746 state->__count = 1;
747 state->__value.__wchb[0] = *s;
748 return -2;
750 else if (MultiByteToWideChar (20932, MB_ERR_INVALID_CHARS, s, 1, pwc, 1))
751 return 1;
752 _REENT_ERRNO(r) = EILSEQ;
753 return -1;
755 state->__value.__wchb[state->__count++] = *s;
756 ret = 1;
757 jis_x_0212:
758 if (state->__value.__wchb[0] == 0x8f)
760 if (state->__count == 2)
762 if (n == 1)
763 return -2;
764 state->__value.__wchb[state->__count] = s[1];
765 ret = 2;
767 /* Ok, we have a full JIS-X-0212 sequence in mbstate. Convert it
768 to the CP 20932 representation and feed it to MultiByteToWideChar. */
769 state->__value.__wchb[0] = state->__value.__wchb[1];
770 state->__value.__wchb[1] = state->__value.__wchb[2] & 0x7f;
772 if (!MultiByteToWideChar (20932, MB_ERR_INVALID_CHARS,
773 (const char *) state->__value.__wchb, 2, pwc, 1))
775 _REENT_ERRNO(r) = EILSEQ;
776 return -1;
778 state->__count = 0;
779 return ret;
782 extern "C" int
783 __gbk_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
784 mbstate_t *state)
786 return __db_mbtowc (r, pwc, s, n, 936, state);
789 extern "C" int
790 __gb18030_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
791 mbstate_t *state)
793 wchar_t wres[2], dummy;
794 unsigned char ch;
795 int ret, len, ocount;
796 size_t ncopy;
798 if (state->__count < 0 || (state->__count > (int) sizeof state->__value.__wchb
799 && state->__count != 18030))
801 errno = EINVAL;
802 return -1;
805 if (s == NULL)
807 s = "";
808 n = 1;
809 pwc = NULL;
812 if (state->__count == 18030)
814 /* Return second half of the surrogate pair */
815 *pwc = state->__value.__wch;
816 state->__count = 0;
817 return 1;
820 ncopy = MIN (MIN (n, MB_CUR_MAX),
821 sizeof state->__value.__wchb - state->__count);
822 memcpy (state->__value.__wchb + state->__count, s, ncopy);
823 ocount = state->__count;
824 state->__count += ncopy;
825 s = (char *) state->__value.__wchb;
826 n = state->__count;
828 if (n == 0) /* Incomplete multibyte sequence */
829 return -2;
831 if (!pwc)
832 pwc = &dummy;
834 /* Check if input is a valid GB18030 char (per FreeBSD):
835 * Single byte: [00-7f]
836 * Two byte: [81-fe][40-7e,80-fe]
837 * Four byte: [81-fe][30-39][81-fe][30-39]
839 ch = *(unsigned char *) s;
840 if (ch <= 0x7f)
842 *pwc = ch;
843 state->__count = 0;
844 return ch ? 1 : 0;
846 if (ch >= 0x81 && ch <= 0xfe)
848 if (n < 2)
849 return -2;
850 ch = (unsigned char) s[1];
851 if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe))
852 len = 2;
853 else if (ch >= 0x30 && ch <= 0x39)
855 if (n < 3)
856 return -2;
857 ch = (unsigned char) s[2];
858 if (ch < 0x81 || ch > 0xfe)
859 goto ilseq;
860 if (n < 4)
861 return -2;
862 ch = (unsigned char) s[3];
863 if (ch < 0x30 || ch > 0x39)
864 goto ilseq;
865 len = 4;
867 else
868 goto ilseq;
870 else
871 goto ilseq;
872 ret = MultiByteToWideChar (54936, MB_ERR_INVALID_CHARS, s, len, wres, 2);
873 if (ret)
875 *pwc = wres[0];
876 if (ret == 2)
878 /* Surrogate pair. Store second half for later and return
879 first half. Return real count - 1, return 1 when the second
880 half of the pair is returned in the next run. */
881 state->__count = 18030;
882 state->__value.__wch = wres[1];
883 --len;
885 else
886 state->__count = 0;
887 return len - ocount;
889 ilseq:
890 _REENT_ERRNO(r) = EILSEQ;
891 return -1;
894 extern "C" int
895 __kr_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
896 mbstate_t *state)
898 return __db_mbtowc (r, pwc, s, n, 949, state);
901 extern "C" int
902 __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
903 mbstate_t *state)
905 return __db_mbtowc (r, pwc, s, n, 950, state);
908 /* Our own sys_wcstombs/sys_mbstowcs functions differ from the
909 wcstombs/mbstowcs API in three ways:
911 - The UNICODE private use area is used in filenames to specify
912 characters not allowed in Windows filenames ('*', '?', etc).
913 The sys_wcstombs converts characters in the private use area
914 back to the corresponding ASCII chars.
916 - If a wide character in a filename has no representation in the current
917 multibyte charset, then usually you wouldn't be able to access the
918 file. To fix this problem, sys_wcstombs creates a replacement multibyte
919 sequences for the non-representable wide-char. The sequence starts with
920 an ASCII CAN (0x18, Ctrl-X), followed by the UTF-8 representation of the
921 character. The sys_(cp_)mbstowcs function detects ASCII CAN characters
922 in the input multibyte string and converts the following multibyte
923 sequence in by treating it as an UTF-8 char. If that fails, the ASCII
924 CAN was probably standalone and it gets just copied over as ASCII CAN.
926 - Three cases have to be distinguished for the return value:
928 - dst == NULL; len is ignored, the return value is the number of bytes
929 required for the string without the trailing NUL, just like the return
930 value of the wcstombs function.
932 - dst != NULL, len == (size_t) -1; the return value is the size in bytes
933 of the destination string without the trailing NUL. If the incoming
934 wide char string was not NUL-terminated, the target string won't be
935 NUL-terminated either.
937 - dst != NULL; len != (size_t) -1; the return value is the size in bytes
938 of the destination string without the trailing NUL. The target string
939 will be NUL-terminated, no matter what. If the result is truncated due
940 to buffer size, it's a bug in Cygwin and the buffer in the calling
941 function should be raised.
943 size_t
944 _sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
945 bool is_path)
947 char buf[10];
948 char *ptr = dst;
949 wchar_t *pwcs = (wchar_t *) src;
950 size_t n = 0;
951 mbstate_t ps;
952 save_errno save;
953 wctomb_p f_wctomb = __WCTOMB;
955 if (f_wctomb == __ascii_wctomb)
956 f_wctomb = __utf8_wctomb;
957 memset (&ps, 0, sizeof ps);
958 if (dst == NULL)
959 len = (size_t) -1;
960 while (n < len && nwc-- > 0)
962 wchar_t pw = *pwcs;
963 int bytes;
964 unsigned char cwc;
966 /* Convert UNICODE private use area. Reverse functionality for the
967 ASCII area <= 0x7f (only for path names) is transform_chars above.
968 Reverse functionality for invalid bytes in a multibyte sequence is
969 in _sys_mbstowcs below. */
970 if (is_path && (pw & 0xff00) == 0xf000
971 && (((cwc = (pw & 0xff)) <= 0x7f && tfx_rev_chars[cwc] >= 0xf000)
972 || (cwc >= 0x80 && MB_CUR_MAX > 1)))
974 buf[0] = (char) cwc;
975 bytes = 1;
977 else
979 bytes = f_wctomb (_REENT, buf, pw, &ps);
980 if (bytes == -1 && f_wctomb != __utf8_wctomb)
982 /* Convert chars invalid in the current codepage to a sequence
983 ASCII CAN; UTF-8 representation of invalid char. */
984 buf[0] = 0x18; /* ASCII CAN */
985 bytes = __utf8_wctomb (_REENT, buf + 1, pw, &ps);
986 if (bytes == -1)
988 ++pwcs;
989 ps.__count = 0;
990 continue;
992 ++bytes; /* Add the ASCII CAN to the byte count. */
993 if (ps.__count == -4 && nwc > 0)
995 /* First half of a surrogate pair. */
996 ++pwcs;
997 if ((*pwcs & 0xfc00) != 0xdc00) /* Invalid second half. */
999 ++pwcs;
1000 ps.__count = 0;
1001 continue;
1003 bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, &ps);
1004 nwc--;
1008 if (n + bytes <= len)
1010 if (dst)
1012 for (int i = 0; i < bytes; ++i)
1013 *ptr++ = buf[i];
1015 if (*pwcs++ == 0x00)
1016 break;
1017 n += bytes;
1019 else
1020 break;
1022 if (n && dst && len != (size_t) -1)
1024 n = (n < len) ? n : len - 1;
1025 dst[n] = '\0';
1028 return n;
1031 /* Allocate a buffer big enough for the string, always including the
1032 terminating '\0'. The buffer pointer is returned in *dst_p, the return
1033 value is the number of bytes written to the buffer, as usual.
1034 The "type" argument determines where the resulting buffer is stored.
1035 It's either one of the cygheap_types values, or it's "HEAP_NOTHEAP".
1036 In the latter case the allocation uses simple calloc.
1038 Note that this code is shared by cygserver (which requires it via
1039 __small_vsprintf) and so when built there plain calloc is the
1040 only choice. */
1041 size_t
1042 _sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc,
1043 bool is_path)
1045 size_t ret;
1047 ret = _sys_wcstombs (NULL, (size_t) -1, src, nwc, is_path);
1048 if (ret > 0)
1050 size_t dlen = ret + 1;
1052 if (type == HEAP_NOTHEAP)
1053 *dst_p = (char *) calloc (dlen, sizeof (char));
1054 else
1055 *dst_p = (char *) ccalloc ((cygheap_types) type, dlen, sizeof (char));
1056 if (!*dst_p)
1057 return 0;
1058 ret = _sys_wcstombs (*dst_p, dlen, src, nwc, is_path);
1060 return ret;
1063 /* _sys_mbstowcs is actually most of the time called as sys_mbstowcs with
1064 a 0 codepage. If cp is not 0, the codepage is evaluated and used for the
1065 conversion. This is so that fhandler_console can switch to an alternate
1066 charset, which is the charset returned by GetConsoleCP (). Most of the
1067 time this is used for box and line drawing characters. */
1068 size_t
1069 _sys_mbstowcs (mbtowc_p f_mbtowc, wchar_t *dst, size_t dlen, const char *src,
1070 size_t nms)
1072 wchar_t *ptr = dst;
1073 unsigned const char *pmbs = (unsigned const char *) src;
1074 size_t count = 0;
1075 size_t len = dlen;
1076 int bytes;
1077 mbstate_t ps;
1078 save_errno save;
1080 memset (&ps, 0, sizeof ps);
1081 if (dst == NULL)
1082 len = (size_t)-1;
1083 while (len > 0 && nms > 0)
1085 /* ASCII CAN handling. */
1086 if (*pmbs == 0x18)
1088 /* Sanity check: If this is a lead CAN byte for a following UTF-8
1089 sequence, there must be at least two more bytes left, and the
1090 next byte must be a valid UTF-8 start byte. If the charset
1091 isn't UTF-8 anyway, try to convert the following bytes as UTF-8
1092 sequence. */
1093 if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4
1094 && f_mbtowc != __utf8_mbtowc)
1096 bytes = __utf8_mbtowc (_REENT, ptr, (const char *) pmbs + 1,
1097 nms - 1, &ps);
1098 if (bytes < 0)
1100 /* Invalid UTF-8 sequence? Treat the ASCII CAN character as
1101 stand-alone ASCII CAN char. */
1102 bytes = 1;
1103 if (dst)
1104 *ptr = 0x18;
1105 memset (&ps, 0, sizeof ps);
1107 else
1109 ++bytes; /* Count CAN byte */
1110 if (bytes > 1 && ps.__count == 4)
1112 /* First half of a surrogate. */
1113 wchar_t *ptr2 = dst ? ptr + 1 : NULL;
1114 int bytes2 = __utf8_mbtowc (_REENT, ptr2,
1115 (const char *) pmbs + bytes,
1116 nms - bytes, &ps);
1117 if (bytes2 < 0)
1118 memset (&ps, 0, sizeof ps);
1119 else
1121 bytes += bytes2;
1122 ++count;
1123 ptr = dst ? ptr + 1 : NULL;
1124 --len;
1129 /* Otherwise it's just a simple ASCII CAN. */
1130 else
1132 bytes = 1;
1133 if (dst)
1134 *ptr = 0x18;
1137 else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms,
1138 &ps)) < 0)
1140 /* The technique is based on a discussion here:
1141 http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html
1143 Invalid bytes in a multibyte sequence are converted to
1144 the private use area which is already used to store ASCII
1145 chars invalid in Windows filenames. This technque allows
1146 to store them in a symmetric way. */
1147 bytes = 1;
1148 if (dst)
1149 *ptr = L'\xf000' | *pmbs;
1150 memset (&ps, 0, sizeof ps);
1153 if (bytes > 0)
1155 pmbs += bytes;
1156 nms -= bytes;
1157 ++count;
1158 ptr = dst ? ptr + 1 : NULL;
1159 --len;
1161 else
1163 if (bytes == 0)
1164 ++count;
1165 break;
1169 if (count && dst)
1171 count = (count < dlen) ? count : dlen - 1;
1172 dst[count] = L'\0';
1175 return count;
1178 /* Same as sys_wcstombs_alloc, just backwards. */
1179 size_t
1180 sys_mbstowcs_alloc (wchar_t **dst_p, int type, const char *src, size_t nms)
1182 size_t ret;
1184 ret = sys_mbstowcs (NULL, (size_t) -1, src, nms);
1185 if (ret > 0)
1187 size_t dlen = ret + 1;
1189 if (type == HEAP_NOTHEAP)
1190 *dst_p = (wchar_t *) calloc (dlen, sizeof (wchar_t));
1191 else
1192 *dst_p = (wchar_t *) ccalloc ((cygheap_types) type, dlen,
1193 sizeof (wchar_t));
1194 if (!*dst_p)
1195 return 0;
1196 ret = sys_mbstowcs (*dst_p, dlen, src, nms);
1198 return ret;
/* Copy string, until c or <nul> is encountered.
   NUL-terminate the destination string (s1).
   Return pointer to terminating byte in dst string.

   *s2 is advanced along with the copy; on return it points to the
   character which stopped the copy (c or the terminating NUL). */
char *
strccpy (char *__restrict s1, const char **__restrict s2, char c)
{
  while (**s2 && **s2 != c)
    *s1++ = *((*s2)++);
  *s1 = 0;

  return s1;
}
/* Table with 256 entries, indexed by the unsigned byte value.  'A'-'Z'
   map to 'a'-'z', every other entry is its own index. */
const unsigned char case_folded_lower[] = {
   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
  32, '!', '"', '#', '$', '%', '&',  39, '(', ')', '*', '+', ',', '-', '.', '/',
 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
 '@', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[',  92, ']', '^', '_',
 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 127,
 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
/* Table with 256 entries, indexed by the unsigned byte value.  'a'-'z'
   map to 'A'-'Z', every other entry is its own index. */
const unsigned char case_folded_upper[] = {
   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
  32, '!', '"', '#', '$', '%', '&',  39, '(', ')', '*', '+', ',', '-', '.', '/',
 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[',  92, ']', '^', '_',
 '`', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', 127,
 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
/* Byte-indexed letter classification table.  The entry is 0x20 for the
   ASCII letters 'A'..'Z' and 'a'..'z' and 0 for every other value.
   NOTE(review): 0x20 is presumably used because it is the ASCII case bit;
   confirm against the macros that consume this table. */
const char isalpha_array[] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x08 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x18 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x28 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x38 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* '@', 'A'..'G' */ 0, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  /* 'H'..'O' */ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  /* 'P'..'W' */ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  /* 'X'..'Z', 0x5b..0x5f */ 0x20, 0x20, 0x20, 0, 0, 0, 0, 0,
  /* '`', 'a'..'g' */ 0, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  /* 'h'..'o' */ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  /* 'p'..'w' */ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  /* 'x'..'z', 0x7b..0x7f */ 0x20, 0x20, 0x20, 0, 0, 0, 0, 0,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x88 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x98 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa8 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb8 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc8 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd8 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe8 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xf8 */ 0, 0, 0, 0, 0, 0, 0, 0
};
1271 extern "C" int
1272 cygwin_wcscasecmp (const wchar_t *ws, const wchar_t *wt)
1274 UNICODE_STRING us, ut;
1276 RtlInitUnicodeString (&us, ws);
1277 RtlInitUnicodeString (&ut, wt);
1278 return RtlCompareUnicodeString (&us, &ut, TRUE);
1281 extern "C" int
1282 cygwin_wcsncasecmp (const wchar_t *ws, const wchar_t *wt, size_t n)
1284 UNICODE_STRING us, ut;
1285 size_t ls = 0, lt = 0;
1287 while (ws[ls] && ls < n)
1288 ++ls;
1289 RtlInitCountedUnicodeString (&us, ws, ls * sizeof (WCHAR));
1290 while (wt[lt] && lt < n)
1291 ++lt;
1292 RtlInitCountedUnicodeString (&ut, wt, lt * sizeof (WCHAR));
1293 return RtlCompareUnicodeString (&us, &ut, TRUE);
1296 extern "C" int
1297 cygwin_strcasecmp (const char *cs, const char *ct)
1299 UNICODE_STRING us, ut;
1300 ULONG len, ulen;
1302 len = strlen (cs) + 1;
1303 ulen = len * sizeof (WCHAR);
1304 RtlInitEmptyUnicodeString (&us, (PWCHAR) alloca (ulen), ulen);
1305 us.Length = sys_mbstowcs (us.Buffer, len, cs) * sizeof (WCHAR);
1307 len = strlen (ct) + 1;
1308 ulen = len * sizeof (WCHAR);
1309 RtlInitEmptyUnicodeString (&ut, (PWCHAR) alloca (ulen), ulen);
1310 ut.Length = sys_mbstowcs (ut.Buffer, len, ct) * sizeof (WCHAR);
1312 return RtlCompareUnicodeString (&us, &ut, TRUE);
1315 extern "C" int
1316 cygwin_strncasecmp (const char *cs, const char *ct, size_t n)
1318 UNICODE_STRING us, ut;
1319 ULONG ulen;
1320 size_t ls = 0, lt = 0;
1322 while (cs[ls] && ls < n)
1323 ++ls;
1324 ulen = (ls + 1) * sizeof (WCHAR);
1325 RtlInitEmptyUnicodeString (&us, (PWCHAR) alloca (ulen), ulen);
1326 us.Length = sys_mbstowcs (us.Buffer, ls + 1, cs, ls) * sizeof (WCHAR);
1328 while (ct[lt] && lt < n)
1329 ++lt;
1330 ulen = (lt + 1) * sizeof (WCHAR);
1331 RtlInitEmptyUnicodeString (&ut, (PWCHAR) alloca (ulen), ulen);
1332 ut.Length = sys_mbstowcs (ut.Buffer, lt + 1, ct, lt) * sizeof (WCHAR);
1334 return RtlCompareUnicodeString (&us, &ut, TRUE);
1337 extern "C" char *
1338 strlwr (char *string)
1340 UNICODE_STRING us;
1341 size_t len = (strlen (string) + 1) * sizeof (WCHAR);
1343 us.MaximumLength = len; us.Buffer = (PWCHAR) alloca (len);
1344 us.Length = sys_mbstowcs (us.Buffer, len, string) * sizeof (WCHAR)
1345 - sizeof (WCHAR);
1346 RtlDowncaseUnicodeString (&us, &us, FALSE);
1347 sys_wcstombs (string, len / sizeof (WCHAR), us.Buffer);
1348 return string;
1351 extern "C" char *
1352 strupr (char *string)
1354 UNICODE_STRING us;
1355 size_t len = (strlen (string) + 1) * sizeof (WCHAR);
1357 us.MaximumLength = len; us.Buffer = (PWCHAR) alloca (len);
1358 us.Length = sys_mbstowcs (us.Buffer, len, string) * sizeof (WCHAR)
1359 - sizeof (WCHAR);
1360 RtlUpcaseUnicodeString (&us, &us, FALSE);
1361 sys_wcstombs (string, len / sizeof (WCHAR), us.Buffer);
1362 return string;
1365 /* backslashify: Convert all forward slashes in src path to back slashes
1366 in dst path. Add a trailing slash to dst when trailing_slash_p arg
1367 is set to 1. */
1369 void
1370 backslashify (const char *src, char *dst, bool trailing_slash_p)
1372 const char *start = src;
1374 while (*src)
1376 if (*src == '/')
1377 *dst++ = '\\';
1378 else
1379 *dst++ = *src;
1380 ++src;
1382 if (trailing_slash_p
1383 && src > start
1384 && !isdirsep (src[-1]))
1385 *dst++ = '\\';
1386 *dst++ = 0;
1389 /* slashify: Convert all back slashes in src path to forward slashes
1390 in dst path. Add a trailing slash to dst when trailing_slash_p arg
1391 is set to 1. */
1393 void
1394 slashify (const char *src, char *dst, bool trailing_slash_p)
1396 const char *start = src;
1398 while (*src)
1400 if (*src == '\\')
1401 *dst++ = '/';
1402 else
1403 *dst++ = *src;
1404 ++src;
1406 if (trailing_slash_p
1407 && src > start
1408 && !isdirsep (src[-1]))
1409 *dst++ = '/';
1410 *dst++ = 0;
1413 static WCHAR hex_wchars[] = L"0123456789abcdef";
1415 NTSTATUS
1416 RtlInt64ToHexUnicodeString (ULONGLONG value, PUNICODE_STRING dest,
1417 BOOLEAN append)
1419 USHORT len = append ? dest->Length : 0;
1420 if (dest->MaximumLength - len < 16 * (int) sizeof (WCHAR))
1421 return STATUS_BUFFER_OVERFLOW;
1422 wchar_t *end = (PWCHAR) ((PBYTE) dest->Buffer + len);
1423 PWCHAR p = end + 16;
1424 while (p-- > end)
1426 *p = hex_wchars[value & 0xf];
1427 value >>= 4;
1429 dest->Length += 16 * sizeof (WCHAR);
1430 return STATUS_SUCCESS;