Update my e-mail address.
[binutils-gdb.git] / binutils / winduni.c
blobec4f71c5baa417ca934711d2626945c7cfcb30b2
1 /* winduni.c -- unicode support for the windres program.
2 Copyright (C) 1997-2017 Free Software Foundation, Inc.
3 Written by Ian Lance Taylor, Cygnus Support.
4 Rewritten by Kai Tietz, Onevision.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
24 /* This file contains unicode support routines for the windres
25 program. Ideally, we would have generic unicode support which
26 would work on all systems. However, we don't. Instead, on a
27 Windows host, we are prepared to call some Windows routines. This
28 means that we will generate different output on Windows and Unix
29 hosts, but that seems better than not really supporting unicode at
30 all. */
32 #include "sysdep.h"
33 #include "bfd.h"
34 #include "libiberty.h" /* for xstrdup */
35 #include "bucomm.h"
36 /* Must be include before windows.h and winnls.h. */
37 #if defined (_WIN32) || defined (__CYGWIN__)
38 #include <windows.h>
39 #include <winnls.h>
40 #endif
41 #include "winduni.h"
42 #include "safe-ctype.h"
44 #if HAVE_ICONV
45 #include <iconv.h>
46 #endif
48 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
49 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
50 static int unichar_isascii (const unichar *, rc_uint_type);
52 /* Convert an ASCII string to a unicode string. We just copy it,
53 expanding chars to shorts, rather than doing something intelligent. */
55 #if !defined (_WIN32) && !defined (__CYGWIN__)
57 /* Codepages mapped.  Each entry pairs a codepage number with the
   iconv encoding name that wind_iconv_cp hands to iconv_open for it
   on non-Windows hosts.  The table is terminated by an entry whose
   codepage is (rc_uint_type) -1 and whose name is NULL; that sentinel
   is what wind_find_codepage_info stops on.  */
58 static local_iconv_map codepages[] =
60 { 0, "cp1252" },
61 { 1, "WINDOWS-1252" },
62 { 437, "MS-ANSI" },
63 { 737, "MS-GREEK" },
64 { 775, "WINBALTRIM" },
65 { 850, "MS-ANSI" },
66 { 852, "MS-EE" },
67 { 857, "MS-TURK" },
68 { 862, "CP862" },
69 { 864, "CP864" },
70 { 866, "MS-CYRL" },
71 { 874, "WINDOWS-874" },
72 { 932, "CP932" },
73 { 936, "CP936" },
74 { 949, "CP949" },
75 { 950, "CP950" },
76 { 1250, "WINDOWS-1250" },
77 { 1251, "WINDOWS-1251" },
78 { 1252, "WINDOWS-1252" },
79 { 1253, "WINDOWS-1253" },
80 { 1254, "WINDOWS-1254" },
81 { 1255, "WINDOWS-1255" },
82 { 1256, "WINDOWS-1256" },
83 { 1257, "WINDOWS-1257" },
84 { 1258, "WINDOWS-1258" },
85 { CP_UTF7, "UTF-7" },
86 { CP_UTF8, "UTF-8" },
87 { CP_UTF16, "UTF-16LE" },
88 { (rc_uint_type) -1, NULL }
91 /* Languages supported.  This table is only compiled on non-Windows
   hosts, where wind_find_language_by_id searches it by language id.
   Each entry is presumably { id, doscp, wincp, name, country } (the
   members wind_find_language_by_id fills in on Windows hosts) --
   TODO confirm the member order against the wind_language_t
   declaration.  The table ends with an entry whose id is
   (unsigned) -1.
   NOTE(review): entry 0x0C1A uses codepage 855, which has no entry in
   the codepages table of this file.  */
92 static const wind_language_t languages[] =
94 { 0x0000, 437, 1252, "Neutral", "Neutral" },
95 { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
96 { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
97 { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
98 { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
99 { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
100 { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
101 { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
102 { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
103 { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
104 { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
105 { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
106 { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
107 { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
108 { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
109 { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
110 { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
111 { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
112 { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
113 { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
114 { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
115 { 0x042D, 850, 1252, "Basque", "Spain" },
116 { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
117 { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
118 { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
119 { 0x043C, 437, 1252, "Irish", "Ireland" },
120 { 0x043E, 850, 1252, "Malay", "Malaysia" },
121 { 0x0801, 864, 1256, "Arabic", "Iraq" },
122 { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
123 { 0x0807, 850, 1252, "German", "Switzerland" },
124 { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
125 { 0x080C, 850, 1252, "French", "Belgium" },
126 { 0x0810, 850, 1252, "Italian", "Switzerland" },
127 { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
128 { 0x0816, 850, 1252, "Portuguese", "Portugal" },
129 { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
130 { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
131 { 0x0C01, 864, 1256, "Arabic", "Egypt" },
132 { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
133 { 0x0C07, 850, 1252, "German", "Austria" },
134 { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
135 { 0x0C0C, 850, 1252, "French", "Canada"},
136 { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
137 { 0x1001, 864, 1256, "Arabic", "Libya" },
138 { 0x1004, 936, 936, "Chinese", "Singapore" },
139 { 0x1007, 850, 1252, "German", "Luxembourg" },
140 { 0x1009, 850, 1252, "English", "Canada" },
141 { 0x100A, 850, 1252, "Spanish", "Guatemala" },
142 { 0x100C, 850, 1252, "French", "Switzerland" },
143 { 0x1401, 864, 1256, "Arabic", "Algeria" },
144 { 0x1407, 850, 1252, "German", "Liechtenstein" },
145 { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
146 { 0x140C, 850, 1252, "French", "Luxembourg" },
147 { 0x1801, 864, 1256, "Arabic", "Morocco" },
148 { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
149 { 0x180C, 850, 1252, "French", "Monaco" },
150 { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
151 { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
152 { 0x2001, 864, 1256, "Arabic", "Oman" },
153 { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
154 { 0x2401, 864, 1256, "Arabic", "Yemen" },
155 { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
156 { 0x2801, 864, 1256, "Arabic", "Syria" },
157 { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
158 { 0x2C01, 864, 1256, "Arabic", "Jordan" },
159 { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
160 { 0x3001, 864, 1256, "Arabic", "Lebanon" },
161 { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
162 { 0x3401, 864, 1256, "Arabic", "Kuwait" },
163 { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
164 { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
165 { 0x380A, 850, 1252, "Spanish", "Uruguay" },
166 { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
167 { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
168 { 0x4001, 864, 1256, "Arabic", "Qatar" },
169 { 0x400A, 850, 1252, "Spanish", "Bolivia" },
170 { 0x440A, 850, 1252, "Spanish", "El Salvador" },
171 { 0x480A, 850, 1252, "Spanish", "Honduras" },
172 { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
173 { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
174 { (unsigned) -1, 0, 0, NULL, NULL }
177 #endif
179 /* Specifies the default codepage to be used for unicode
   transformations.  It is initialized to CP_ACP.  */
181 rc_uint_type wind_default_codepage = CP_ACP;
183 /* Specifies the currently used codepage for unicode
   transformations.  This is the codepage that unicode_from_ascii,
   unicode_from_ascii_len and ascii_from_unicode convert with.  It is
   initialized to CP_ACP.  */
185 rc_uint_type wind_current_codepage = CP_ACP;
187 /* Convert an ASCII string to a unicode string. We just copy it,
188 expanding chars to shorts, rather than doing something intelligent. */
190 void
191 unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
193 unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
196 /* Convert an ASCII string with length A_LENGTH to a unicode string. We just
197 copy it, expanding chars to shorts, rather than doing something intelligent.
198 This routine converts also \0 within a string. */
200 void
201 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
203 char *tmp, *p;
204 rc_uint_type tlen, elen, idx = 0;
206 *unicode = NULL;
208 if (!a_length)
210 if (length)
211 *length = 0;
212 return;
215 /* Make sure we have zero terminated string. */
216 p = tmp = (char *) xmalloc (a_length + 1);
217 memcpy (tmp, ascii, a_length);
218 tmp[a_length] = 0;
220 while (a_length > 0)
222 unichar *utmp, *up;
224 tlen = strlen (p);
226 if (tlen > a_length)
227 tlen = a_length;
228 if (*p == 0)
230 /* Make room for one more character. */
231 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
232 if (idx > 0)
234 memcpy (utmp, *unicode, idx * sizeof (unichar));
236 *unicode = utmp;
237 utmp[idx++] = 0;
238 --a_length;
239 p++;
240 continue;
242 utmp = NULL;
243 elen = 0;
244 elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
245 if (elen)
247 utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
248 wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
249 elen /= sizeof (unichar);
250 elen --;
252 else
254 /* Make room for one more character. */
255 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
256 if (idx > 0)
258 memcpy (utmp, *unicode, idx * sizeof (unichar));
260 *unicode = utmp;
261 utmp[idx++] = ((unichar) *p) & 0xff;
262 --a_length;
263 p++;
264 continue;
266 p += tlen;
267 a_length -= tlen;
269 up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
270 if (idx > 0)
271 memcpy (up, *unicode, idx * sizeof (unichar));
273 *unicode = up;
274 if (elen)
275 memcpy (&up[idx], utmp, sizeof (unichar) * elen);
277 idx += elen;
280 if (length)
281 *length = idx;
283 free (tmp);
286 /* Convert an unicode string to an ASCII string. We just copy it,
287 shrink shorts to chars, rather than doing something intelligent.
288 Shorts with not within the char range are replaced by '_'. */
290 void
291 ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
293 codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
296 /* Print the unicode string UNICODE to the file E. LENGTH is the
297 number of characters to print, or -1 if we should print until the
298 end of the string. FIXME: On a Windows host, we should be calling
299 some Windows function, probably WideCharToMultiByte. */
301 void
302 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
304 while (1)
306 unichar ch;
308 if (length == 0)
309 return;
310 if ((bfd_signed_vma) length > 0)
311 --length;
313 ch = *unicode;
315 if (ch == 0 && (bfd_signed_vma) length < 0)
316 return;
318 ++unicode;
320 if ((ch & 0x7f) == ch)
322 if (ch == '\\')
323 fputs ("\\\\", e);
324 else if (ch == '"')
325 fputs ("\"\"", e);
326 else if (ISPRINT (ch))
327 putc (ch, e);
328 else
330 switch (ch)
332 case ESCAPE_A:
333 fputs ("\\a", e);
334 break;
336 case ESCAPE_B:
337 fputs ("\\b", e);
338 break;
340 case ESCAPE_F:
341 fputs ("\\f", e);
342 break;
344 case ESCAPE_N:
345 fputs ("\\n", e);
346 break;
348 case ESCAPE_R:
349 fputs ("\\r", e);
350 break;
352 case ESCAPE_T:
353 fputs ("\\t", e);
354 break;
356 case ESCAPE_V:
357 fputs ("\\v", e);
358 break;
360 default:
361 fprintf (e, "\\%03o", (unsigned int) ch);
362 break;
366 else if ((ch & 0xff) == ch)
367 fprintf (e, "\\%03o", (unsigned int) ch);
368 else
369 fprintf (e, "\\x%04x", (unsigned int) ch);
373 /* Print a unicode string to a file. */
375 void
376 ascii_print (FILE *e, const char *s, rc_uint_type length)
378 while (1)
380 char ch;
382 if (length == 0)
383 return;
384 if ((bfd_signed_vma) length > 0)
385 --length;
387 ch = *s;
389 if (ch == 0 && (bfd_signed_vma) length < 0)
390 return;
392 ++s;
394 if ((ch & 0x7f) == ch)
396 if (ch == '\\')
397 fputs ("\\\\", e);
398 else if (ch == '"')
399 fputs ("\"\"", e);
400 else if (ISPRINT (ch))
401 putc (ch, e);
402 else
404 switch (ch)
406 case ESCAPE_A:
407 fputs ("\\a", e);
408 break;
410 case ESCAPE_B:
411 fputs ("\\b", e);
412 break;
414 case ESCAPE_F:
415 fputs ("\\f", e);
416 break;
418 case ESCAPE_N:
419 fputs ("\\n", e);
420 break;
422 case ESCAPE_R:
423 fputs ("\\r", e);
424 break;
426 case ESCAPE_T:
427 fputs ("\\t", e);
428 break;
430 case ESCAPE_V:
431 fputs ("\\v", e);
432 break;
434 default:
435 fprintf (e, "\\%03o", (unsigned int) ch);
436 break;
440 else
441 fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
445 rc_uint_type
446 unichar_len (const unichar *unicode)
448 rc_uint_type r = 0;
450 if (unicode)
451 while (unicode[r] != 0)
452 r++;
453 else
454 --r;
455 return r;
458 unichar *
459 unichar_dup (const unichar *unicode)
461 unichar *r;
462 int len;
464 if (! unicode)
465 return NULL;
466 for (len = 0; unicode[len] != 0; ++len)
468 ++len;
469 r = ((unichar *) res_alloc (len * sizeof (unichar)));
470 memcpy (r, unicode, len * sizeof (unichar));
471 return r;
474 unichar *
475 unichar_dup_uppercase (const unichar *u)
477 unichar *r = unichar_dup (u);
478 int i;
480 if (! r)
481 return NULL;
483 for (i = 0; r[i] != 0; ++i)
485 if (r[i] >= 'a' && r[i] <= 'z')
486 r[i] &= 0xdf;
488 return r;
491 static int
492 unichar_isascii (const unichar *u, rc_uint_type len)
494 rc_uint_type i;
496 if ((bfd_signed_vma) len < 0)
498 if (u)
499 len = (rc_uint_type) unichar_len (u);
500 else
501 len = 0;
504 for (i = 0; i < len; i++)
505 if ((u[i] & 0xff80) != 0)
506 return 0;
507 return 1;
510 void
511 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
513 if (! unichar_isascii (u, len))
514 fputc ('L', e);
515 fputc ('"', e);
516 unicode_print (e, u, len);
517 fputc ('"', e);
521 unicode_is_valid_codepage (rc_uint_type cp)
523 if ((cp & 0xffff) != cp)
524 return 0;
525 if (cp == CP_UTF16 || cp == CP_ACP)
526 return 1;
528 #if !defined (_WIN32) && !defined (__CYGWIN__)
529 if (! wind_find_codepage_info (cp))
530 return 0;
531 return 1;
532 #else
533 return !! IsValidCodePage ((UINT) cp);
534 #endif
537 #if defined (_WIN32) || defined (__CYGWIN__)
539 #define max_cp_string_len 6
541 static unsigned int
542 codepage_from_langid (unsigned short langid)
544 char cp_string [max_cp_string_len];
545 int c;
547 memset (cp_string, 0, max_cp_string_len);
548 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
549 but is unavailable on Win95. */
550 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
551 LOCALE_IDEFAULTANSICODEPAGE,
552 cp_string, max_cp_string_len);
553 /* If codepage data for an LCID is not installed on users's system,
554 GetLocaleInfo returns an empty string. Fall back to system ANSI
555 default. */
556 if (c == 0)
557 return CP_ACP;
558 return strtoul (cp_string, 0, 10);
561 static unsigned int
562 wincodepage_from_langid (unsigned short langid)
564 char cp_string [max_cp_string_len];
565 int c;
567 memset (cp_string, 0, max_cp_string_len);
568 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
569 but is unavailable on Win95. */
570 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
571 LOCALE_IDEFAULTCODEPAGE,
572 cp_string, max_cp_string_len);
573 /* If codepage data for an LCID is not installed on users's system,
574 GetLocaleInfo returns an empty string. Fall back to system ANSI
575 default. */
576 if (c == 0)
577 return CP_OEM;
578 return strtoul (cp_string, 0, 10);
581 static char *
582 lang_from_langid (unsigned short langid)
584 char cp_string[261];
585 int c;
587 memset (cp_string, 0, 261);
588 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
589 LOCALE_SENGLANGUAGE,
590 cp_string, 260);
591 /* If codepage data for an LCID is not installed on users's system,
592 GetLocaleInfo returns an empty string. Fall back to system ANSI
593 default. */
594 if (c == 0)
595 strcpy (cp_string, "Neutral");
596 return xstrdup (cp_string);
599 static char *
600 country_from_langid (unsigned short langid)
602 char cp_string[261];
603 int c;
605 memset (cp_string, 0, 261);
606 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
607 LOCALE_SENGCOUNTRY,
608 cp_string, 260);
609 /* If codepage data for an LCID is not installed on users's system,
610 GetLocaleInfo returns an empty string. Fall back to system ANSI
611 default. */
612 if (c == 0)
613 strcpy (cp_string, "Neutral");
614 return xstrdup (cp_string);
617 #endif
619 const wind_language_t *
620 wind_find_language_by_id (unsigned id)
622 #if !defined (_WIN32) && !defined (__CYGWIN__)
623 int i;
625 if (! id)
626 return NULL;
627 for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
629 if (languages[i].id == id)
630 return &languages[i];
631 return NULL;
632 #else
633 static wind_language_t wl;
635 wl.id = id;
636 wl.doscp = codepage_from_langid ((unsigned short) id);
637 wl.wincp = wincodepage_from_langid ((unsigned short) id);
638 wl.name = lang_from_langid ((unsigned short) id);
639 wl.country = country_from_langid ((unsigned short) id);
641 return & wl;
642 #endif
645 const local_iconv_map *
646 wind_find_codepage_info (unsigned cp)
648 #if !defined (_WIN32) && !defined (__CYGWIN__)
649 int i;
651 for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
653 if (codepages[i].codepage == (rc_uint_type) -1)
654 return NULL;
655 return &codepages[i];
656 #else
657 static local_iconv_map lim;
658 if (!unicode_is_valid_codepage (cp))
659 return NULL;
660 lim.codepage = cp;
661 lim.iconv_name = "";
662 return & lim;
663 #endif
666 /* Convert an Codepage string to a unicode string. */
668 void
669 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
671 rc_uint_type len;
673 len = wind_MultiByteToWideChar (cp, src, NULL, 0);
674 if (len)
676 *u = ((unichar *) res_alloc (len));
677 wind_MultiByteToWideChar (cp, src, *u, len);
679 /* Discount the trailing '/0'. If MultiByteToWideChar failed,
680 this will set *length to -1. */
681 len -= sizeof (unichar);
683 if (length != NULL)
684 *length = len / sizeof (unichar);
687 /* Convert an unicode string to an codepage string. */
689 void
690 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
692 rc_uint_type len;
694 len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
695 if (len)
697 *ascii = (char *) res_alloc (len * sizeof (char));
698 wind_WideCharToMultiByte (cp, unicode, *ascii, len);
700 /* Discount the trailing '/0'. If MultiByteToWideChar failed,
701 this will set *length to -1. */
702 len--;
704 if (length != NULL)
705 *length = len;
708 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
709 static int
710 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
712 int i;
714 for (i = 1; i <= 32; i++)
716 char *tmp_d = d;
717 ICONV_CONST char *tmp_s = s;
718 size_t ret;
719 size_t s_left = (size_t) i;
720 size_t d_left = (size_t) d_len;
722 ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
724 if (ret != (size_t) -1)
726 *n_s = tmp_s;
727 *n_d = tmp_d;
728 return 0;
732 return 1;
735 static const char *
736 wind_iconv_cp (rc_uint_type cp)
738 const local_iconv_map *lim = wind_find_codepage_info (cp);
740 if (!lim)
741 return NULL;
742 return lim->iconv_name;
744 #endif /* HAVE_ICONV */
746 static rc_uint_type
747 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
748 unichar *u, rc_uint_type u_len)
750 rc_uint_type ret = 0;
752 #if defined (_WIN32) || defined (__CYGWIN__)
753 rc_uint_type conv_flags = MB_PRECOMPOSED;
755 /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
756 MultiByteToWideChar will set the last error to
757 ERROR_INVALID_FLAGS if we do. */
758 if (cp == CP_UTF8 || cp == CP_UTF7)
759 conv_flags = 0;
761 ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
762 mb, -1, u, u_len);
763 /* Convert to bytes. */
764 ret *= sizeof (unichar);
766 #elif defined (HAVE_ICONV)
767 int first = 1;
768 char tmp[32];
769 char *p_tmp;
770 const char *iconv_name = wind_iconv_cp (cp);
772 if (!mb || !iconv_name)
773 return 0;
774 iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
776 while (1)
778 int iret;
779 const char *n_mb = "";
780 char *n_tmp = "";
782 p_tmp = tmp;
783 iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
784 if (first)
786 first = 0;
787 continue;
789 if (!iret)
791 size_t l_tmp = (size_t) (n_tmp - p_tmp);
793 if (u)
795 if ((size_t) u_len < l_tmp)
796 break;
797 memcpy (u, tmp, l_tmp);
798 u += l_tmp/2;
799 u_len -= l_tmp;
801 ret += l_tmp;
803 else
804 break;
805 if (tmp[0] == 0 && tmp[1] == 0)
806 break;
807 mb = n_mb;
809 iconv_close (cd);
810 #else
811 if (cp)
812 ret = 0;
813 ret = strlen (mb) + 1;
814 ret *= sizeof (unichar);
815 if (u != NULL && u_len != 0)
819 *u++ = ((unichar) *mb) & 0xff;
820 --u_len; mb++;
822 while (u_len != 0 && mb[-1] != 0);
824 if (u != NULL && u_len != 0)
825 *u = 0;
826 #endif
827 return ret;
830 static rc_uint_type
831 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
833 rc_uint_type ret = 0;
834 #if defined (_WIN32) || defined (__CYGWIN__)
835 WINBOOL used_def = FALSE;
837 ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
838 NULL, & used_def);
839 #elif defined (HAVE_ICONV)
840 int first = 1;
841 char tmp[32];
842 char *p_tmp;
843 const char *iconv_name = wind_iconv_cp (cp);
845 if (!u || !iconv_name)
846 return 0;
847 iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
849 while (1)
851 int iret;
852 const char *n_u = "";
853 char *n_tmp = "";
855 p_tmp = tmp;
856 iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
857 if (first)
859 first = 0;
860 continue;
862 if (!iret)
864 size_t l_tmp = (size_t) (n_tmp - p_tmp);
866 if (mb)
868 if ((size_t) mb_len < l_tmp)
869 break;
870 memcpy (mb, tmp, l_tmp);
871 mb += l_tmp;
872 mb_len -= l_tmp;
874 ret += l_tmp;
876 else
877 break;
878 if (u[0] == 0)
879 break;
880 u = (const unichar *) n_u;
882 iconv_close (cd);
883 #else
884 if (cp)
885 ret = 0;
887 while (u[ret] != 0)
888 ++ret;
890 ++ret;
892 if (mb)
894 while (*u != 0 && mb_len != 0)
896 if (u[0] == (u[0] & 0x7f))
897 *mb++ = (char) u[0];
898 else
899 *mb++ = '_';
900 ++u; --mb_len;
902 if (mb_len != 0)
903 *mb = 0;
905 #endif
906 return ret;