1 /* winduni.c -- unicode support for the windres program.
2 Copyright 1997, 1998, 2000, 2001, 2003, 2005, 2007, 2009
3 Free Software Foundation, Inc.
4 Written by Ian Lance Taylor, Cygnus Support.
5 Rewritten by Kai Tietz, Onevision.
7 This file is part of GNU Binutils.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
25 /* This file contains unicode support routines for the windres
26 program. Ideally, we would have generic unicode support which
27 would work on all systems. However, we don't. Instead, on a
28 Windows host, we are prepared to call some Windows routines. This
29 means that we will generate different output on Windows and Unix
30 hosts, but that seems better than not really supporting unicode at
35 #include "libiberty.h" /* for xstrdup */
37 /* Must be included before windows.h and winnls.h. */
38 #if defined (_WIN32) || defined (__CYGWIN__)
43 #include "safe-ctype.h"
49 static rc_uint_type
wind_WideCharToMultiByte (rc_uint_type
, const unichar
*, char *, rc_uint_type
);
50 static rc_uint_type
wind_MultiByteToWideChar (rc_uint_type
, const char *, unichar
*, rc_uint_type
);
51 static int unichar_isascii (const unichar
*, rc_uint_type
);
53 /* Convert an ASCII string to a unicode string. We just copy it,
54 expanding chars to shorts, rather than doing something intelligent. */
56 #if !defined (_WIN32) && !defined (__CYGWIN__)
58 /* Codepages mapped. */
59 static local_iconv_map codepages
[] =
62 { 1, "WINDOWS-1252" },
65 { 775, "WINBALTRIM" },
72 { 874, "WINDOWS-874" },
77 { 1250, "WINDOWS-1250" },
78 { 1251, "WINDOWS-1251" },
79 { 1252, "WINDOWS-1252" },
80 { 1253, "WINDOWS-1253" },
81 { 1254, "WINDOWS-1254" },
82 { 1255, "WINDOWS-1255" },
83 { 1256, "WINDOWS-1256" },
84 { 1257, "WINDOWS-1257" },
85 { 1258, "WINDOWS-1258" },
88 { CP_UTF16
, "UTF-16" },
89 { (rc_uint_type
) -1, NULL
}
92 /* Languages supported. */
93 static const wind_language_t languages
[] =
95 { 0x0000, 437, 1252, "Neutral", "Neutral" },
96 { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
97 { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
98 { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
99 { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
100 { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
101 { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
102 { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
103 { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
104 { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
105 { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokmål)", "Norway" },
106 { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
107 { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
108 { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
109 { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
110 { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
111 { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
112 { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
113 { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
114 { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
115 { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
116 { 0x042D, 850, 1252, "Basque", "Spain" },
117 { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
118 { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
119 { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
120 { 0x043C, 437, 1252, "Irish", "Ireland" },
121 { 0x043E, 850, 1252, "Malay", "Malaysia" },
122 { 0x0801, 864, 1256, "Arabic", "Iraq" },
123 { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
124 { 0x0807, 850, 1252, "German", "Switzerland" },
125 { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
126 { 0x080C, 850, 1252, "French", "Belgium" },
127 { 0x0810, 850, 1252, "Italian", "Switzerland" },
128 { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
129 { 0x0816, 850, 1252, "Portuguese", "Portugal" },
130 { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
131 { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
132 { 0x0C01, 864, 1256, "Arabic", "Egypt" },
133 { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
134 { 0x0C07, 850, 1252, "German", "Austria" },
135 { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
136 { 0x0C0C, 850, 1252, "French", "Canada"},
137 { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
138 { 0x1001, 864, 1256, "Arabic", "Libya" },
139 { 0x1004, 936, 936, "Chinese", "Singapore" },
140 { 0x1007, 850, 1252, "German", "Luxembourg" },
141 { 0x1009, 850, 1252, "English", "Canada" },
142 { 0x100A, 850, 1252, "Spanish", "Guatemala" },
143 { 0x100C, 850, 1252, "French", "Switzerland" },
144 { 0x1401, 864, 1256, "Arabic", "Algeria" },
145 { 0x1407, 850, 1252, "German", "Liechtenstein" },
146 { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
147 { 0x140C, 850, 1252, "French", "Luxembourg" },
148 { 0x1801, 864, 1256, "Arabic", "Morocco" },
149 { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
150 { 0x180C, 850, 1252, "French", "Monaco" },
151 { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
152 { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
153 { 0x2001, 864, 1256, "Arabic", "Oman" },
154 { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
155 { 0x2401, 864, 1256, "Arabic", "Yemen" },
156 { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
157 { 0x2801, 864, 1256, "Arabic", "Syria" },
158 { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
159 { 0x2C01, 864, 1256, "Arabic", "Jordan" },
160 { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
161 { 0x3001, 864, 1256, "Arabic", "Lebanon" },
162 { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
163 { 0x3401, 864, 1256, "Arabic", "Kuwait" },
164 { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
165 { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
166 { 0x380A, 850, 1252, "Spanish", "Uruguay" },
167 { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
168 { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
169 { 0x4001, 864, 1256, "Arabic", "Qatar" },
170 { 0x400A, 850, 1252, "Spanish", "Bolivia" },
171 { 0x440A, 850, 1252, "Spanish", "El Salvador" },
172 { 0x480A, 850, 1252, "Spanish", "Honduras" },
173 { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
174 { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
175 { (unsigned) -1, 0, 0, NULL
, NULL
}
180 /* Specifies the default codepage to be used for unicode
181 transformations. By default this is CP_ACP. */
182 rc_uint_type wind_default_codepage
= CP_ACP
;
184 /* Specifies the currently used codepage for unicode
185 transformations. By default this is CP_ACP. */
186 rc_uint_type wind_current_codepage
= CP_ACP
;
188 /* Convert an ASCII string to a unicode string. We just copy it,
189 expanding chars to shorts, rather than doing something intelligent. */
192 unicode_from_ascii (rc_uint_type
*length
, unichar
**unicode
, const char *ascii
)
194 unicode_from_codepage (length
, unicode
, ascii
, wind_current_codepage
);
197 /* Convert an ASCII string with length A_LENGTH to a unicode string. We just
198 copy it, expanding chars to shorts, rather than doing something intelligent.
199 This routine also converts \0 characters embedded within the string. */
202 unicode_from_ascii_len (rc_uint_type
*length
, unichar
**unicode
, const char *ascii
, rc_uint_type a_length
)
205 rc_uint_type tlen
, elen
, idx
= 0;
216 /* Make sure we have zero terminated string. */
217 p
= tmp
= (char *) alloca (a_length
+ 1);
218 memcpy (tmp
, ascii
, a_length
);
231 /* Make room for one more character. */
232 utmp
= (unichar
*) res_alloc (sizeof (unichar
) * (idx
+ 1));
235 memcpy (utmp
, *unicode
, idx
* sizeof (unichar
));
245 elen
= wind_MultiByteToWideChar (wind_current_codepage
, p
, NULL
, 0);
248 utmp
= ((unichar
*) res_alloc (elen
+ sizeof (unichar
) * 2));
249 wind_MultiByteToWideChar (wind_current_codepage
, p
, utmp
, elen
);
250 elen
/= sizeof (unichar
);
255 /* Make room for one more character. */
256 utmp
= (unichar
*) res_alloc (sizeof (unichar
) * (idx
+ 1));
259 memcpy (utmp
, *unicode
, idx
* sizeof (unichar
));
262 utmp
[idx
++] = ((unichar
) *p
) & 0xff;
270 up
= (unichar
*) res_alloc (sizeof (unichar
) * (idx
+ elen
));
272 memcpy (up
, *unicode
, idx
* sizeof (unichar
));
276 memcpy (&up
[idx
], utmp
, sizeof (unichar
) * elen
);
285 /* Convert a unicode string to an ASCII string. We just copy it,
286 shrinking shorts to chars, rather than doing something intelligent.
287 Shorts not within the char range are replaced by '_'. */
290 ascii_from_unicode (rc_uint_type
*length
, const unichar
*unicode
, char **ascii
)
292 codepage_from_unicode (length
, unicode
, ascii
, wind_current_codepage
);
295 /* Print the unicode string UNICODE to the file E. LENGTH is the
296 number of characters to print, or -1 if we should print until the
297 end of the string. FIXME: On a Windows host, we should be calling
298 some Windows function, probably WideCharToMultiByte. */
301 unicode_print (FILE *e
, const unichar
*unicode
, rc_uint_type length
)
309 if ((bfd_signed_vma
) length
> 0)
314 if (ch
== 0 && (bfd_signed_vma
) length
< 0)
319 if ((ch
& 0x7f) == ch
)
325 else if (ISPRINT (ch
))
360 fprintf (e
, "\\%03o", (unsigned int) ch
);
365 else if ((ch
& 0xff) == ch
)
366 fprintf (e
, "\\%03o", (unsigned int) ch
);
368 fprintf (e
, "\\x%04x", (unsigned int) ch
);
372 /* Print an ASCII string to a file. */
375 ascii_print (FILE *e
, const char *s
, rc_uint_type length
)
383 if ((bfd_signed_vma
) length
> 0)
388 if (ch
== 0 && (bfd_signed_vma
) length
< 0)
393 if ((ch
& 0x7f) == ch
)
399 else if (ISPRINT (ch
))
434 fprintf (e
, "\\%03o", (unsigned int) ch
);
440 fprintf (e
, "\\%03o", (unsigned int) ch
& 0xff);
445 unichar_len (const unichar
*unicode
)
450 while (unicode
[r
] != 0)
458 unichar_dup (const unichar
*unicode
)
465 for (len
= 0; unicode
[len
] != 0; ++len
)
468 r
= ((unichar
*) res_alloc (len
* sizeof (unichar
)));
469 memcpy (r
, unicode
, len
* sizeof (unichar
));
474 unichar_dup_uppercase (const unichar
*u
)
476 unichar
*r
= unichar_dup (u
);
482 for (i
= 0; r
[i
] != 0; ++i
)
484 if (r
[i
] >= 'a' && r
[i
] <= 'z')
491 unichar_isascii (const unichar
*u
, rc_uint_type len
)
495 if ((bfd_signed_vma
) len
< 0)
498 len
= (rc_uint_type
) unichar_len (u
);
503 for (i
= 0; i
< len
; i
++)
504 if ((u
[i
] & 0xff80) != 0)
510 unicode_print_quoted (FILE *e
, const unichar
*u
, rc_uint_type len
)
512 if (! unichar_isascii (u
, len
))
515 unicode_print (e
, u
, len
);
520 unicode_is_valid_codepage (rc_uint_type cp
)
522 if ((cp
& 0xffff) != cp
)
524 if (cp
== CP_UTF16
|| cp
== CP_ACP
)
527 #if !defined (_WIN32) && !defined (__CYGWIN__)
528 if (! wind_find_codepage_info (cp
))
532 return !! IsValidCodePage ((UINT
) cp
);
536 #if defined (_WIN32) || defined (__CYGWIN__)
538 #define max_cp_string_len 6
541 codepage_from_langid (unsigned short langid
)
543 char cp_string
[max_cp_string_len
];
546 memset (cp_string
, 0, max_cp_string_len
);
547 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
548 but is unavailable on Win95. */
549 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
550 LOCALE_IDEFAULTANSICODEPAGE
,
551 cp_string
, max_cp_string_len
);
552 /* If codepage data for an LCID is not installed on the user's system,
553 GetLocaleInfo returns an empty string. Fall back to system ANSI
557 return strtoul (cp_string
, 0, 10);
561 wincodepage_from_langid (unsigned short langid
)
563 char cp_string
[max_cp_string_len
];
566 memset (cp_string
, 0, max_cp_string_len
);
567 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
568 but is unavailable on Win95. */
569 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
570 LOCALE_IDEFAULTCODEPAGE
,
571 cp_string
, max_cp_string_len
);
572 /* If codepage data for an LCID is not installed on the user's system,
573 GetLocaleInfo returns an empty string. Fall back to system ANSI
577 return strtoul (cp_string
, 0, 10);
581 lang_from_langid (unsigned short langid
)
586 memset (cp_string
, 0, 261);
587 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
590 /* If codepage data for an LCID is not installed on the user's system,
591 GetLocaleInfo returns an empty string. Fall back to system ANSI
594 strcpy (cp_string
, "Neutral");
595 return xstrdup (cp_string
);
599 country_from_langid (unsigned short langid
)
604 memset (cp_string
, 0, 261);
605 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
608 /* If codepage data for an LCID is not installed on the user's system,
609 GetLocaleInfo returns an empty string. Fall back to system ANSI
612 strcpy (cp_string
, "Neutral");
613 return xstrdup (cp_string
);
618 const wind_language_t
*
619 wind_find_language_by_id (unsigned id
)
621 #if !defined (_WIN32) && !defined (__CYGWIN__)
626 for (i
= 0; languages
[i
].id
!= (unsigned) -1 && languages
[i
].id
!= id
; i
++)
628 if (languages
[i
].id
== id
)
629 return &languages
[i
];
632 static wind_language_t wl
;
635 wl
.doscp
= codepage_from_langid ((unsigned short) id
);
636 wl
.wincp
= wincodepage_from_langid ((unsigned short) id
);
637 wl
.name
= lang_from_langid ((unsigned short) id
);
638 wl
.country
= country_from_langid ((unsigned short) id
);
644 const local_iconv_map
*
645 wind_find_codepage_info (unsigned cp
)
647 #if !defined (_WIN32) && !defined (__CYGWIN__)
650 for (i
= 0; codepages
[i
].codepage
!= (rc_uint_type
) -1 && codepages
[i
].codepage
!= cp
; i
++)
652 if (codepages
[i
].codepage
== (rc_uint_type
) -1)
654 return &codepages
[i
];
656 static local_iconv_map lim
;
657 if (!unicode_is_valid_codepage (cp
))
665 /* Convert a codepage string to a unicode string. */
668 unicode_from_codepage (rc_uint_type
*length
, unichar
**u
, const char *src
, rc_uint_type cp
)
672 len
= wind_MultiByteToWideChar (cp
, src
, NULL
, 0);
675 *u
= ((unichar
*) res_alloc (len
));
676 wind_MultiByteToWideChar (cp
, src
, *u
, len
);
678 /* Discount the trailing '\0'. If MultiByteToWideChar failed,
679 this will set *length to -1. */
680 len
-= sizeof (unichar
);
683 *length
= len
/ sizeof (unichar
);
686 /* Convert a unicode string to a codepage string. */
689 codepage_from_unicode (rc_uint_type
*length
, const unichar
*unicode
, char **ascii
, rc_uint_type cp
)
693 len
= wind_WideCharToMultiByte (cp
, unicode
, NULL
, 0);
696 *ascii
= (char *) res_alloc (len
* sizeof (char));
697 wind_WideCharToMultiByte (cp
, unicode
, *ascii
, len
);
699 /* Discount the trailing '\0'. If WideCharToMultiByte failed,
700 this will set *length to -1. */
707 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
709 iconv_onechar (iconv_t cd
, ICONV_CONST
char *s
, char *d
, int d_len
, const char **n_s
, char **n_d
)
713 for (i
= 1; i
<= 32; i
++)
716 ICONV_CONST
char *tmp_s
= s
;
718 size_t s_left
= (size_t) i
;
719 size_t d_left
= (size_t) d_len
;
721 ret
= iconv (cd
, & tmp_s
, & s_left
, & tmp_d
, & d_left
);
723 if (ret
!= (size_t) -1)
735 wind_iconv_cp (rc_uint_type cp
)
737 const local_iconv_map
*lim
= wind_find_codepage_info (cp
);
741 return lim
->iconv_name
;
743 #endif /* HAVE_ICONV */
746 wind_MultiByteToWideChar (rc_uint_type cp
, const char *mb
,
747 unichar
*u
, rc_uint_type u_len
)
749 rc_uint_type ret
= 0;
751 #if defined (_WIN32) || defined (__CYGWIN__)
752 rc_uint_type conv_flags
= MB_PRECOMPOSED
;
754 /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
755 MultiByteToWideChar will set the last error to
756 ERROR_INVALID_FLAGS if we do. */
757 if (cp
== CP_UTF8
|| cp
== CP_UTF7
)
760 ret
= (rc_uint_type
) MultiByteToWideChar (cp
, conv_flags
,
762 /* Convert to bytes. */
763 ret
*= sizeof (unichar
);
765 #elif defined (HAVE_ICONV)
769 const char *iconv_name
= wind_iconv_cp (cp
);
771 if (!mb
|| !iconv_name
)
773 iconv_t cd
= iconv_open ("UTF-16", iconv_name
);
778 const char *n_mb
= "";
782 iret
= iconv_onechar (cd
, (ICONV_CONST
char *) mb
, p_tmp
, 32, & n_mb
, & n_tmp
);
790 size_t l_tmp
= (size_t) (n_tmp
- p_tmp
);
794 if ((size_t) u_len
< l_tmp
)
796 memcpy (u
, tmp
, l_tmp
);
804 if (tmp
[0] == 0 && tmp
[1] == 0)
812 ret
= strlen (mb
) + 1;
813 ret
*= sizeof (unichar
);
814 if (u
!= NULL
&& u_len
!= 0)
818 *u
++ = ((unichar
) *mb
) & 0xff;
821 while (u_len
!= 0 && mb
[-1] != 0);
823 if (u
!= NULL
&& u_len
!= 0)
830 wind_WideCharToMultiByte (rc_uint_type cp
, const unichar
*u
, char *mb
, rc_uint_type mb_len
)
832 rc_uint_type ret
= 0;
833 #if defined (_WIN32) || defined (__CYGWIN__)
834 WINBOOL used_def
= FALSE
;
836 ret
= (rc_uint_type
) WideCharToMultiByte (cp
, 0, u
, -1, mb
, mb_len
,
838 #elif defined (HAVE_ICONV)
842 const char *iconv_name
= wind_iconv_cp (cp
);
844 if (!u
|| !iconv_name
)
846 iconv_t cd
= iconv_open (iconv_name
, "UTF-16");
851 const char *n_u
= "";
855 iret
= iconv_onechar (cd
, (ICONV_CONST
char *) u
, p_tmp
, 32, &n_u
, & n_tmp
);
863 size_t l_tmp
= (size_t) (n_tmp
- p_tmp
);
867 if ((size_t) mb_len
< l_tmp
)
869 memcpy (mb
, tmp
, l_tmp
);
879 u
= (const unichar
*) n_u
;
893 while (*u
!= 0 && mb_len
!= 0)
895 if (u
[0] == (u
[0] & 0x7f))