Release 20050930.
[wine/gsoc-2012-control.git] / libs / unicode / wctomb.c
bloba7353f7c9f6f92c263814afb43410c271cc36a70
/*
 * WideCharToMultiByte implementation
 *
 * Copyright 2000 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
21 #include <string.h>
23 #include "wine/unicode.h"
25 /* search for a character in the unicode_compose_table; helper for compose() */
26 static inline int binary_search( WCHAR ch, int low, int high )
28 extern const WCHAR unicode_compose_table[];
29 while (low <= high)
31 int pos = (low + high) / 2;
32 if (unicode_compose_table[2*pos] < ch)
34 low = pos + 1;
35 continue;
37 if (unicode_compose_table[2*pos] > ch)
39 high = pos - 1;
40 continue;
42 return pos;
44 return -1;
47 /* return the result of the composition of two Unicode chars, or 0 if none */
48 static WCHAR compose( const WCHAR *str )
50 extern const WCHAR unicode_compose_table[];
51 extern const unsigned int unicode_compose_table_size;
53 int idx = 1, low = 0, high = unicode_compose_table_size - 1;
54 for (;;)
56 int pos = binary_search( str[idx], low, high );
57 if (pos == -1) return 0;
58 if (!idx--) return unicode_compose_table[2*pos+1];
59 low = unicode_compose_table[2*pos+1];
60 high = unicode_compose_table[2*pos+3] - 1;
/****************************************************************/
/* sbcs support */
68 /* check if 'ch' is an acceptable sbcs mapping for 'wch' */
69 static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
70 WCHAR wch, unsigned char ch )
72 if (flags & WC_NO_BEST_FIT_CHARS) return (table->cp2uni[ch] == wch);
73 if (ch != (unsigned char)table->info.def_char) return 1;
74 return (wch == table->info.def_unicode_char);
77 /* query necessary dst length for src string */
78 static int get_length_sbcs( const struct sbcs_table *table, int flags,
79 const WCHAR *src, unsigned int srclen, int *used )
81 const unsigned char * const uni2cp_low = table->uni2cp_low;
82 const unsigned short * const uni2cp_high = table->uni2cp_high;
83 int ret, tmp;
84 WCHAR composed;
86 if (!used) used = &tmp; /* avoid checking on every char */
87 *used = 0;
89 for (ret = 0; srclen; ret++, src++, srclen--)
91 WCHAR wch = *src;
92 unsigned char ch;
94 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
96 /* now check if we can use the composed char */
97 ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
98 if (is_valid_sbcs_mapping( table, flags, composed, ch ))
100 /* we have a good mapping, use it */
101 src++;
102 srclen--;
103 continue;
105 /* no mapping for the composed char, check the other flags */
106 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
108 *used = 1;
109 src++; /* skip the non-spacing char */
110 srclen--;
111 continue;
113 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
115 src++;
116 srclen--;
118 /* WC_SEPCHARS is the default */
120 if (!*used)
122 ch = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
123 *used = !is_valid_sbcs_mapping( table, flags, wch, ch );
126 return ret;
129 /* wcstombs for single-byte code page */
130 static inline int wcstombs_sbcs( const struct sbcs_table *table,
131 const WCHAR *src, unsigned int srclen,
132 char *dst, unsigned int dstlen )
134 const unsigned char * const uni2cp_low = table->uni2cp_low;
135 const unsigned short * const uni2cp_high = table->uni2cp_high;
136 int ret = srclen;
138 if (dstlen < srclen)
140 /* buffer too small: fill it up to dstlen and return error */
141 srclen = dstlen;
142 ret = -1;
145 while (srclen >= 16)
147 dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)];
148 dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)];
149 dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)];
150 dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)];
151 dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)];
152 dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)];
153 dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)];
154 dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)];
155 dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)];
156 dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)];
157 dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
158 dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
159 dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
160 dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
161 dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
162 dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
163 src += 16;
164 dst += 16;
165 srclen -= 16;
168 /* now handle remaining characters */
169 src += srclen;
170 dst += srclen;
171 switch(srclen)
173 case 15: dst[-15] = uni2cp_low[uni2cp_high[src[-15] >> 8] + (src[-15] & 0xff)];
174 case 14: dst[-14] = uni2cp_low[uni2cp_high[src[-14] >> 8] + (src[-14] & 0xff)];
175 case 13: dst[-13] = uni2cp_low[uni2cp_high[src[-13] >> 8] + (src[-13] & 0xff)];
176 case 12: dst[-12] = uni2cp_low[uni2cp_high[src[-12] >> 8] + (src[-12] & 0xff)];
177 case 11: dst[-11] = uni2cp_low[uni2cp_high[src[-11] >> 8] + (src[-11] & 0xff)];
178 case 10: dst[-10] = uni2cp_low[uni2cp_high[src[-10] >> 8] + (src[-10] & 0xff)];
179 case 9: dst[-9] = uni2cp_low[uni2cp_high[src[-9] >> 8] + (src[-9] & 0xff)];
180 case 8: dst[-8] = uni2cp_low[uni2cp_high[src[-8] >> 8] + (src[-8] & 0xff)];
181 case 7: dst[-7] = uni2cp_low[uni2cp_high[src[-7] >> 8] + (src[-7] & 0xff)];
182 case 6: dst[-6] = uni2cp_low[uni2cp_high[src[-6] >> 8] + (src[-6] & 0xff)];
183 case 5: dst[-5] = uni2cp_low[uni2cp_high[src[-5] >> 8] + (src[-5] & 0xff)];
184 case 4: dst[-4] = uni2cp_low[uni2cp_high[src[-4] >> 8] + (src[-4] & 0xff)];
185 case 3: dst[-3] = uni2cp_low[uni2cp_high[src[-3] >> 8] + (src[-3] & 0xff)];
186 case 2: dst[-2] = uni2cp_low[uni2cp_high[src[-2] >> 8] + (src[-2] & 0xff)];
187 case 1: dst[-1] = uni2cp_low[uni2cp_high[src[-1] >> 8] + (src[-1] & 0xff)];
188 case 0: break;
190 return ret;
193 /* slow version of wcstombs_sbcs that handles the various flags */
194 static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
195 const WCHAR *src, unsigned int srclen,
196 char *dst, unsigned int dstlen,
197 const char *defchar, int *used )
199 const unsigned char * const uni2cp_low = table->uni2cp_low;
200 const unsigned short * const uni2cp_high = table->uni2cp_high;
201 unsigned char def;
202 unsigned int len;
203 int tmp;
204 WCHAR composed;
206 if (!defchar)
207 def = table->info.def_char & 0xff;
208 else
209 def = *defchar;
211 if (!used) used = &tmp; /* avoid checking on every char */
212 *used = 0;
214 for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
216 WCHAR wch = *src;
218 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
220 /* now check if we can use the composed char */
221 *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
222 if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
224 /* we have a good mapping, use it */
225 src++;
226 srclen--;
227 continue;
229 /* no mapping for the composed char, check the other flags */
230 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
232 *dst = def;
233 *used = 1;
234 src++; /* skip the non-spacing char */
235 srclen--;
236 continue;
238 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
240 src++;
241 srclen--;
243 /* WC_SEPCHARS is the default */
246 *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
247 if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
249 *dst = def;
250 *used = 1;
253 if (srclen) return -1; /* overflow */
254 return dstlen - len;
/****************************************************************/
/* dbcs support */
261 /* check if 'ch' is an acceptable dbcs mapping for 'wch' */
262 static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
263 WCHAR wch, unsigned short ch )
265 if (ch == table->info.def_char && wch != table->info.def_unicode_char) return 0;
266 if (flags & WC_NO_BEST_FIT_CHARS)
268 /* check if char maps back to the same Unicode value */
269 if (ch & 0xff00)
271 unsigned char off = table->cp2uni_leadbytes[ch >> 8];
272 return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
274 return (table->cp2uni[ch & 0xff] == wch);
276 return 1;
279 /* query necessary dst length for src string */
280 static int get_length_dbcs( const struct dbcs_table *table, int flags,
281 const WCHAR *src, unsigned int srclen,
282 const char *defchar, int *used )
284 const unsigned short * const uni2cp_low = table->uni2cp_low;
285 const unsigned short * const uni2cp_high = table->uni2cp_high;
286 WCHAR defchar_value = table->info.def_char;
287 WCHAR composed;
288 int len, tmp;
290 if (!defchar && !used && !(flags & WC_COMPOSITECHECK))
292 for (len = 0; srclen; srclen--, src++, len++)
294 if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
296 return len;
299 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
300 if (!used) used = &tmp; /* avoid checking on every char */
301 *used = 0;
302 for (len = 0; srclen; len++, srclen--, src++)
304 unsigned short res;
305 WCHAR wch = *src;
307 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
309 /* now check if we can use the composed char */
310 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
312 if (is_valid_dbcs_mapping( table, flags, composed, res ))
314 /* we have a good mapping for the composed char, use it */
315 if (res & 0xff00) len++;
316 src++;
317 srclen--;
318 continue;
320 /* no mapping for the composed char, check the other flags */
321 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
323 if (defchar_value & 0xff00) len++;
324 *used = 1;
325 src++; /* skip the non-spacing char */
326 srclen--;
327 continue;
329 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
331 src++;
332 srclen--;
334 /* WC_SEPCHARS is the default */
337 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
338 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
340 res = defchar_value;
341 *used = 1;
343 if (res & 0xff00) len++;
345 return len;
348 /* wcstombs for double-byte code page */
349 static inline int wcstombs_dbcs( const struct dbcs_table *table,
350 const WCHAR *src, unsigned int srclen,
351 char *dst, unsigned int dstlen )
353 const unsigned short * const uni2cp_low = table->uni2cp_low;
354 const unsigned short * const uni2cp_high = table->uni2cp_high;
355 int len;
357 for (len = dstlen; srclen && len; len--, srclen--, src++)
359 unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
360 if (res & 0xff00)
362 if (len == 1) break; /* do not output a partial char */
363 len--;
364 *dst++ = res >> 8;
366 *dst++ = (char)res;
368 if (srclen) return -1; /* overflow */
369 return dstlen - len;
372 /* slow version of wcstombs_dbcs that handles the various flags */
373 static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
374 const WCHAR *src, unsigned int srclen,
375 char *dst, unsigned int dstlen,
376 const char *defchar, int *used )
378 const unsigned short * const uni2cp_low = table->uni2cp_low;
379 const unsigned short * const uni2cp_high = table->uni2cp_high;
380 WCHAR defchar_value = table->info.def_char;
381 WCHAR composed;
382 int len, tmp;
384 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
385 if (!used) used = &tmp; /* avoid checking on every char */
386 *used = 0;
388 for (len = dstlen; srclen && len; len--, srclen--, src++)
390 unsigned short res;
391 WCHAR wch = *src;
393 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
395 /* now check if we can use the composed char */
396 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
398 if (is_valid_dbcs_mapping( table, flags, composed, res ))
400 /* we have a good mapping for the composed char, use it */
401 src++;
402 srclen--;
403 goto output_char;
405 /* no mapping for the composed char, check the other flags */
406 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
408 res = defchar_value;
409 *used = 1;
410 src++; /* skip the non-spacing char */
411 srclen--;
412 goto output_char;
414 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
416 src++;
417 srclen--;
419 /* WC_SEPCHARS is the default */
422 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
423 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
425 res = defchar_value;
426 *used = 1;
429 output_char:
430 if (res & 0xff00)
432 if (len == 1) break; /* do not output a partial char */
433 len--;
434 *dst++ = res >> 8;
436 *dst++ = (char)res;
438 if (srclen) return -1; /* overflow */
439 return dstlen - len;
442 /* wide char to multi byte string conversion */
443 /* return -1 on dst buffer overflow */
444 int wine_cp_wcstombs( const union cptable *table, int flags,
445 const WCHAR *src, int srclen,
446 char *dst, int dstlen, const char *defchar, int *used )
448 if (table->info.char_size == 1)
450 if (flags || defchar || used)
452 if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen, used );
453 return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
454 dst, dstlen, defchar, used );
456 if (!dstlen) return srclen;
457 return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
459 else /* mbcs */
461 if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar, used );
462 if (flags || defchar || used)
463 return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
464 dst, dstlen, defchar, used );
465 return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
469 /* CP_SYMBOL implementation */
470 /* return -1 on dst buffer overflow, -2 on invalid character */
471 int wine_cpsymbol_wcstombs( const WCHAR *src, int srclen, char *dst, int dstlen)
473 int len, i;
474 if( dstlen == 0) return srclen;
475 len = dstlen > srclen ? srclen : dstlen;
476 for( i = 0; i < len; i++)
478 WCHAR w = src [ i ];
479 if( w < 0x20 )
480 dst[i] = w;
481 else if( w >= 0xf020 && w < 0xf100)
482 dst[i] = w - 0xf000;
483 else
484 return -2;
486 if( srclen > len) return -1;
487 return len;