/*
 * WideCharToMultiByte implementation
 *
 * Copyright 2000 Alexandre Julliard
 */
10 #include "wine/unicode.h"
12 /* search for a character in the unicode_compose_table; helper for compose() */
13 static inline int binary_search( WCHAR ch
, int low
, int high
)
15 extern const WCHAR unicode_compose_table
[];
18 int pos
= (low
+ high
) / 2;
19 if (unicode_compose_table
[2*pos
] < ch
)
24 if (unicode_compose_table
[2*pos
] > ch
)
34 /* return the result of the composition of two Unicode chars, or 0 if none */
35 static WCHAR
compose( const WCHAR
*str
)
37 extern const WCHAR unicode_compose_table
[];
38 extern const unsigned int unicode_compose_table_size
;
40 int idx
= 1, low
= 0, high
= unicode_compose_table_size
- 1;
43 int pos
= binary_search( str
[idx
], low
, high
);
44 if (pos
== -1) return 0;
45 if (!idx
--) return unicode_compose_table
[2*pos
+1];
46 low
= unicode_compose_table
[2*pos
+1];
47 high
= unicode_compose_table
[2*pos
+3] - 1;
/****************************************************************/
/* sbcs support */
55 /* check if 'ch' is an acceptable sbcs mapping for 'wch' */
56 static inline int is_valid_sbcs_mapping( const struct sbcs_table
*table
, int flags
,
57 WCHAR wch
, unsigned char ch
)
59 if (flags
& WC_NO_BEST_FIT_CHARS
) return (table
->cp2uni
[ch
] == wch
);
60 if (ch
!= (unsigned char)table
->info
.def_char
) return 1;
61 return (wch
== table
->info
.def_unicode_char
);
64 /* query necessary dst length for src string */
65 static inline int get_length_sbcs( const struct sbcs_table
*table
, int flags
,
66 const WCHAR
*src
, unsigned int srclen
)
68 unsigned int ret
= srclen
;
70 if (flags
& WC_COMPOSITECHECK
)
72 const unsigned char * const uni2cp_low
= table
->uni2cp_low
;
73 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
76 for (ret
= 0; srclen
> 1; ret
++, srclen
--, src
++)
78 if (!(composed
= compose(src
))) continue;
79 /* check if we should skip the next char */
81 /* in WC_DEFAULTCHAR and WC_DISCARDNS mode, we always skip */
82 /* the next char no matter if the composition is valid or not */
83 if (!(flags
& (WC_DEFAULTCHAR
|WC_DISCARDNS
)))
85 unsigned char ch
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
86 if (!is_valid_sbcs_mapping( table
, flags
, composed
, ch
)) continue;
91 if (srclen
) ret
++; /* last char */
96 /* wcstombs for single-byte code page */
97 static inline int wcstombs_sbcs( const struct sbcs_table
*table
,
98 const WCHAR
*src
, unsigned int srclen
,
99 char *dst
, unsigned int dstlen
)
101 const unsigned char * const uni2cp_low
= table
->uni2cp_low
;
102 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
107 /* buffer too small: fill it up to dstlen and return error */
117 case 16: dst
[15] = uni2cp_low
[uni2cp_high
[src
[15] >> 8] + (src
[15] & 0xff)];
118 case 15: dst
[14] = uni2cp_low
[uni2cp_high
[src
[14] >> 8] + (src
[14] & 0xff)];
119 case 14: dst
[13] = uni2cp_low
[uni2cp_high
[src
[13] >> 8] + (src
[13] & 0xff)];
120 case 13: dst
[12] = uni2cp_low
[uni2cp_high
[src
[12] >> 8] + (src
[12] & 0xff)];
121 case 12: dst
[11] = uni2cp_low
[uni2cp_high
[src
[11] >> 8] + (src
[11] & 0xff)];
122 case 11: dst
[10] = uni2cp_low
[uni2cp_high
[src
[10] >> 8] + (src
[10] & 0xff)];
123 case 10: dst
[9] = uni2cp_low
[uni2cp_high
[src
[9] >> 8] + (src
[9] & 0xff)];
124 case 9: dst
[8] = uni2cp_low
[uni2cp_high
[src
[8] >> 8] + (src
[8] & 0xff)];
125 case 8: dst
[7] = uni2cp_low
[uni2cp_high
[src
[7] >> 8] + (src
[7] & 0xff)];
126 case 7: dst
[6] = uni2cp_low
[uni2cp_high
[src
[6] >> 8] + (src
[6] & 0xff)];
127 case 6: dst
[5] = uni2cp_low
[uni2cp_high
[src
[5] >> 8] + (src
[5] & 0xff)];
128 case 5: dst
[4] = uni2cp_low
[uni2cp_high
[src
[4] >> 8] + (src
[4] & 0xff)];
129 case 4: dst
[3] = uni2cp_low
[uni2cp_high
[src
[3] >> 8] + (src
[3] & 0xff)];
130 case 3: dst
[2] = uni2cp_low
[uni2cp_high
[src
[2] >> 8] + (src
[2] & 0xff)];
131 case 2: dst
[1] = uni2cp_low
[uni2cp_high
[src
[1] >> 8] + (src
[1] & 0xff)];
132 case 1: dst
[0] = uni2cp_low
[uni2cp_high
[src
[0] >> 8] + (src
[0] & 0xff)];
135 if (srclen
< 16) return ret
;
142 /* slow version of wcstombs_sbcs that handles the various flags */
143 static int wcstombs_sbcs_slow( const struct sbcs_table
*table
, int flags
,
144 const WCHAR
*src
, unsigned int srclen
,
145 char *dst
, unsigned int dstlen
,
146 const char *defchar
, int *used
)
148 const unsigned char * const uni2cp_low
= table
->uni2cp_low
;
149 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
150 const unsigned char table_default
= table
->info
.def_char
& 0xff;
155 if (!defchar
) defchar
= &table_default
;
156 if (!used
) used
= &tmp
; /* avoid checking on every char */
159 for (len
= dstlen
; srclen
&& len
; dst
++, len
--, src
++, srclen
--)
163 if ((flags
& WC_COMPOSITECHECK
) && (srclen
> 1) && (composed
= compose(src
)))
165 /* now check if we can use the composed char */
166 *dst
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
167 if (is_valid_sbcs_mapping( table
, flags
, composed
, *dst
))
169 /* we have a good mapping, use it */
174 /* no mapping for the composed char, check the other flags */
175 if (flags
& WC_DEFAULTCHAR
) /* use the default char instead */
179 src
++; /* skip the non-spacing char */
183 if (flags
& WC_DISCARDNS
) /* skip the second char of the composition */
188 /* WC_SEPCHARS is the default */
191 *dst
= uni2cp_low
[uni2cp_high
[wch
>> 8] + (wch
& 0xff)];
192 if (!is_valid_sbcs_mapping( table
, flags
, wch
, *dst
))
198 if (srclen
) return -1; /* overflow */
/****************************************************************/
/* dbcs support */
206 /* check if 'ch' is an acceptable dbcs mapping for 'wch' */
207 static inline int is_valid_dbcs_mapping( const struct dbcs_table
*table
, int flags
,
208 WCHAR wch
, unsigned short ch
)
210 if (ch
== table
->info
.def_char
&& wch
!= table
->info
.def_unicode_char
) return 0;
211 if (flags
& WC_NO_BEST_FIT_CHARS
)
213 /* check if char maps back to the same Unicode value */
216 unsigned char off
= table
->cp2uni_leadbytes
[ch
>> 8];
217 return (table
->cp2uni
[(off
<< 8) + (ch
& 0xff)] == wch
);
219 return (table
->cp2uni
[ch
& 0xff] == wch
);
224 /* query necessary dst length for src string */
225 static int get_length_dbcs( const struct dbcs_table
*table
, int flags
,
226 const WCHAR
*src
, unsigned int srclen
,
227 const char *defchar
)
229 const unsigned short * const uni2cp_low
= table
->uni2cp_low
;
230 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
231 WCHAR defchar_value
= table
->info
.def_char
;
235 if (!defchar
&& !(flags
& WC_COMPOSITECHECK
))
237 for (len
= 0; srclen
; srclen
--, src
++, len
++)
239 if (uni2cp_low
[uni2cp_high
[*src
>> 8] + (*src
& 0xff)] & 0xff00) len
++;
244 if (defchar
) defchar_value
= defchar
[1] ? ((defchar
[0] << 8) | defchar
[1]) : defchar
[0];
245 for (len
= 0; srclen
; len
++, srclen
--, src
++)
250 if ((flags
& WC_COMPOSITECHECK
) && (srclen
> 1) && (composed
= compose(src
)))
252 /* now check if we can use the composed char */
253 res
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
255 if (is_valid_dbcs_mapping( table
, flags
, composed
, res
))
257 /* we have a good mapping for the composed char, use it */
258 if (res
& 0xff00) len
++;
263 /* no mapping for the composed char, check the other flags */
264 if (flags
& WC_DEFAULTCHAR
) /* use the default char instead */
266 if (defchar_value
& 0xff00) len
++;
267 src
++; /* skip the non-spacing char */
271 if (flags
& WC_DISCARDNS
) /* skip the second char of the composition */
276 /* WC_SEPCHARS is the default */
279 res
= uni2cp_low
[uni2cp_high
[wch
>> 8] + (wch
& 0xff)];
280 if (!is_valid_dbcs_mapping( table
, flags
, wch
, res
)) res
= defchar_value
;
281 if (res
& 0xff00) len
++;
286 /* wcstombs for double-byte code page */
287 static inline int wcstombs_dbcs( const struct dbcs_table
*table
,
288 const WCHAR
*src
, unsigned int srclen
,
289 char *dst
, unsigned int dstlen
)
291 const unsigned short * const uni2cp_low
= table
->uni2cp_low
;
292 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
295 for (len
= dstlen
; srclen
&& len
; len
--, srclen
--, src
++)
297 unsigned short res
= uni2cp_low
[uni2cp_high
[*src
>> 8] + (*src
& 0xff)];
300 if (len
== 1) break; /* do not output a partial char */
306 if (srclen
) return -1; /* overflow */
310 /* slow version of wcstombs_dbcs that handles the various flags */
311 static int wcstombs_dbcs_slow( const struct dbcs_table
*table
, int flags
,
312 const WCHAR
*src
, unsigned int srclen
,
313 char *dst
, unsigned int dstlen
,
314 const char *defchar
, int *used
)
316 const unsigned short * const uni2cp_low
= table
->uni2cp_low
;
317 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
318 WCHAR defchar_value
= table
->info
.def_char
;
322 if (defchar
) defchar_value
= defchar
[1] ? ((defchar
[0] << 8) | defchar
[1]) : defchar
[0];
323 if (!used
) used
= &tmp
; /* avoid checking on every char */
326 for (len
= dstlen
; srclen
&& len
; len
--, srclen
--, src
++)
331 if ((flags
& WC_COMPOSITECHECK
) && (srclen
> 1) && (composed
= compose(src
)))
333 /* now check if we can use the composed char */
334 res
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
336 if (is_valid_dbcs_mapping( table
, flags
, composed
, res
))
338 /* we have a good mapping for the composed char, use it */
343 /* no mapping for the composed char, check the other flags */
344 if (flags
& WC_DEFAULTCHAR
) /* use the default char instead */
348 src
++; /* skip the non-spacing char */
352 if (flags
& WC_DISCARDNS
) /* skip the second char of the composition */
357 /* WC_SEPCHARS is the default */
360 res
= uni2cp_low
[uni2cp_high
[wch
>> 8] + (wch
& 0xff)];
361 if (!is_valid_dbcs_mapping( table
, flags
, wch
, res
))
370 if (len
== 1) break; /* do not output a partial char */
376 if (srclen
) return -1; /* overflow */
380 /* wide char to multi byte string conversion */
381 /* return -1 on dst buffer overflow */
382 int cp_wcstombs( const union cptable
*table
, int flags
,
383 const WCHAR
*src
, int srclen
,
384 char *dst
, int dstlen
, const char *defchar
, int *used
)
386 if (table
->info
.char_size
== 1)
388 if (!dstlen
) return get_length_sbcs( &table
->sbcs
, flags
, src
, srclen
);
389 if (flags
|| defchar
|| used
)
390 return wcstombs_sbcs_slow( &table
->sbcs
, flags
, src
, srclen
,
391 dst
, dstlen
, defchar
, used
);
392 return wcstombs_sbcs( &table
->sbcs
, src
, srclen
, dst
, dstlen
);
396 if (!dstlen
) return get_length_dbcs( &table
->dbcs
, flags
, src
, srclen
, defchar
);
397 if (flags
|| defchar
|| used
)
398 return wcstombs_dbcs_slow( &table
->dbcs
, flags
, src
, srclen
,
399 dst
, dstlen
, defchar
, used
);
400 return wcstombs_dbcs( &table
->dbcs
, src
, srclen
, dst
, dstlen
);