2 .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
3 .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
4 .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
5 .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
6 .TH UCONV_U16TOU32 3C "Sep 18, 2007"
8 uconv_u16tou32, uconv_u16tou8, uconv_u32tou16, uconv_u32tou8, uconv_u8tou16,
9 uconv_u8tou32 \- Unicode encoding conversion functions
13 #include <sys/types.h>
14 #include <sys/errno.h>
15 #include <sys/u8_textprep.h>
17 \fBint\fR \fBuconv_u16tou32\fR(const \fBuint16_t *\fR\fIutf16str\fR, \fBsize_t *\fR\fIutf16len\fR,
18 \fBuint32_t *\fR\fIutf32str\fR, \fBsize_t *\fR\fIutf32len\fR, \fBint\fR \fIflag\fR);
23 \fBint\fR \fBuconv_u16tou8\fR(const \fBuint16_t *\fR\fIutf16str\fR, \fBsize_t *\fR\fIutf16len\fR,
24 \fBuchar_t *\fR\fIutf8str\fR, \fBsize_t *\fR\fIutf8len\fR, \fBint\fR \fIflag\fR);
29 \fBint\fR \fBuconv_u32tou16\fR(const \fBuint32_t *\fR\fIutf32str\fR, \fBsize_t *\fR\fIutf32len\fR,
30 \fBuint16_t *\fR\fIutf16str\fR, \fBsize_t *\fR\fIutf16len\fR, \fBint\fR \fIflag\fR);
35 \fBint\fR \fBuconv_u32tou8\fR(const \fBuint32_t *\fR\fIutf32str\fR, \fBsize_t *\fR\fIutf32len\fR,
36 \fBuchar_t *\fR\fIutf8str\fR, \fBsize_t *\fR\fIutf8len\fR, \fBint\fR \fIflag\fR);
41 \fBint\fR \fBuconv_u8tou16\fR(const \fBuchar_t *\fR\fIutf8str\fR, \fBsize_t *\fR\fIutf8len\fR,
42 \fBuint16_t *\fR\fIutf16str\fR, \fBsize_t *\fR\fIutf16len\fR, \fBint\fR \fIflag\fR);
47 \fBint\fR \fBuconv_u8tou32\fR(const \fBuchar_t *\fR\fIutf8str\fR, \fBsize_t *\fR\fIutf8len\fR,
48 \fBuint32_t *\fR\fIutf32str\fR, \fBsize_t *\fR\fIutf32len\fR, \fBint\fR \fIflag\fR);
58 A pointer to a \fBUTF-16\fR character string.
67 As an input parameter, the number of 16-bit unsigned integers in \fIutf16str\fR
68 as \fBUTF-16\fR characters to be converted or saved.
70 As an output parameter, the number of 16-bit unsigned integers in
71 \fIutf16str\fR consumed or saved during conversion.
80 A pointer to a \fBUTF-32\fR character string.
89 As an input parameter, the number of 32-bit unsigned integers in \fIutf32str\fR
90 as \fBUTF-32\fR characters to be converted or saved.
92 As an output parameter, the number of 32-bit unsigned integers in
93 \fIutf32str\fR consumed or saved during conversion.
102 A pointer to a \fBUTF-8\fR character string.
111 As an input parameter, the number of bytes in \fIutf8str\fR as \fBUTF-8\fR
112 characters to be converted or saved.
114 As an output parameter, the number of bytes in \fIutf8str\fR consumed or saved
124 The possible conversion options that are constructed by a bitwise-inclusive-OR
125 of the following values:
129 \fB\fBUCONV_IN_BIG_ENDIAN\fR\fR
133 The input parameter is in big endian byte ordering.
139 \fB\fBUCONV_OUT_BIG_ENDIAN\fR\fR
143 The output parameter should be in big endian byte ordering.
149 \fB\fBUCONV_IN_SYSTEM_ENDIAN\fR\fR
153 The input parameter is in the default byte ordering of the current system.
159 \fB\fBUCONV_OUT_SYSTEM_ENDIAN\fR\fR
163 The output parameter should be in the default byte ordering of the current
170 \fB\fBUCONV_IN_LITTLE_ENDIAN\fR\fR
174 The input parameter is in little endian byte ordering.
180 \fB\fBUCONV_OUT_LITTLE_ENDIAN\fR\fR
184 The output parameter should be in little endian byte ordering.
190 \fB\fBUCONV_IGNORE_NULL\fR\fR
194 The null or \fBU+0000\fR character should not stop the conversion.
200 \fB\fBUCONV_IN_ACCEPT_BOM\fR\fR
204 If the Byte Order Mark (\fBBOM\fR, \fBU+FEFF\fR) character exists as the first
205 character of the input parameter, interpret it as the \fBBOM\fR character.
211 \fB\fBUCONV_OUT_EMIT_BOM\fR\fR
215 Start the output parameter with the Byte Order Mark (\fBBOM\fR, \fBU+FEFF\fR)
216 character to indicate the byte ordering if the output parameter is in
217 \fBUTF-16\fR or \fBUTF-32\fR.
225 The \fBuconv_u16tou32()\fR function reads the given \fIutf16str\fR in
226 \fBUTF-16\fR until \fBU+0000\fR (zero) in \fIutf16str\fR is encountered as a
227 character or until the number of 16-bit unsigned integers specified in
228 \fIutf16len\fR is read. The \fBUTF-16\fR characters that are read are converted
229 into \fBUTF-32\fR and the result is saved at \fIutf32str\fR. After the
230 successful conversion, \fIutf32len\fR contains the number of 32-bit unsigned
231 integers saved at \fIutf32str\fR as \fBUTF-32\fR characters.
234 The \fBuconv_u16tou8()\fR function reads the given \fIutf16str\fR in
235 \fBUTF-16\fR until \fBU+0000\fR (zero) in \fIutf16str\fR is encountered as a
236 character or until the number of 16-bit unsigned integers specified in
237 \fIutf16len\fR is read. The \fBUTF-16\fR characters that are read are converted
238 into \fBUTF-8\fR and the result is saved at \fIutf8str\fR. After the successful
239 conversion, \fIutf8len\fR contains the number of bytes saved at \fIutf8str\fR
240 as \fBUTF-8\fR characters.
243 The \fBuconv_u32tou16()\fR function reads the given \fIutf32str\fR in
244 \fBUTF-32\fR until \fBU+0000\fR (zero) in \fIutf32str\fR is encountered as a
245 character or until the number of 32-bit unsigned integers specified in
246 \fIutf32len\fR is read. The \fBUTF-32\fR characters that are read are converted
247 into \fBUTF-16\fR and the result is saved at \fIutf16str\fR. After the
248 successful conversion, \fIutf16len\fR contains the number of 16-bit unsigned
249 integers saved at \fIutf16str\fR as \fBUTF-16\fR characters.
252 The \fBuconv_u32tou8()\fR function reads the given \fIutf32str\fR in
253 \fBUTF-32\fR until \fBU+0000\fR (zero) in \fIutf32str\fR is encountered as a
254 character or until the number of 32-bit unsigned integers specified in
255 \fIutf32len\fR is read. The \fBUTF-32\fR characters that are read are converted
256 into \fBUTF-8\fR and the result is saved at \fIutf8str\fR. After the successful
257 conversion, \fIutf8len\fR contains the number of bytes saved at \fIutf8str\fR
258 as \fBUTF-8\fR characters.
261 The \fBuconv_u8tou16()\fR function reads the given \fIutf8str\fR in \fBUTF-8\fR
262 until the null ('\fB\e0\fR\&') byte in \fIutf8str\fR is encountered or until
263 the number of bytes specified in \fIutf8len\fR is read. The \fBUTF-8\fR
264 characters that are read are converted into \fBUTF-16\fR and the result is
265 saved at \fIutf16str\fR. After the successful conversion, \fIutf16len\fR
266 contains the number of 16-bit unsigned integers saved at \fIutf16str\fR as
267 \fBUTF-16\fR characters.
270 The \fBuconv_u8tou32()\fR function reads the given \fIutf8str\fR in \fBUTF-8\fR
271 until the null ('\fB\e0\fR\&') byte in \fIutf8str\fR is encountered or until
272 the number of bytes specified in \fIutf8len\fR is read. The \fBUTF-8\fR
273 characters that are read are converted into \fBUTF-32\fR and the result is
274 saved at \fIutf32str\fR. After the successful conversion, \fIutf32len\fR
275 contains the number of 32-bit unsigned integers saved at \fIutf32str\fR as
276 \fBUTF-32\fR characters.
279 During the conversion, the input and the output parameters are treated with
280 byte orderings specified in the \fIflag\fR parameter. When not specified, the
281 default byte ordering of the system is used. The byte ordering \fIflag\fR value
282 that is specified for \fBUTF-8\fR is ignored.
285 When \fBUCONV_IN_ACCEPT_BOM\fR is specified as the \fIflag\fR and the first
286 character of the string pointed to by the input parameter is the \fBBOM\fR
287 character, the value of the \fBBOM\fR character dictates the byte ordering of
288 the subsequent characters in the string pointed to by the input parameter,
289 regardless of the supplied input parameter byte ordering option \fIflag\fR
290 values. If \fBUCONV_IN_ACCEPT_BOM\fR is not specified, the \fBBOM\fR as the
291 first character is treated as a regular Unicode character: Zero Width No Break
292 Space (\fBZWNBSP\fR) character.
295 When \fBUCONV_IGNORE_NULL\fR is specified, regardless of whether the input
296 parameter contains \fBU+0000\fR or a null byte, the conversion continues until
297 the specified number of input parameter elements at \fIutf16len\fR,
298 \fIutf32len\fR, or \fIutf8len\fR are entirely consumed during the conversion.
301 As output parameters, \fIutf16len\fR, \fIutf32len\fR, and \fIutf8len\fR are not
302 changed if conversion fails for any reason.
306 Upon successful conversion, the functions return \fB0\fR. Upon failure, the
307 functions return one of the following \fBerrno\fR values:
314 The conversion detected an illegal or out of bound character value in the input
324 The conversion cannot finish because the size specified in the output parameter
334 The conversion stops due to an incomplete character at the end of the input
344 Conflicting byte-ordering option \fIflag\fR values are detected.
349 \fBExample 1 \fRConvert a \fBUTF-16\fR string in little-endian byte ordering
350 into a \fBUTF-8\fR string.
354 #include <sys/types.h>
355 #include <sys/errno.h>
356 #include <sys/u8_textprep.h>
360 uint16_t u16s[MAXNAMELEN + 1];
361 uchar_t u8s[MAXNAMELEN + 1];
362 size_t u16len, u8len;
367 u16len = u8len = MAXNAMELEN;
368 ret = uconv_u16tou8(u16s, &u16len, u8s, &u8len,
369 UCONV_IN_LITTLE_ENDIAN);
371 /* Conversion error occurred. */
381 \fBExample 2 \fRConvert a \fBUTF-32\fR string in big endian byte ordering into
382 little endian \fBUTF-16\fR.
386 #include <sys/types.h>
387 #include <sys/errno.h>
388 #include <sys/u8_textprep.h>
393 * A UTF-32 character can be mapped to a UTF-16 character with
394 * two 16-bit integer entities as a "surrogate pair."
399 size_t u32len, u16len;
403 u32len = u16len = 100;
404 ret = uconv_u32tou16(u32s, &u32len, u16s, &u16len,
405 UCONV_IN_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN);
408 } else if (ret == E2BIG) {
409 /* Use a bigger output parameter and try just one more time. */
413 ret = uconv_u32tou16(u32s, &u32len, u16s2, &u16len,
414 UCONV_IN_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN);
419 /* Otherwise, return -1 to indicate an error condition. */
425 \fBExample 3 \fRConvert a \fBUTF-8\fR string into \fBUTF-16\fR in little-endian
429 Convert a \fBUTF-8\fR string into \fBUTF-16\fR in little-endian byte ordering
430 with a Byte Order Mark (BOM) character at the beginning of the output
436 #include <sys/types.h>
437 #include <sys/errno.h>
438 #include <sys/u8_textprep.h>
442 uchar_t u8s[MAXNAMELEN + 1];
443 uint16_t u16s[MAXNAMELEN + 1];
444 size_t u8len, u16len;
449 u8len = u16len = MAXNAMELEN;
450 ret = uconv_u8tou16(u8s, &u8len, u16s, &u16len,
451 UCONV_OUT_LITTLE_ENDIAN | UCONV_OUT_EMIT_BOM);
453 /* Conversion error occurred. */
465 See \fBattributes\fR(5) for descriptions of the following attributes:
473 ATTRIBUTE TYPE ATTRIBUTE VALUE
475 Interface Stability Committed
483 \fBattributes\fR(5), \fBuconv_u16tou32\fR(9F)
486 The Unicode Standard (http://www.unicode.org)
490 Each \fBUTF-16\fR or \fBUTF-32\fR character maps to a \fBUTF-8\fR character
491 that might need from one to a maximum of four bytes.
494 One \fBUTF-32\fR or \fBUTF-8\fR character can yield two 16-bit unsigned
495 integers as a \fBUTF-16\fR character, which is a surrogate pair if the Unicode
496 scalar value is bigger than \fBU+FFFF\fR.
499 Ill-formed \fBUTF-16\fR surrogate pairs are seen as illegal characters during