headers/bsd: Add sys/queue.h.
[haiku.git] / src / system / kernel / convertutf.cpp
blob897e580deeaa6b02cfe8bc05e4723783455f9bab
1 /*
2 * Copyright 2014 Jonathan Schleifer <js@webkeks.org>
3 * Copyright 2014 Haiku, Inc. All rights reserved.
5 * Distributed under the terms of the MIT License.
7 * Authors:
8 * Jonathan Schleifer, js@webkeks.org
9 * John Scipione, jscipione@gmail.com
13 #include "convertutf.h"
16 #include <ByteOrder.h>
17 #include <Errors.h>
18 #include <StorageDefs.h>
21 static inline size_t
22 glyph_length(uint32 glyph)
24 if (glyph < 0x80)
25 return 1;
26 else if (glyph < 0x800)
27 return 2;
28 else if (glyph < 0x10000)
29 return 3;
30 else if (glyph < 0x110000)
31 return 4;
33 return 0;
37 void
38 encode_glyph(uint32 glyph, size_t glyphLength, char* buffer)
40 if (glyphLength == 1) {
41 *buffer = glyph;
42 } else if (glyphLength == 2) {
43 *buffer++ = 0xC0 | (glyph >> 6);
44 *buffer = 0x80 | (glyph & 0x3F);
45 } else if (glyphLength == 3) {
46 *buffer++ = 0xE0 | (glyph >> 12);
47 *buffer++ = 0x80 | (glyph >> 6 & 0x3F);
48 *buffer = 0x80 | (glyph & 0x3F);
49 } else if (glyphLength == 4) {
50 *buffer++ = 0xF0 | (glyph >> 18);
51 *buffer++ = 0x80 | (glyph >> 12 & 0x3F);
52 *buffer++ = 0x80 | (glyph >> 6 & 0x3F);
53 *buffer = 0x80 | (glyph & 0x3F);
58 static ssize_t
59 utf16_to_utf8(const uint16* source, size_t sourceCodeUnitCount, char* target,
60 size_t targetLength, bool isLittleEndian)
62 if (source == NULL || sourceCodeUnitCount == 0
63 || target == NULL || targetLength == 0) {
64 return B_BAD_VALUE;
67 ssize_t outLength = 0;
69 for (size_t i = 0; i < sourceCodeUnitCount; i++) {
70 uint32 glyph = isLittleEndian
71 ? B_LENDIAN_TO_HOST_INT32(source[i])
72 : B_BENDIAN_TO_HOST_INT32(source[i]);
74 if ((glyph & 0xFC00) == 0xDC00) {
75 // missing high surrogate
76 return B_BAD_VALUE;
79 if ((glyph & 0xFC00) == 0xD800) {
80 if (sourceCodeUnitCount <= i + 1) {
81 // high surrogate at end of string
82 return B_BAD_VALUE;
85 uint32 low = isLittleEndian
86 ? B_LENDIAN_TO_HOST_INT32(source[i + 1])
87 : B_BENDIAN_TO_HOST_INT32(source[i + 1]);
88 if ((low & 0xFC00) != 0xDC00) {
89 // missing low surrogate
90 return B_BAD_VALUE;
93 glyph = (((glyph & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000;
94 i++;
97 size_t glyphLength = glyph_length(glyph);
98 if (glyphLength == 0)
99 return B_BAD_VALUE;
100 else if (outLength + glyphLength >= targetLength
101 || outLength + glyphLength >= B_FILE_NAME_LENGTH) {
102 // NUL terminate the string so the caller can use the
103 // abbreviated version in this case. Since the length
104 // isn't returned the caller will need to call strlen()
105 // to get the length of the string.
106 target[outLength] = '\0';
107 return B_NAME_TOO_LONG;
110 encode_glyph(glyph, glyphLength, target + outLength);
111 outLength += glyphLength;
114 target[outLength] = '\0';
116 return outLength;
120 ssize_t
121 utf16le_to_utf8(const uint16* source, size_t sourceCodeUnitCount,
122 char* target, size_t targetLength)
124 return utf16_to_utf8(source, sourceCodeUnitCount, target, targetLength,
125 true);
129 ssize_t
130 utf16be_to_utf8(const uint16* source, size_t sourceCodeUnitCount,
131 char* target, size_t targetLength)
133 return utf16_to_utf8(source, sourceCodeUnitCount, target, targetLength,
134 false);