1 /* libasf - An Advanced Systems Format media file parser
2 * Copyright (C) 2006-2010 Juho Vähä-Herttua
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 * Decode UTF-16LE text from buffer of buflen size and
28 * allocate a new buffer containing the same string
29 * encoded as UTF-8. Supports characters outside the BMP
30 * encoded as a UTF-16 surrogate pair. Returns NULL in
31 * case of allocation failure or invalid surrogate pair.
35 asf_utf8_from_utf16le(uint8_t *buf
, uint16_t buflen
)
42 for (i
=0; i
<buflen
/2; i
++) {
43 uint16_t wchar1
, wchar2
;
45 wchar1
= buf
[i
*2] | (buf
[i
*2+1] << 8);
46 if (wchar1
>= 0xD800 && wchar1
< 0xDC00) {
50 /* unexpected end of buffer */
53 wchar2
= buf
[i
*2] | (buf
[i
*2+1] << 8);
54 if (wchar2
< 0xDB00 || wchar2
> 0xDFFF) {
55 /* invalid surrogate pair */
59 } else if (wchar1
> 0x07FF) {
61 } else if (wchar1
> 0x7F) {
68 ret
= malloc(length
+ 1);
74 for (i
=0; i
<buflen
/2; i
++) {
75 uint16_t wchar1
, wchar2
;
78 wchar1
= buf
[i
*2] | (buf
[i
*2+1] << 8);
79 if (wchar1
>= 0xD800 && wchar1
< 0xDC00) {
81 wchar2
= buf
[i
*2] | (buf
[i
*2+1] << 8);
83 codepoint
+= ((wchar1
& 0x03FF) << 10);
84 codepoint
|= (wchar2
& 0x03FF);
89 if (codepoint
> 0xFFFF) {
90 ret
[pos
++] = 0xF0 | ((codepoint
>> 18) & 0x07);
91 ret
[pos
++] = 0x80 | ((codepoint
>> 12) & 0x3F);
92 ret
[pos
++] = 0x80 | ((codepoint
>> 6) & 0x3F);
93 ret
[pos
++] = 0x80 | (codepoint
& 0x3F);
94 } else if (codepoint
> 0x07FF) {
95 ret
[pos
++] = 0xE0 | (codepoint
>> 12);
96 ret
[pos
++] = 0x80 | ((codepoint
>> 6) & 0x3F);
97 ret
[pos
++] = 0x80 | (codepoint
& 0x3F);
98 } else if (codepoint
> 0x7F) {
99 ret
[pos
++] = 0xC0 | (codepoint
>> 6);
100 ret
[pos
++] = 0x80 | (codepoint
& 0x3F);
102 ret
[pos
++] = codepoint
;