vfs: check userland buffers before reading them.
[haiku.git] / src / add-ons / media / plugins / asf_reader / libasf / utf.h
blob0072ae2427578c781e1e7cf135547f89d4ad44db
1 /* libasf - An Advanced Systems Format media file parser
2 * Copyright (C) 2006-2010 Juho Vähä-Herttua
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 #ifndef UTF_H
20 #define UTF_H
22 #include <stdlib.h>
24 #include "asfint.h"
/**
 * Decode UTF-16LE text from buffer of buflen size and
 * allocate a new NUL-terminated buffer containing the same
 * string encoded as UTF-8. Supports characters outside of BMP
 * encoded as an UTF-16 surrogate pair.
 *
 * Buflen is in bytes. Only whole 16-bit code units are decoded;
 * if buflen is odd, the trailing byte is ignored.
 *
 * Returns NULL in case of allocation failure, a high surrogate
 * that is not followed by a complete code unit, or a high
 * surrogate that is not followed by a low surrogate
 * (0xDC00..0xDFFF).
 *
 * NOTE: a low surrogate that is not preceded by a high surrogate
 * is not rejected; it is encoded as a three byte sequence like any
 * other code unit above 0x07FF.
 *
 * The returned buffer is allocated with malloc() and must be
 * released by the caller with free().
 */
static char *
asf_utf8_from_utf16le(uint8_t *buf, uint16_t buflen)
{
	uint32_t length, pos;
	uint32_t units;
	uint32_t i;
	char *ret;

	/* number of complete 16-bit code units available in buf */
	units = buflen / 2;

	/* First pass: validate surrogate pairs and compute the UTF-8 size */
	length = 0;
	for (i = 0; i < units; i++) {
		uint16_t wchar1, wchar2;

		wchar1 = buf[i * 2] | (buf[i * 2 + 1] << 8);
		if (wchar1 >= 0xD800 && wchar1 < 0xDC00) {
			i++;

			/*
			 * Compare against the code unit count instead of the
			 * byte count, otherwise an odd buflen would allow
			 * reading one byte past the end of the buffer.
			 */
			if (i >= units) {
				/* unexpected end of buffer */
				return NULL;
			}

			wchar2 = buf[i * 2] | (buf[i * 2 + 1] << 8);
			if (wchar2 < 0xDC00 || wchar2 > 0xDFFF) {
				/* invalid surrogate pair */
				return NULL;
			}
			length += 4;
		} else if (wchar1 > 0x07FF) {
			length += 3;
		} else if (wchar1 > 0x7F) {
			length += 2;
		} else {
			length++;
		}
	}

	ret = malloc(length + 1);
	if (!ret) {
		return NULL;
	}

	/*
	 * Second pass: the input was validated above, so every high
	 * surrogate is known to be followed by a low surrogate.
	 */
	pos = 0;
	for (i = 0; i < units; i++) {
		uint16_t wchar1, wchar2;
		uint32_t codepoint;

		wchar1 = buf[i * 2] | (buf[i * 2 + 1] << 8);
		if (wchar1 >= 0xD800 && wchar1 < 0xDC00) {
			i++;
			wchar2 = buf[i * 2] | (buf[i * 2 + 1] << 8);
			codepoint = 0x10000;
			codepoint += ((uint32_t) (wchar1 & 0x03FF) << 10);
			codepoint |= (wchar2 & 0x03FF);
		} else {
			codepoint = wchar1;
		}

		if (codepoint > 0xFFFF) {
			ret[pos++] = (char) (0xF0 | ((codepoint >> 18) & 0x07));
			ret[pos++] = (char) (0x80 | ((codepoint >> 12) & 0x3F));
			ret[pos++] = (char) (0x80 | ((codepoint >> 6) & 0x3F));
			ret[pos++] = (char) (0x80 | (codepoint & 0x3F));
		} else if (codepoint > 0x07FF) {
			ret[pos++] = (char) (0xE0 | (codepoint >> 12));
			ret[pos++] = (char) (0x80 | ((codepoint >> 6) & 0x3F));
			ret[pos++] = (char) (0x80 | (codepoint & 0x3F));
		} else if (codepoint > 0x7F) {
			ret[pos++] = (char) (0xC0 | (codepoint >> 6));
			ret[pos++] = (char) (0x80 | (codepoint & 0x3F));
		} else {
			ret[pos++] = (char) codepoint;
		}
	}

	ret[length] = '\0';
	return ret;
}
110 #endif