graphics updates
[voxelands-alt.git] / src / lib / utf8.c
blob37c07585203de3cd4940df66e85859bd9a093d42
1 /************************************************************************
2 * utf8.c
3 * voxelands - 3d voxel world sandbox game
4 * Copyright (C) Lisa 'darkrose' Milne 2016 <lisa@ltmnet.com>
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 * See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>
19 * Modified from original Public Domain sources by Jeff Bezanson
20 ************************************************************************/
22 #include "common.h"
24 #include <string.h>
/*
 * Correction constants, indexed by (sequence length - 1).
 *
 * The decoders in this file accumulate the raw bytes of a sequence,
 * six bits at a time, without first masking off the utf-8 marker bits.
 * Each entry is the sum of those marker bits for a sequence of that
 * length (e.g. 2 bytes: (0xC0 << 6) + 0x80 = 0x3080), so subtracting
 * it from the accumulated value leaves the code point.
 */
static const uint32_t utf8_offsets[6] = {
	0x00000000UL,	/* 1 byte  */
	0x00003080UL,	/* 2 bytes */
	0x000E2080UL,	/* 3 bytes */
	0x03C82080UL,	/* 4 bytes */
	0xFA082080UL,	/* 5 bytes */
	0x82082080UL	/* 6 bytes */
};
/*
 * Number of bytes that follow a given first byte in a utf-8 sequence,
 * indexed by the value of that first byte. Bytes below 0xC0 (ascii and
 * continuation bytes) have no trailing bytes.
 */
static const char utf8_trailing_bytes[256] = {
	/* 0x00 - 0xBF */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	/* 0xC0 - 0xDF */
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0xE0 - 0xEF */
	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	/* 0xF0 - 0xF7, 0xF8 - 0xFB, 0xFC - 0xFF */
	3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
/*
 * true when c is NOT a continuation byte (top two bits 10), i.e. when it
 * can begin a sequence; the nul terminator also tests true.
 * c is evaluated exactly once, so arguments with side effects are safe.
 */
#define isutf8(c) ((((c) & 0xC0) ^ 0x80) != 0)
45 /* returns length of next utf-8 sequence */
46 int utf8_seqlen(char* str)
48 return utf8_trailing_bytes[(unsigned int)(unsigned char)str[0]] + 1;
51 /* reads the next utf-8 sequence out of a string, updating an index */
52 uint32_t utf8_nextchar(char* str, int *i)
54 uint32_t ch = 0;
55 int sz = 0;
57 do {
58 ch <<= 6;
59 ch += (unsigned char)str[(*i)++];
60 sz++;
61 } while (str[*i] && !isutf8(str[*i]));
63 ch -= utf8_offsets[sz-1];
65 return ch;
/*
 * get a single utf32 char from a utf8 string
 *
 * src:  utf-8 bytes of the character
 * size: number of bytes to look at; negative means "use strlen(src)".
 *       at most 5 bytes are ever examined.
 *
 * returns the decoded character, or 0 if src is NULL or empty.
 */
uint32_t utf8_toutf32(char* src, int size)
{
	/*
	 * zeroed so that the decoder always meets a terminator inside the
	 * buffer, whatever src and size turn out to be
	 */
	char buff[6] = {0};
	size_t len;
	int i = 0;

	if (!src || !size)
		return 0;

	if (size < 0) {
		len = strlen(src);
		/* an empty string holds no character to decode */
		if (!len)
			return 0;
		/* clamp before narrowing so a huge length cannot overflow int */
		size = (len > 5) ? 5 : (int)len;
	}else if (size > 5) {
		size = 5;
	}

	/*
	 * strncpy (not memcpy) on purpose: it stops reading src at its
	 * terminator, so a size larger than the string is harmless
	 */
	strncpy(buff,src,size);

	return utf8_nextchar(buff,&i);
}
/*
 * convert a single utf32 char to a utf8 string
 *
 * dest: output buffer
 * sz:   size of dest in bytes
 * ch:   character to encode
 *
 * returns the number of bytes written for the character (1 to 4), not
 * counting the terminator, or 0 if nothing was encoded.
 *
 * if the encoded character does not fit in sz bytes, dest is left
 * untouched. a nul terminator is appended only when there is room for
 * it after the character. a value of 0x110000 or above encodes to
 * nothing, leaving an empty string in dest.
 */
int utf8_fromutf32(char *dest, int sz, uint32_t ch)
{
	int len;

	if (!dest || sz < 1)
		return 0;

	if (ch < 0x80) {
		len = 1;
	}else if (ch < 0x800) {
		len = 2;
	}else if (ch < 0x10000) {
		len = 3;
	}else if (ch < 0x110000) {
		len = 4;
	}else{
		len = 0;
	}

	/* compare lengths, not pointers: dest+sz-3 may not be a valid pointer */
	if (len > sz)
		return 0;

	if (len == 1) {
		dest[0] = (char)ch;
	}else if (len == 2) {
		dest[0] = (char)((ch>>6) | 0xC0);
		dest[1] = (char)((ch & 0x3F) | 0x80);
	}else if (len == 3) {
		dest[0] = (char)((ch>>12) | 0xE0);
		dest[1] = (char)(((ch>>6) & 0x3F) | 0x80);
		dest[2] = (char)((ch & 0x3F) | 0x80);
	}else if (len == 4) {
		dest[0] = (char)((ch>>18) | 0xF0);
		dest[1] = (char)(((ch>>12) & 0x3F) | 0x80);
		dest[2] = (char)(((ch>>6) & 0x3F) | 0x80);
		dest[3] = (char)((ch & 0x3F) | 0x80);
	}

	if (len < sz)
		dest[len] = 0;

	return len;
}
/*
 * needed because windows - get a single utf32 char from utf16
 *
 * str: zero terminated utf-16 text; only the first one or two code
 *      units are examined
 *
 * returns the character that starts at str[0]: the unit itself when it
 * is not a surrogate, or the combined value of a high surrogate and the
 * low surrogate that follows it. returns 0 if str is NULL or empty, or
 * if it starts with an unpaired surrogate.
 */
uint32_t utf16_toutf32(uint16_t *str)
{
	uint32_t s0;
	uint32_t s1;

	if (!str || !str[0])
		return 0;

	s0 = str[0];

	/* anything outside 0xD800-0xDFFF stands for itself */
	if (s0 < 0xD800 || s0 > 0xDFFF)
		return s0;

	/* a low surrogate cannot come first */
	if (s0 > 0xDBFF)
		return 0;

	/* str[0] is non-zero, so str[1] is at worst the terminator */
	s1 = str[1];
	if (s1 < 0xDC00 || s1 > 0xDFFF)
		return 0;

	return 0x10000 + ((s0 - 0xD800) << 10) + (s1 - 0xDC00);
}
/*
 * char index to byte offset
 *
 * walks forward i characters from the start of str, stopping early at
 * the terminator, and returns the byte position reached
 */
int utf8_offset(char* str, int i)
{
	int pos = 0;
	int left;
	int moved;

	for (left = i; left > 0 && str[pos]; left--) {
		/*
		 * leave the current byte, then skip trailing bytes; a
		 * character is taken to span at most four bytes, and the
		 * byte at the fourth step is not examined
		 */
		moved = 0;
		do {
			pos++;
			moved++;
		} while (moved < 4 && !isutf8(str[pos]));
	}

	return pos;
}
/*
 * byte offset to charindex
 *
 * counts the characters that start before byte offset o, stopping early
 * at the terminator
 */
int utf8_charindex(char* str, int o)
{
	int chars;
	int pos = 0;
	int moved;

	for (chars = 0; pos < o && str[pos]; chars++) {
		/*
		 * leave the current byte, then skip trailing bytes; a
		 * character is taken to span at most four bytes, and the
		 * byte at the fourth step is not examined
		 */
		moved = 0;
		do {
			pos++;
			moved++;
		} while (moved < 4 && !isutf8(str[pos]));
	}

	return chars;
}
/*
 * number of characters
 *
 * counts the sequences decoded from str up to its terminator; counting
 * also stops at a sequence that decodes to 0.
 */
int utf8_strlen(char* str)
{
	int count = 0;
	int i = 0;

	/*
	 * test for the terminator here instead of asking utf8_nextchar to
	 * decode it, so the index never moves beyond the end of the string
	 */
	while (str[i] && utf8_nextchar(str, &i) != 0) {
		count++;
	}

	return count;
}
/*
 * increment i by one character index
 *
 * moves *i forward until it rests on a byte that is not a continuation
 * byte, giving up after four bytes. always moves at least one byte, so
 * the caller must not call this with *i already at the terminator.
 */
void utf8_inc(char* str, int *i)
{
	int moved = 0;

	do {
		(*i)++;
		moved++;
		/* the byte reached on the fourth step is not examined */
	} while (moved < 4 && !isutf8(str[*i]));
}
/*
 * decrement i by one character index
 *
 * moves *i backward until it rests on a byte that is not a continuation
 * byte, giving up after four bytes. always moves at least one byte and
 * does not check for the start of the string - that is up to the caller.
 */
void utf8_dec(char* str, int *i)
{
	int moved = 0;

	do {
		(*i)--;
		moved++;
		/* the byte reached on the fourth step is not examined */
	} while (moved < 4 && !isutf8(str[*i]));
}
/*
 * strchr() for utf8
 *
 * str:   nul terminated utf-8 text to search
 * ch:    character to look for
 * charn: if not NULL, receives the number of characters that were
 *        examined without matching (the character index of the match
 *        when one is found)
 *
 * returns a pointer to the first byte of the first matching character,
 * or NULL if the terminator is reached first.
 */
char* utf8_strchr(char* str, uint32_t ch, int *charn)
{
	int pos = 0;
	int start;

	if (charn)
		*charn = 0;

	while (str[pos]) {
		/* remember where this character begins before decoding moves on */
		start = pos;

		if (utf8_nextchar(str, &pos) == ch)
			return &str[start];

		if (charn)
			(*charn)++;
	}

	return NULL;
}
224 /* memchr() for utf8 */
225 char* utf8_memchr(char* str, uint32_t ch, size_t sz, int *charn)
227 int i = 0;
228 int lasti = 0;
229 uint32_t c;
230 int csz;
232 if (charn)
233 *charn = 0;
235 while (i < sz) {
236 c = csz = 0;
237 do {
238 c <<= 6;
239 c += (unsigned char)str[i++];
240 csz++;
241 } while (i < sz && !isutf8(str[i]));
243 c -= utf8_offsets[csz-1];
245 if (c == ch)
246 return &str[lasti];
247 lasti = i;
248 if (charn)
249 (*charn)++;
252 return NULL;