1 /************************************************************************
3 * voxelands - 3d voxel world sandbox game
4 * Copyright (C) Lisa 'darkrose' Milne 2016 <lisa@ltmnet.com>
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 * See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>
19 * Modified from original Public Domain sources by Jeff Bezanson
20 ************************************************************************/
/*
 * decoding offsets: entry n-1 is the sum of the lead-byte and continuation
 * marker bits that pile up when the raw bytes of an n-byte sequence are
 * accumulated six bits at a time; subtracting it leaves the code point
 * (e.g. entry 1 is (0xC0 << 6) + 0x80)
 */
static const uint32_t utf8_offsets[6] = {
	0x00000000UL, 0x00003080UL, 0x000E2080UL,
	0x03C82080UL, 0xFA082080UL, 0x82082080UL
};
/*
 * number of bytes that follow a given first byte, indexed by that byte:
 * 0x00-0xBF -> 0, 0xC0-0xDF -> 1, 0xE0-0xEF -> 2, 0xF0-0xF7 -> 3,
 * 0xF8-0xFB -> 4, 0xFC-0xFF -> 5
 */
static const char utf8_trailing_bytes[256] = {
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
/* true unless c is a continuation byte (bit pattern 10xxxxxx) */
#define isutf8(c) (0x80 != (0xC0 & (c)))
45 /* returns length of next utf-8 sequence */
46 int utf8_seqlen(char* str
)
48 return utf8_trailing_bytes
[(unsigned int)(unsigned char)str
[0]] + 1;
51 /* reads the next utf-8 sequence out of a string, updating an index */
52 uint32_t utf8_nextchar(char* str
, int *i
)
59 ch
+= (unsigned char)str
[(*i
)++];
61 } while (str
[*i
] && !isutf8(str
[*i
]));
63 ch
-= utf8_offsets
[sz
-1];
/*
 * The visible statements copy up to `size` bytes of `src` into a local
 * `buff` and decode one value from that copy with utf8_nextchar().
 * NOTE(review): only part of this definition is visible in this view (the
 * declarations of buff/ch/i and the return are not), so the notes below are
 * limited to the statements shown.
 */
68 /* get a single utf32 char from a utf8 string */
69 uint32_t utf8_toutf32(char* src
, int size
)
/* NOTE(review): strncpy() does not write a terminating NUL when src holds
 * `size` or more bytes, and utf8_nextchar() keeps scanning until it meets a
 * NUL or a lead byte - confirm buff is terminated after this copy and that
 * `size` is checked against the capacity of buff */
84 strncpy(buff
,src
,size
);
/* decode from the private copy; utf8_nextchar() advances i past the bytes
 * it consumed */
89 ch
= utf8_nextchar(buff
,&i
);
/*
 * Writes the utf-8 encoding of `ch` into `dest`, which has room for `sz`
 * bytes; the visible branches emit two, three or four bytes depending on the
 * magnitude of ch, each behind a check of the remaining space.
 * NOTE(review): the head of the first branch, the statements taken when a
 * space check fails, and the return value are not visible in this view -
 * confirm them against the full file before relying on this description.
 */
94 /* convert a single utf32 char to a utf8 string */
95 int utf8_fromutf32(char *dest
, int sz
, uint32_t ch
)
/* one past the last writable byte of the destination */
98 char *dest_end
= dest
+ sz
;
/* space check belonging to the branch that precedes the `ch < 0x800` test:
 * at least one byte must remain */
101 if (dest
>= dest_end
)
/* two byte form: 110xxxxx 10xxxxxx */
104 }else if (ch
< 0x800) {
/* needs two free bytes */
105 if (dest
>= dest_end
-1)
107 *dest
++ = (ch
>>6) | 0xC0;
108 *dest
++ = (ch
& 0x3F) | 0x80;
/* three byte form: 1110xxxx 10xxxxxx 10xxxxxx
 * NOTE(review): no rejection of the surrogate range 0xD800-0xDFFF is visible
 * in this branch - confirm whether callers can pass such values */
109 }else if (ch
< 0x10000) {
/* needs three free bytes */
110 if (dest
>= dest_end
-2)
112 *dest
++ = (ch
>>12) | 0xE0;
113 *dest
++ = ((ch
>>6) & 0x3F) | 0x80;
114 *dest
++ = (ch
& 0x3F) | 0x80;
/* four byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx; values of 0x110000
 * and above match none of the visible branches */
115 }else if (ch
< 0x110000) {
/* needs four free bytes */
116 if (dest
>= dest_end
-3)
118 *dest
++ = (ch
>>18) | 0xF0;
119 *dest
++ = ((ch
>>12) & 0x3F) | 0x80;
120 *dest
++ = ((ch
>>6) & 0x3F) | 0x80;
121 *dest
++ = (ch
& 0x3F) | 0x80;
/*
 * Combines one or two utf-16 code units (s0, and s1 for a surrogate pair)
 * into a single value.
 * NOTE(review): the declarations of ch/s0/s1, the loads from `str`, the body
 * of the first branch and the return are not visible in this view.
 */
128 /* needed because windows - get a single utf32 char from utf16 */
129 uint32_t utf16_toutf32(uint16_t *str
)
/* NOTE(review): `>- 2048` parses as `> -2048`. The usual "s0 is not a
 * surrogate" test is `(s0 - 0xD800) >= 2048` evaluated on an unsigned value;
 * as written an ordinary unit such as 0x0041 does not satisfy the condition
 * for either a signed or an unsigned s0 - confirm the intended operator */
141 if ((s0
-0xD800) >- 2048) {
/* s0 is a high surrogate (0xD800-0xDBFF) followed by a non-zero low
 * surrogate (0xDC00-0xDFFF) */
143 }else if (s1
&& (s0
&0xFFFFFC00) == 0xD800 && (s1
&0xFFFFFC00) == 0xDC00) {
/* 0x35fdc00 == (0xD800 << 10) + 0xDC00 - 0x10000, so this is
 * ((s0 - 0xD800) << 10) + (s1 - 0xDC00) + 0x10000 */
144 ch
= (s0
<< 10) + s1
- 0x35fdc00;
/*
 * char index to byte offset
 *
 * walks i characters forward from the start of str (stopping early at the
 * terminator) and returns the byte offset reached
 */
int utf8_offset(char* str, int i)
{
	int offset = 0;
	int step;

	for (; i > 0 && str[offset]; i--) {
		/* step over one character: at most four bytes, ending early
		 * on the first byte that can start a sequence */
		step = 0;
		do {
			offset++;
			step++;
		} while (step < 4 && !isutf8(str[offset]));
	}

	return offset;
}
/*
 * byte offset to charindex
 *
 * counts the characters that start before byte offset o (stopping early at
 * the terminator) and returns that count
 */
int utf8_charindex(char* str, int o)
{
	int chars = 0;
	int offset = 0;
	int step;

	while (offset < o && str[offset]) {
		/* step over one character: at most four bytes, ending early
		 * on the first byte that can start a sequence */
		step = 0;
		do {
			offset++;
			step++;
		} while (step < 4 && !isutf8(str[offset]));
		chars++;
	}

	return chars;
}
/*
 * number of characters
 *
 * counts decoded sequences up to the terminator, or up to the first
 * sequence that decodes to 0
 */
int utf8_strlen(char* str)
{
	int count = 0;
	int i = 0;

	/* test the byte before decoding: utf8_nextchar() is never invoked on
	 * the terminator, so nothing after the end of the string is examined */
	while (str[i] && utf8_nextchar(str, &i) != 0) {
		count++;
	}

	return count;
}
/*
 * increment i by one character index
 *
 * moves *i forward by one to four bytes, stopping on the first byte that
 * can start a sequence; the fourth byte is stepped over without a check
 */
void utf8_inc(char* str, int *i)
{
	int step = 0;

	do {
		(*i)++;
		step++;
	} while (step < 4 && !isutf8(str[*i]));
}
/*
 * decrement i by one character index
 *
 * moves *i back by one to four bytes, stopping on the first byte that can
 * start a sequence. The walk never goes below index 0, so calling this at
 * the start of the string (or over malformed leading bytes) leaves *i at 0
 * instead of reading in front of the buffer.
 */
void utf8_dec(char* str, int *i)
{
	int step = 0;

	while (*i > 0 && step < 4) {
		(*i)--;
		step++;
		/* the fourth byte back is accepted without inspection */
		if (step < 4 && isutf8(str[*i]))
			break;
	}
}
/*
 * NOTE(review): only the signature and one statement of this definition are
 * visible in this view; how `ch` is compared, what is stored through `charn`
 * and what is returned cannot be confirmed from here.
 */
202 /* strchr() for utf8 */
203 char* utf8_strchr(char* str
, uint32_t ch
, int *charn
)
/* decode the sequence at byte index i; utf8_nextchar() advances i past it */
213 c
= utf8_nextchar(str
, &i
);
/*
 * NOTE(review): only the signature and the inner decode loop are visible in
 * this view; the declarations, the comparison against `ch`, the use of
 * `charn` and the return are not.
 */
224 /* memchr() for utf8 */
225 char* utf8_memchr(char* str
, uint32_t ch
, size_t sz
, int *charn
)
/* add the next raw byte to c and step the byte index */
239 c
+= (unsigned char)str
[i
++];
/* keep gathering while still inside the sz-byte buffer and the next byte is
 * a continuation byte
 * NOTE(review): if i is a signed int it is converted to size_t for the
 * `i < sz` comparison - confirm the declared type */
241 } while (i
< sz
&& !isutf8(str
[i
]));
/* strip the accumulated marker bits
 * NOTE(review): no bound on csz is visible; utf8_offsets has six entries, so
 * a run of more than five continuation bytes would index past it - confirm */
243 c
-= utf8_offsets
[csz
-1];