optimize the interface with python
[liba.git] / src / utf.c
blob63e509d417809ff1e19e9e902b6005d995fbb4a7
1 #include "a/utf.h"
3 unsigned int a_utf_encode(void *_str, a_u32 val)
5 a_u32 mask = 0;
6 unsigned int offset = 0;
7 a_u32 x = val & A_U32_C(0x7FFFFFFF);
8 if (x < A_U32_C(0x0010000))
10 if (x < A_U32_C(0x0000800))
12 if (x < A_U32_C(0x0000080) && x)
14 offset = 1; /* U+0000001 ~ U+0000007F */
16 else
18 offset = 2; /* U+0000080 ~ U+000007FF */
19 mask = 0xC0;
22 else
24 offset = 3; /* U+0000800 ~ U+0000FFFF */
25 mask = 0xE0;
28 else
30 if (x < A_U32_C(0x0200000))
32 offset = 4; /* U+0010000 ~ U+001FFFFF */
33 mask = 0xF0;
35 else
37 if (x < A_U32_C(0x4000000))
39 offset = 5; /* U+0200000 ~ U+03FFFFFF */
40 mask = 0xF8;
42 else
44 offset = 6; /* U+4000000 ~ U+7FFFFFFF */
45 mask = 0xFC;
49 if (_str)
51 a_byte *const str = (a_byte *)_str;
52 switch (offset)
54 case 6:
55 str[5] = (a_byte)(0x80 | (x & 0x3F));
56 x >>= 6;
57 A_FALLTHROUGH;
58 case 5:
59 str[4] = (a_byte)(0x80 | (x & 0x3F));
60 x >>= 6;
61 A_FALLTHROUGH;
62 case 4:
63 str[3] = (a_byte)(0x80 | (x & 0x3F));
64 x >>= 6;
65 A_FALLTHROUGH;
66 case 3:
67 str[2] = (a_byte)(0x80 | (x & 0x3F));
68 x >>= 6;
69 A_FALLTHROUGH;
70 case 2:
71 str[1] = (a_byte)(0x80 | (x & 0x3F));
72 x >>= 6;
73 A_FALLTHROUGH;
74 case 1:
75 str[0] = (a_byte)(mask | x);
76 A_FALLTHROUGH;
77 default:
78 break;
81 return offset;
84 unsigned int a_utf_decode(void const *_str, a_u32 *val)
86 a_byte const *str = (a_byte const *)_str;
87 unsigned int offset = 0;
88 unsigned int chr = *str;
89 a_u32 res = 0;
90 if (chr < 0x80)
92 res = chr;
93 if (!chr) { return offset; }
95 else
97 for (; chr & 0x40; chr <<= 1)
99 unsigned int c = *(++str);
100 if ((c & 0xC0) != 0x80) { return offset; }
101 res = (res << 6) | (c & 0x3F);
103 offset = (unsigned int)(str - (a_byte const *)_str);
104 res |= (a_u32)(chr & 0x7F) << (offset * 5);
106 if (val) { *val = res; }
107 return offset + 1;
110 a_size a_utf_length(void const *_str)
112 a_size length = 0;
113 char const *str = (char const *)_str;
114 unsigned int offset = a_utf_decode(str, A_NULL);
115 for (; offset; offset = a_utf_decode(str, A_NULL))
117 str += offset;
118 ++length;
120 return length;