SystemCall run(block) can now exit the run if it returns false
[io/quag.git] / libs / basekit / source / UArray_utf.c
blobc3ff8cc5188a5afba490137f87541e817e5dd0fb
1 /*
2 copyright: Steve Dekorte, 2006. All rights reserved.
3 license: See _BSDLicense.txt.
4 */
6 #include "UArray.h"
7 #include "ConvertUTF.h"
8 #include <string.h>
9 #include <stdarg.h>
10 #include <stdio.h>
11 #include <stddef.h>
/*
 * Reports the byte order of the machine this code is running on.
 *
 * Returns 1 when the lowest-addressed byte of an unsigned int holding the
 * value 1 is itself 1 (little endian), and 0 otherwise.
 */
int UArray_MachineIsLittleEndian(void)
{
	union
	{
		unsigned int word;
		unsigned char bytes[sizeof(unsigned int)];
	} probe;

	probe.word = 0x1;

	/* On a little-endian machine the least significant byte is stored first. */
	return probe.bytes[0] == 1;
}
/*
 * Returns the length in bytes of the UTF-8 sequence announced by the lead
 * byte at *s.
 *
 * Only the first byte is examined; continuation bytes are not read or
 * validated. The length is derived from the number of leading 1 bits:
 *   0       -> 1 (single-byte character)
 *   2 .. 6  -> that many bytes
 *   1, 7, 8 -> -1 (a continuation byte or an invalid lead byte)
 */
static int UArray_SizeOfUTFChar(const uint8_t *s)
{
	const uint8_t lead = s[0];
	int leadingOnes = 0;
	unsigned int mask;

	for (mask = 0x80; mask != 0 && (lead & mask) != 0; mask >>= 1)
	{
		leadingOnes++;
	}

	if (leadingOnes == 0)
	{
		return 1;
	}

	if (leadingOnes >= 2 && leadingOnes <= 6)
	{
		return leadingOnes;
	}

	return -1;
}
36 int UArray_maxCharSize(const UArray *self)
38 if (self->encoding == CENCODING_UTF8)
40 int maxCharSize = 1;
41 size_t i = 0;
43 while (i < self->size)
45 int charSize = UArray_SizeOfUTFChar(self->data + i);
46 if (charSize > maxCharSize) maxCharSize = charSize;
47 i += charSize;
50 return maxCharSize;
53 return self->itemSize;
56 int UArray_convertToFixedSizeType(UArray *self)
58 if (self->encoding == CENCODING_UTF8)
60 int maxCharSize = UArray_maxCharSize(self);
62 if(maxCharSize == 1)
64 self->encoding = CENCODING_ASCII;
66 else if(maxCharSize == 2)
68 UArray_convertToUTF16(self);
70 else
72 UArray_convertToUTF32(self);
75 return 1;
78 return 0;
81 int UArray_isMultibyte(const UArray *self)
83 if (self->encoding == CENCODING_UTF8)
85 UARRAY_INTFOREACH(self, i, v, if (ismbchar((int)v)) return 1; );
88 return 0;
91 int UArray_isLegalUTF8(const UArray *self)
93 void *sourceStart = self->data;
94 void *sourceEnd = self->data + self->size * self->itemSize;
96 return isLegalUTF8Sequence(sourceStart, sourceEnd);
99 UArray *UArray_asUTF8(const UArray *self)
101 UArray *out = UArray_new();
102 UArray_setItemType_(out, CTYPE_uint8_t);
103 UArray_setEncoding_(out, CENCODING_UTF8);
104 UArray_setSize_(out, self->size * 4);
107 ConversionResult r = conversionOK;
108 ConversionFlags options = lenientConversion;
109 void *sourceStart = self->data;
110 void *sourceEnd = self->data + self->size * self->itemSize;
111 UTF8 *targetStart = out->data;
112 UTF8 *targetEnd = out->data + out->size * out->itemSize;
113 size_t outSize;
115 switch(self->encoding)
117 case CENCODING_ASCII:
118 UArray_copy_(out, self);
119 break;
120 case CENCODING_UTF8:
121 UArray_copy_(out, self);
122 break;
123 case CENCODING_UTF16:
124 r = ConvertUTF16toUTF8((const UTF16 **)&sourceStart, (const UTF16 *)sourceEnd, &targetStart, targetEnd, options);
125 //outSize = (targetStart - out->data) / out->itemSize;
126 break;
127 case CENCODING_UTF32:
128 r = ConvertUTF32toUTF8((const UTF32 **)&sourceStart, (const UTF32 *)sourceEnd, &targetStart, targetEnd, options);
129 //outSize = (targetStart - out->data) / out->itemSize;
130 break;
131 default:
132 printf("UArray_asUTF8 - unknown source encoding\n");
137 UArray_setSize_(out, strlen((char *)out->data));
139 return out;
142 UArray *UArray_asUTF16(const UArray *self)
144 UArray *out = UArray_new();
145 UArray_setItemType_(out, CTYPE_uint16_t);
146 UArray_setEncoding_(out, CENCODING_UTF16);
147 UArray_setSize_(out, self->size);
150 ConversionResult r = conversionOK;
151 ConversionFlags options = lenientConversion;
152 void *sourceStart = self->data;
153 void *sourceEnd = self->data + self->size * self->itemSize;
154 UTF16 *targetStart = (UTF16 *)out->data;
155 UTF16 *targetEnd = (UTF16 *)(out->data + out->size * out->itemSize);
157 switch(self->encoding)
159 case CENCODING_ASCII:
160 r = ConvertUTF8toUTF16((const UTF8 **)&sourceStart, (const UTF8 *)sourceEnd, &targetStart, targetEnd, options);
161 break;
162 case CENCODING_UTF8:
163 r = ConvertUTF8toUTF16((const UTF8 **)&sourceStart, (const UTF8 *)sourceEnd, &targetStart, targetEnd, options);
164 break;
165 case CENCODING_UTF16:
166 UArray_copy_(out, self);
167 break;
168 case CENCODING_UTF32:
169 r = ConvertUTF32toUTF16((const UTF32 **)&sourceStart, (const UTF32 *)sourceEnd, &targetStart, targetEnd, options);
170 break;
171 default:
172 printf("UArray_asUTF16 - unknown source encoding\n");
176 return out;
179 UArray *UArray_asUTF32(const UArray *self)
181 UArray *out = UArray_new();
182 UArray_setItemType_(out, CTYPE_uint32_t);
183 UArray_setEncoding_(out, CENCODING_UTF32);
184 UArray_setSize_(out, self->size);
187 ConversionResult r = conversionOK;
188 ConversionFlags options = lenientConversion;
189 void *sourceStart = self->data;
190 void *sourceEnd = self->data + self->size * self->itemSize;
191 UTF32 *targetStart = (UTF32 *)out->data;
192 UTF32 *targetEnd = (UTF32 *)(out->data + out->size * out->itemSize);
194 switch(self->encoding)
196 case CENCODING_ASCII:
197 r = ConvertUTF8toUTF32((const UTF8 **)&sourceStart, (const UTF8 *)sourceEnd, &targetStart, targetEnd, options);
198 break;
199 case CENCODING_UTF8:
200 r = ConvertUTF8toUTF32((const UTF8 **)&sourceStart, (const UTF8 *)sourceEnd, &targetStart, targetEnd, options);
201 break;
202 case CENCODING_UTF16:
203 r = ConvertUTF16toUTF32((const UTF16 **)&sourceStart, (const UTF16 *)sourceEnd, &targetStart, targetEnd, options);
204 break;
205 case CENCODING_UTF32:
206 UArray_copy_(out, self);
207 break;
208 default:
209 printf("UArray_asUTF32 - unknown source encoding\n");
213 return out;
216 void UArray_convertToUTF8(UArray *self)
218 UArray *a = UArray_asUTF8(self);
219 UArray_swapWith_(self, a);
220 UArray_free(a);
223 void UArray_convertToUTF16(UArray *self)
225 UArray *a = UArray_asUTF16(self);
226 UArray_swapWith_(self, a);
227 UArray_free(a);
230 void UArray_convertToUTF32(UArray *self)
232 UArray *a = UArray_asUTF32(self);
233 UArray_swapWith_(self, a);
234 UArray_free(a);
238 // ----------------------------------------------------