/*
	copyright: Steve Dekorte, 2006. All rights reserved.
	license: See _BSDLicense.txt.
*/
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "ConvertUTF.h"
/*
 * Reports the byte order of the machine running the code.
 *
 * Stores the value 1 in an unsigned int and inspects the byte at the lowest
 * address: on a little-endian machine that byte holds the least significant
 * part of the value and therefore equals 1.
 *
 * Returns 1 when the machine is little-endian, 0 otherwise.
 */
int UArray_MachineIsLittleEndian(void)
{
	const unsigned int probe = 1u;
	const unsigned char *firstByte = (const unsigned char *)&probe;

	return firstByte[0] == 1;
}
/*
 * Returns the length in bytes of the UTF-8 sequence that starts at *s,
 * judged from the lead byte alone (continuation bytes are not inspected).
 *
 *   0xxxxxxx -> 1      1110xxxx -> 3      111110xx -> 5
 *   110xxxxx -> 2      11110xxx -> 4      1111110x -> 6
 *
 * The 5- and 6-byte forms belong to the original UTF-8 definition and are
 * still recognised here. Any byte that is not a valid lead byte (a stray
 * continuation byte 10xxxxxx, 0xFE or 0xFF) is reported as length 1 so that
 * a caller stepping through a buffer always advances.
 *
 * s must point to at least one readable byte.
 */
static int UArray_SizeOfUTFChar(const uint8_t *s)
{
	const uint8_t c = *s;

	if ((c & 0xE0) == 0xC0)
	{
		return 2;
	}

	if ((c & 0xF0) == 0xE0)
	{
		return 3;
	}

	if ((c & 0xF8) == 0xF0)
	{
		return 4;
	}

	if ((c & 0xFC) == 0xF8)
	{
		return 5;
	}

	if ((c & 0xFE) == 0xFC)
	{
		return 6;
	}

	// Plain ASCII, or a byte that cannot start a sequence.
	return 1;
}
36 int UArray_maxCharSize(const UArray
*self
)
38 if (self
->encoding
== CENCODING_UTF8
)
43 while (i
< self
->size
)
45 int charSize
= UArray_SizeOfUTFChar(self
->data
+ i
);
46 if (charSize
> maxCharSize
) maxCharSize
= charSize
;
53 return self
->itemSize
;
56 int UArray_convertToFixedSizeType(UArray
*self
)
58 if (self
->encoding
== CENCODING_UTF8
)
60 int maxCharSize
= UArray_maxCharSize(self
);
64 self
->encoding
= CENCODING_ASCII
;
66 else if(maxCharSize
== 2)
68 UArray_convertToUTF16(self
);
72 UArray_convertToUTF32(self
);
81 int UArray_isMultibyte(const UArray
*self
)
83 if (self
->encoding
== CENCODING_UTF8
)
85 UARRAY_INTFOREACH(self
, i
, v
, if (ismbchar((int)v
)) return 1; );
91 int UArray_isLegalUTF8(const UArray
*self
)
93 void *sourceStart
= self
->data
;
94 void *sourceEnd
= self
->data
+ self
->size
* self
->itemSize
;
96 return isLegalUTF8Sequence(sourceStart
, sourceEnd
);
99 UArray
*UArray_asUTF8(const UArray
*self
)
101 UArray
*out
= UArray_new();
102 UArray_setItemType_(out
, CTYPE_uint8_t
);
103 UArray_setEncoding_(out
, CENCODING_UTF8
);
104 UArray_setSize_(out
, self
->size
* 4);
107 ConversionResult r
= conversionOK
;
108 ConversionFlags options
= lenientConversion
;
109 void *sourceStart
= self
->data
;
110 void *sourceEnd
= self
->data
+ self
->size
* self
->itemSize
;
111 UTF8
*targetStart
= out
->data
;
112 UTF8
*targetEnd
= out
->data
+ out
->size
* out
->itemSize
;
115 switch(self
->encoding
)
117 case CENCODING_ASCII
:
118 UArray_copy_(out
, self
);
121 UArray_copy_(out
, self
);
123 case CENCODING_UTF16
:
124 r
= ConvertUTF16toUTF8((const UTF16
**)&sourceStart
, (const UTF16
*)sourceEnd
, &targetStart
, targetEnd
, options
);
125 //outSize = (targetStart - out->data) / out->itemSize;
127 case CENCODING_UTF32
:
128 r
= ConvertUTF32toUTF8((const UTF32
**)&sourceStart
, (const UTF32
*)sourceEnd
, &targetStart
, targetEnd
, options
);
129 //outSize = (targetStart - out->data) / out->itemSize;
132 printf("UArray_asUTF8 - unknown source encoding\n");
137 UArray_setSize_(out
, strlen((char *)out
->data
));
142 UArray
*UArray_asUTF16(const UArray
*self
)
144 UArray
*out
= UArray_new();
145 UArray_setItemType_(out
, CTYPE_uint16_t
);
146 UArray_setEncoding_(out
, CENCODING_UTF16
);
147 UArray_setSize_(out
, self
->size
);
150 ConversionResult r
= conversionOK
;
151 ConversionFlags options
= lenientConversion
;
152 void *sourceStart
= self
->data
;
153 void *sourceEnd
= self
->data
+ self
->size
* self
->itemSize
;
154 UTF16
*targetStart
= (UTF16
*)out
->data
;
155 UTF16
*targetEnd
= (UTF16
*)(out
->data
+ out
->size
* out
->itemSize
);
157 switch(self
->encoding
)
159 case CENCODING_ASCII
:
160 r
= ConvertUTF8toUTF16((const UTF8
**)&sourceStart
, (const UTF8
*)sourceEnd
, &targetStart
, targetEnd
, options
);
163 r
= ConvertUTF8toUTF16((const UTF8
**)&sourceStart
, (const UTF8
*)sourceEnd
, &targetStart
, targetEnd
, options
);
165 case CENCODING_UTF16
:
166 UArray_copy_(out
, self
);
168 case CENCODING_UTF32
:
169 r
= ConvertUTF32toUTF16((const UTF32
**)&sourceStart
, (const UTF32
*)sourceEnd
, &targetStart
, targetEnd
, options
);
172 printf("UArray_asUTF16 - unknown source encoding\n");
179 UArray
*UArray_asUTF32(const UArray
*self
)
181 UArray
*out
= UArray_new();
182 UArray_setItemType_(out
, CTYPE_uint32_t
);
183 UArray_setEncoding_(out
, CENCODING_UTF32
);
184 UArray_setSize_(out
, self
->size
);
187 ConversionResult r
= conversionOK
;
188 ConversionFlags options
= lenientConversion
;
189 void *sourceStart
= self
->data
;
190 void *sourceEnd
= self
->data
+ self
->size
* self
->itemSize
;
191 UTF32
*targetStart
= (UTF32
*)out
->data
;
192 UTF32
*targetEnd
= (UTF32
*)(out
->data
+ out
->size
* out
->itemSize
);
194 switch(self
->encoding
)
196 case CENCODING_ASCII
:
197 r
= ConvertUTF8toUTF32((const UTF8
**)&sourceStart
, (const UTF8
*)sourceEnd
, &targetStart
, targetEnd
, options
);
200 r
= ConvertUTF8toUTF32((const UTF8
**)&sourceStart
, (const UTF8
*)sourceEnd
, &targetStart
, targetEnd
, options
);
202 case CENCODING_UTF16
:
203 r
= ConvertUTF16toUTF32((const UTF16
**)&sourceStart
, (const UTF16
*)sourceEnd
, &targetStart
, targetEnd
, options
);
205 case CENCODING_UTF32
:
206 UArray_copy_(out
, self
);
209 printf("UArray_asUTF32 - unknown source encoding\n");
216 void UArray_convertToUTF8(UArray
*self
)
218 UArray
*a
= UArray_asUTF8(self
);
219 UArray_swapWith_(self
, a
);
223 void UArray_convertToUTF16(UArray
*self
)
225 UArray
*a
= UArray_asUTF16(self
);
226 UArray_swapWith_(self
, a
);
230 void UArray_convertToUTF32(UArray
*self
)
232 UArray
*a
= UArray_asUTF32(self
);
233 UArray_swapWith_(self
, a
);
// ----------------------------------------------------