Initial Patch of Auction House bot rev. 135
[auctionmangos.git] / dep / ACE_wrappers / ace / UTF32_Encoding_Converter.cpp
blob459bf2530b030c98bc0781bc49b4697a30652881
1 // $Id: UTF32_Encoding_Converter.cpp 80826 2008-03-04 14:51:23Z wotte $
3 // ======================================================================
4 //
5 // The actual conversion methods are covered by the copyright information
6 // below. It is not the actual code provided by Unicode, Inc. but is an
7 // ACE-ified and only slightly modified version.
8 //
9 // Chad Elliott 4/28/2005
11 // Copyright 2001-2004 Unicode, Inc.
13 // Limitations on Rights to Redistribute This Code
15 // Unicode, Inc. hereby grants the right to freely use the information
16 // supplied in this file in the creation of products supporting the
17 // Unicode Standard, and to make copies of this file in any form
18 // for internal or external distribution as long as this notice
19 // remains attached.
21 // ======================================================================
23 #include "ace/UTF32_Encoding_Converter.h"
25 #if defined (ACE_USES_WCHAR)
26 #include "ace/OS_NS_stdio.h"
27 #include "ace/OS_Memory.h"
28 #include "ace/Min_Max.h"
30 ACE_BEGIN_VERSIONED_NAMESPACE_DECL
// Largest value accepted as a legal UTF-32 code point (U+10FFFF).  Both
// conversion routines in this file report SOURCE_ILLEGAL for anything above it.
32 static const ACE_UINT32 UNI_MAX_LEGAL_UTF32 = 0x0010FFFF;
// Constructs a UTF-32 converter.  The @a swap argument is forwarded unchanged
// to the ACE_UTF16_Encoding_Converter base-class constructor.
34 ACE_UTF32_Encoding_Converter::ACE_UTF32_Encoding_Converter (bool swap)
35 : ACE_UTF16_Encoding_Converter (swap)
// Destructor for the UTF-32 converter.
39 ACE_UTF32_Encoding_Converter::~ACE_UTF32_Encoding_Converter (void)
43 ACE_UTF32_Encoding_Converter::Result
44 ACE_UTF32_Encoding_Converter::to_utf8 (const void* source,
45 size_t source_size,
46 ACE_Byte* target,
47 size_t target_size,
48 bool strict)
50 static const ACE_UINT32 byteMask = 0xBF;
51 static const ACE_UINT32 byteMark = 0x80;
52 static const ACE_UINT32 UNI_SUR_HIGH_START = get_UNI_SUR_HIGH_START ();
53 static const ACE_UINT32 UNI_SUR_LOW_END = get_UNI_SUR_LOW_END ();
54 static const ACE_Byte* firstByteMark = get_first_byte_mark ();
56 Result result = CONVERSION_OK;
57 ACE_Byte* targetEnd = target + target_size;
58 const ACE_UINT32* sourceStart = static_cast<const ACE_UINT32*> (source);
59 const ACE_UINT32* sourceEnd = sourceStart + (source_size / sizeof (ACE_UINT32));
61 while (sourceStart < sourceEnd)
63 ACE_UINT32 nw = *sourceStart++;
64 ACE_UINT32 ch = (this->swap_ ? ACE_SWAP_LONG (nw) : nw);
65 unsigned short bytesToWrite = 0;
67 if (strict)
69 // UTF-16 surrogate values are illegal in UTF-32
70 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
72 result = SOURCE_ILLEGAL;
73 break;
77 // Figure out how many bytes the result will require. Turn any
78 // illegally large ACE_UINT32 things (> Plane 17) into replacement
79 // chars.
80 if (ch < 0x80)
82 bytesToWrite = 1;
84 else if (ch < 0x800)
86 bytesToWrite = 2;
88 else if (ch < 0x10000)
90 bytesToWrite = 3;
92 else if (ch <= UNI_MAX_LEGAL_UTF32)
94 bytesToWrite = 4;
96 else
98 result = SOURCE_ILLEGAL;
99 break;
102 target += bytesToWrite;
103 if (target > targetEnd)
105 result = TARGET_EXHAUSTED;
106 break;
109 // NOTE: everything falls through.
110 switch (bytesToWrite)
112 case 4:
113 *--target = (ACE_Byte)((ch | byteMark) & byteMask);
114 ch >>= 6;
115 case 3:
116 *--target = (ACE_Byte)((ch | byteMark) & byteMask);
117 ch >>= 6;
118 case 2:
119 *--target = (ACE_Byte)((ch | byteMark) & byteMask);
120 ch >>= 6;
121 case 1:
122 *--target = (ACE_Byte) (ch | firstByteMark[bytesToWrite]);
124 target += bytesToWrite;
127 return result;
130 ACE_UTF32_Encoding_Converter::Result
131 ACE_UTF32_Encoding_Converter::from_utf8 (const ACE_Byte* source,
132 size_t source_size,
133 void* target,
134 size_t target_size,
135 bool strict)
137 static const ACE_UINT32 UNI_SUR_HIGH_START = get_UNI_SUR_HIGH_START ();
138 static const ACE_UINT32 UNI_SUR_LOW_END = get_UNI_SUR_LOW_END ();
139 static const ACE_UINT32 UNI_REPLACEMENT_CHAR = get_UNI_REPLACEMENT_CHAR ();
140 static const ACE_Byte* trailingBytesForUTF8 = get_trailing_bytes_for_utf8 ();
141 static const ACE_UINT32* offsetsFromUTF8 = get_offsets_from_utf8 ();
143 Result result = CONVERSION_OK;
144 const ACE_Byte* sourceEnd = source + source_size;
145 ACE_UINT32* targetStart = static_cast<ACE_UINT32*> (target);
146 ACE_UINT32* targetEnd = targetStart + target_size;
148 while (source < sourceEnd)
150 ACE_UINT32 ch = 0;
151 unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
152 if (source + extraBytesToRead >= sourceEnd)
154 result = SOURCE_EXHAUSTED;
155 break;
158 // Do this check whether lenient or strict
159 if (!this->is_legal_utf8 (source, extraBytesToRead + 1))
161 result = SOURCE_ILLEGAL;
162 break;
165 // The cases all fall through. See "Note A" below.
166 switch (extraBytesToRead)
168 case 5:
169 ch += *source++;
170 ch <<= 6;
171 case 4:
172 ch += *source++;
173 ch <<= 6;
174 case 3:
175 ch += *source++;
176 ch <<= 6;
177 case 2:
178 ch += *source++;
179 ch <<= 6;
180 case 1:
181 ch += *source++;
182 ch <<= 6;
183 case 0:
184 ch += *source++;
186 ch -= offsetsFromUTF8[extraBytesToRead];
188 if (targetStart >= targetEnd)
190 result = TARGET_EXHAUSTED;
191 break;
194 if (ch <= UNI_MAX_LEGAL_UTF32)
196 // UTF-16 surrogate values are illegal in UTF-32, and anything
197 // over Plane 17 (> 0x10FFFF) is illegal.
198 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
200 if (strict)
202 result = SOURCE_ILLEGAL;
203 break;
205 else
207 *targetStart++ = UNI_REPLACEMENT_CHAR;
210 else
212 *targetStart++ = ch;
215 else
217 result = SOURCE_ILLEGAL;
218 break;
222 return result;
225 ACE_UTF32_Encoding_Converter*
226 ACE_UTF32_Encoding_Converter::encoded (const ACE_Byte* source,
227 size_t source_size)
229 static const size_t begin = 16;
230 static const size_t converted = begin * 4;
232 ACE_Byte target[converted];
233 ACE_UTF32_Encoding_Converter* converter = 0;
234 ACE_NEW_RETURN (converter,
235 ACE_UTF32_Encoding_Converter (false),
238 if (converter->to_utf8 (source,
239 ACE_MIN (begin, source_size),
240 target,
241 converted) == CONVERSION_OK)
243 return converter;
245 else
247 delete converter;
250 return 0;
253 ACE_END_VERSIONED_NAMESPACE_DECL
254 #endif /* ACE_USES_WCHAR */