Changes to attempt to silence bcc64x
[ACE_TAO.git] / ACE / ace / UTF32_Encoding_Converter.cpp
blob9312fbe4acb23695e22fb29504d7ef8cb57b2dd0
1 // ======================================================================
2 //
3 // The actual conversion methods are covered by the copyright information
4 // below. It is not the actual code provided by Unicode, Inc. but is an
5 // ACE-ified and only slightly modified version.
6 //
7 // Chad Elliott 4/28/2005
8 //
9 // Copyright 2001-2004 Unicode, Inc.
11 // Limitations on Rights to Redistribute This Code
13 // Unicode, Inc. hereby grants the right to freely use the information
14 // supplied in this file in the creation of products supporting the
15 // Unicode Standard, and to make copies of this file in any form
16 // for internal or external distribution as long as this notice
17 // remains attached.
19 // ======================================================================
21 #include "ace/UTF32_Encoding_Converter.h"
23 #if defined (ACE_USES_WCHAR)
24 #include "ace/OS_NS_stdio.h"
25 #include "ace/OS_Memory.h"
26 #include "ace/Min_Max.h"
28 ACE_BEGIN_VERSIONED_NAMESPACE_DECL
30 static const ACE_UINT32 UNI_MAX_LEGAL_UTF32 = 0x0010FFFF;
32 ACE_UTF32_Encoding_Converter::ACE_UTF32_Encoding_Converter (bool swap)
33 : ACE_UTF16_Encoding_Converter (swap)
37 ACE_UTF32_Encoding_Converter::~ACE_UTF32_Encoding_Converter ()
41 ACE_UTF32_Encoding_Converter::Result
42 ACE_UTF32_Encoding_Converter::to_utf8 (const void* source,
43 size_t source_size,
44 ACE_Byte* target,
45 size_t target_size,
46 bool strict)
48 static const ACE_UINT32 byteMask = 0xBF;
49 static const ACE_UINT32 byteMark = 0x80;
50 static const ACE_UINT32 UNI_SUR_HIGH_START = get_UNI_SUR_HIGH_START ();
51 static const ACE_UINT32 UNI_SUR_LOW_END = get_UNI_SUR_LOW_END ();
52 static const ACE_Byte* firstByteMark = get_first_byte_mark ();
54 Result result = CONVERSION_OK;
55 ACE_Byte* targetEnd = target + target_size;
56 const ACE_UINT32* sourceStart = static_cast<const ACE_UINT32*> (source);
57 const ACE_UINT32* sourceEnd = sourceStart + (source_size / sizeof (ACE_UINT32));
59 while (sourceStart < sourceEnd)
61 ACE_UINT32 nw = *sourceStart++;
62 ACE_UINT32 ch = (this->swap_ ? ACE_SWAP_LONG (nw) : nw);
63 unsigned short bytesToWrite = 0;
65 if (strict)
67 // UTF-16 surrogate values are illegal in UTF-32
68 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
70 result = SOURCE_ILLEGAL;
71 break;
75 // Figure out how many bytes the result will require. Turn any
76 // illegally large ACE_UINT32 things (> Plane 17) into replacement
77 // chars.
78 if (ch < 0x80)
80 bytesToWrite = 1;
82 else if (ch < 0x800)
84 bytesToWrite = 2;
86 else if (ch < 0x10000)
88 bytesToWrite = 3;
90 else if (ch <= UNI_MAX_LEGAL_UTF32)
92 bytesToWrite = 4;
94 else
96 result = SOURCE_ILLEGAL;
97 break;
100 target += bytesToWrite;
101 if (target > targetEnd)
103 result = TARGET_EXHAUSTED;
104 break;
107 // NOTE: everything falls through.
108 switch (bytesToWrite)
110 case 4:
111 *--target = (ACE_Byte)((ch | byteMark) & byteMask);
112 ch >>= 6;
113 case 3:
114 *--target = (ACE_Byte)((ch | byteMark) & byteMask);
115 ch >>= 6;
116 case 2:
117 *--target = (ACE_Byte)((ch | byteMark) & byteMask);
118 ch >>= 6;
119 case 1:
120 *--target = (ACE_Byte) (ch | firstByteMark[bytesToWrite]);
122 target += bytesToWrite;
125 return result;
128 ACE_UTF32_Encoding_Converter::Result
129 ACE_UTF32_Encoding_Converter::from_utf8 (const ACE_Byte* source,
130 size_t source_size,
131 void* target,
132 size_t target_size,
133 bool strict)
135 static const ACE_UINT32 UNI_SUR_HIGH_START = get_UNI_SUR_HIGH_START ();
136 static const ACE_UINT32 UNI_SUR_LOW_END = get_UNI_SUR_LOW_END ();
137 static const ACE_UINT32 UNI_REPLACEMENT_CHAR = get_UNI_REPLACEMENT_CHAR ();
138 static const ACE_Byte* trailingBytesForUTF8 = get_trailing_bytes_for_utf8 ();
139 static const ACE_UINT32* offsetsFromUTF8 = get_offsets_from_utf8 ();
141 Result result = CONVERSION_OK;
142 const ACE_Byte* sourceEnd = source + source_size;
143 ACE_UINT32* targetStart = static_cast<ACE_UINT32*> (target);
144 ACE_UINT32* targetEnd = targetStart + target_size;
146 while (source < sourceEnd)
148 ACE_UINT32 ch = 0;
149 unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
150 if (source + extraBytesToRead >= sourceEnd)
152 result = SOURCE_EXHAUSTED;
153 break;
156 // Do this check whether lenient or strict
157 if (!this->is_legal_utf8 (source, extraBytesToRead + 1))
159 result = SOURCE_ILLEGAL;
160 break;
163 // The cases all fall through. See "Note A" below.
164 switch (extraBytesToRead)
166 case 5:
167 ch += *source++;
168 ch <<= 6;
169 case 4:
170 ch += *source++;
171 ch <<= 6;
172 case 3:
173 ch += *source++;
174 ch <<= 6;
175 case 2:
176 ch += *source++;
177 ch <<= 6;
178 case 1:
179 ch += *source++;
180 ch <<= 6;
181 case 0:
182 ch += *source++;
184 ch -= offsetsFromUTF8[extraBytesToRead];
186 if (targetStart >= targetEnd)
188 result = TARGET_EXHAUSTED;
189 break;
192 if (ch <= UNI_MAX_LEGAL_UTF32)
194 // UTF-16 surrogate values are illegal in UTF-32, and anything
195 // over Plane 17 (> 0x10FFFF) is illegal.
196 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
198 if (strict)
200 result = SOURCE_ILLEGAL;
201 break;
203 else
205 *targetStart++ = UNI_REPLACEMENT_CHAR;
208 else
210 *targetStart++ = ch;
213 else
215 result = SOURCE_ILLEGAL;
216 break;
220 return result;
223 ACE_UTF32_Encoding_Converter*
224 ACE_UTF32_Encoding_Converter::encoded (const ACE_Byte* source,
225 size_t source_size)
227 static const size_t begin = 16;
228 static const size_t converted = begin * 4;
230 ACE_Byte target[converted];
231 ACE_UTF32_Encoding_Converter* converter = 0;
232 ACE_NEW_RETURN (converter,
233 ACE_UTF32_Encoding_Converter (false),
236 if (converter->to_utf8 (source,
237 ACE_MIN (begin, source_size),
238 target,
239 converted) == CONVERSION_OK)
241 return converter;
243 else
245 delete converter;
248 return 0;
251 ACE_END_VERSIONED_NAMESPACE_DECL
252 #endif /* ACE_USES_WCHAR */