Use =default for skeleton copy constructor
[ACE_TAO.git] / TAO / tao / Codeset / UTF16_BOM_Translator.cpp
blob4df509712c88c1b307c9c5f580e1e7e3b06b8b7b
1 // -*- C++ -*-
2 // ============================================================================
3 // Manages the transformation between native and transmitted UTF-16. It is
4 // required because transmitted UTF-16 may carry a byte order marker (BOM)
5 // that is not part of the data contents. If no BOM is present, then the
6 // serialized UTF-16 data is big-endian, regardless of the byte order of
7 // the containing encapsulation.
8 //
9 // AUTHOR
10 // Phil Mesnier <mesnier_p@ociweb.com>
12 // ============================================================================
14 #include "tao/Codeset/UTF16_BOM_Translator.h"
15 #include "ace/OS_Memory.h"
16 #include "tao/debug.h"
17 #include "ace/Log_Msg.h"
19 // ****************************************************************
21 typedef ACE_CDR::UShort ACE_UTF16_T;
22 static constexpr size_t ACE_UTF16_CODEPOINT_SIZE = sizeof (ACE_UTF16_T);
23 static constexpr ACE_CDR::ULong ACE_UL_UTF16_CODEPOINT_SIZE = static_cast<ACE_CDR::ULong>(ACE_UTF16_CODEPOINT_SIZE);
24 static constexpr unsigned short ACE_UNICODE_BOM_CORRECT = 0xFEFFU;
25 static constexpr unsigned short ACE_UNICODE_BOM_SWAPPED = 0xFFFEU;
27 TAO_BEGIN_VERSIONED_NAMESPACE_DECL
29 /////////////////////////////
30 // TAO_UTF16_BOM_Translator implementation
32 TAO_UTF16_BOM_Translator::TAO_UTF16_BOM_Translator (bool forceBE)
33 : forceBE_(forceBE)
35 if (TAO_debug_level > 1)
36 TAOLIB_DEBUG((LM_DEBUG,
37 ACE_TEXT ("TAO (%P|%t) - UTF16_BOM_Translator: ")
38 ACE_TEXT("forceBE %d\n"), this->forceBE_ ? 1:0 ));
41 // = Documented in $ACE_ROOT/ace/CDR_Stream.h
42 ACE_CDR::Boolean
43 TAO_UTF16_BOM_Translator::read_wchar (ACE_InputCDR &cdr, ACE_CDR::WChar &x)
45 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1 &&
46 static_cast<ACE_CDR::Short> (this->minor_version (cdr)) == 2)
48 ACE_CDR::Octet len;
49 if (! this->read_1 (cdr, &len))
50 return 0;
52 if (len == 2) // no BOM present
54 ACE_CDR::Short sx;
56 if (!this->read_array (cdr,
57 reinterpret_cast<char *> (&sx), 1,1,2))
58 return 0;
60 #if defined (ACE_LITTLE_ENDIAN)
61 ACE_CDR::Short ux;
62 ACE_CDR::swap_2 (reinterpret_cast<const char*> (&sx),
63 reinterpret_cast<char *> (&ux));
64 x = static_cast<ACE_CDR::WChar> (ux);
65 #else
66 x = static_cast<ACE_CDR::WChar> (sx);
67 #endif // ACE_LITTLE_ENDIAN
68 return 1;
71 ACE_UTF16_T buf[2];
72 if (len != 4 || !this->read_array (cdr,
73 reinterpret_cast<char *> (buf),
74 1,1,4)) // get BO & payload
75 return 0;
76 // Check for byte order mark, if found, consume and honor it.
77 if (buf[0] == ACE_UNICODE_BOM_CORRECT ||
78 buf[0] == ACE_UNICODE_BOM_SWAPPED)
80 // if we found it, but it came in in the wrong order
81 // invert the byte order flag for the duration of this method
82 if (buf[0] == ACE_UNICODE_BOM_SWAPPED)
84 ACE_CDR::Short ux;
85 ACE_CDR::swap_2 (reinterpret_cast<const char*> (&buf[1]),
86 reinterpret_cast<char *> (&ux));
87 x = static_cast<ACE_CDR::WChar> (ux);
89 else
90 x = static_cast<ACE_CDR::WChar> (buf[1]);
91 return 1;
93 // What do we do here? The length is > 2 but the first word
94 // is not a BOM. Just return an error I suppose
95 return 0;
98 ACE_UTF16_T sx;
99 if (this->read_2 (cdr, &sx))
101 x = static_cast<ACE_CDR::WChar> (sx);
102 return 1;
104 return 0;
107 ACE_CDR::Boolean
108 TAO_UTF16_BOM_Translator::read_wstring (ACE_InputCDR &cdr,
109 ACE_CDR::WChar *&x)
111 ACE_CDR::ULong len;
112 if (!this->read_4 (cdr, &len))
113 return 0;
115 // A check for the length being too great is done later in the
116 // call to read_char_array but we want to have it done before
117 // the memory is allocated.
118 if (len > 0 && len <= cdr.length ())
120 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
121 && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1)
123 len /= ACE_UTF16_CODEPOINT_SIZE;
125 //allocating one extra for the null character needed by applications
126 ACE_NEW_RETURN (x,
127 ACE_CDR::WChar [len + 1],
130 x[len] = L'\x00';
131 if (this->read_wchar_array_i (cdr, x, len, 1))
133 // Since reading the array may have adjusted the length,
134 // we simply rewrite the null terminator
135 x[len] = L'\x00';
136 return 1;
139 else
141 ACE_NEW_RETURN (x,
142 ACE_CDR::WChar [len],
144 if (this->read_wchar_array (cdr, x, len))
145 return 1;
147 delete [] x;
149 else if (len == 0)
151 // Convert any null strings to empty strings since empty
152 // strings can cause crashes. (See bug 58.)
153 ACE_NEW_RETURN (x,
154 ACE_CDR::WChar[1],
156 x[0] = '\x00';
157 return 1;
159 x = 0;
160 return 0;
163 #if !defined(ACE_LACKS_STD_WSTRING)
164 ACE_CDR::Boolean
165 TAO_UTF16_BOM_Translator::read_wstring (ACE_InputCDR &cdr,
166 std::wstring &x)
168 ACE_CDR::ULong len;
169 if (!this->read_4 (cdr, &len))
170 return false;
172 // A check for the length being too great is done later in the
173 // call to read_char_array but we want to have it done before
174 // the memory is allocated.
175 if (len > 0 && len <= cdr.length ())
177 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
178 && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1)
180 len /= ACE_UTF16_CODEPOINT_SIZE;
184 x.resize (len);
186 catch (const std::bad_alloc&)
188 return false;
191 if (this->read_wchar_array_i (cdr, &x[0], len, 1))
193 // Since reading the array may have adjusted the length,
194 // shrink to fit
195 x.resize (len);
196 return true;
199 else
203 x.resize (len);
205 catch (const std::bad_alloc&)
207 return false;
210 if (this->read_wchar_array (cdr, &x[0], len))
212 x.resize (len-1); // drop terminating zero wchar read from stream
213 return true;
217 else if (len == 0)
219 x.clear ();
220 return true;
222 x.clear ();
223 return false;
225 #endif
227 ACE_CDR::Boolean
228 TAO_UTF16_BOM_Translator::read_wchar_array_i (ACE_InputCDR & cdr,
229 ACE_CDR::WChar *x,
230 ACE_CDR::ULong &length,
231 int adjust_len)
233 int has_bom = 0;
234 int must_swap = 0;
235 char* buf;
236 static const size_t align = ACE_CDR::SHORT_ALIGN;
237 if (cdr.adjust (ACE_UTF16_CODEPOINT_SIZE * length, align, buf) == 0)
239 // check for byte order mark. If found, honor it then discard it
240 ACE_UTF16_T *sb = reinterpret_cast<ACE_UTF16_T *> (buf);
241 if (*sb == ACE_UNICODE_BOM_CORRECT || *sb == ACE_UNICODE_BOM_SWAPPED)
243 must_swap = (*sb == ACE_UNICODE_BOM_SWAPPED);
244 has_bom = 1;
246 else
248 #if defined (ACE_LITTLE_ENDIAN)
249 must_swap = 1;
250 #endif // ACE_LITTLE_ENDIAN
253 if (has_bom)
255 buf += ACE_UTF16_CODEPOINT_SIZE;
256 ++sb;
258 if (adjust_len)
259 length -= 1;
262 for (size_t i = 0; i < length; ++i)
263 #if defined (ACE_DISABLE_SWAP_ON_READ)
264 x[i] = static_cast<ACE_CDR::WChar> (sb[i]);
265 #else
266 if (!must_swap)
268 x[i] = static_cast<ACE_CDR::WChar> (sb[i]);
270 else
272 ACE_CDR::UShort sx;
273 ACE_CDR::swap_2 (&buf[i*2], reinterpret_cast<char *> (&sx));
274 x[i] = static_cast<ACE_CDR::WChar> (sx);
276 #endif /* ACE_DISABLE_SWAP_ON_READ */
278 if (has_bom && !adjust_len)
280 cdr.adjust (ACE_UTF16_CODEPOINT_SIZE, align, buf);
282 return 1;
284 return 0;
287 ACE_CDR::Boolean
288 TAO_UTF16_BOM_Translator::read_wchar_array (ACE_InputCDR & cdr,
289 ACE_CDR::WChar *x,
290 ACE_CDR::ULong length)
292 if (length == 0)
293 return 1;
295 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
296 && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1)
298 for (size_t i = 0; i < length; ++i)
299 if (!this->read_wchar (cdr, x[i]))
300 return 0;
302 return 1;
304 else
305 return this->read_wchar_array_i (cdr, x, length);
308 ACE_CDR::Boolean
309 TAO_UTF16_BOM_Translator::write_wchar (ACE_OutputCDR &cdr,
310 ACE_CDR::WChar x)
312 return this->write_wchar_i (cdr, x, true);
315 ACE_CDR::Boolean
316 TAO_UTF16_BOM_Translator::write_wchar_i (ACE_OutputCDR &cdr,
317 ACE_CDR::WChar x,
318 bool allow_BOM)
320 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
321 && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1)
323 int len = 0;
324 ACE_CDR::UShort buffer[2];
326 if( allow_BOM && cdr.byte_order())
328 len = 2;
329 #if defined (ACE_LITTLE_ENDIAN)
330 if (this->forceBE_)
332 // force both the byte order mark and the data to Big Endian order
333 buffer[0] = ACE_UNICODE_BOM_SWAPPED;
334 ACE_CDR::swap_2 (reinterpret_cast<const char *> (&x),
335 reinterpret_cast<char *> (&buffer[1]));
337 else
338 #endif
340 // store both the byte order mark and the data in native order
341 buffer[0] = ACE_UNICODE_BOM_CORRECT;
342 buffer[1] = static_cast<ACE_CDR::Short> (x);
345 else
347 // not using a byte order mark
348 // force it to be big endian w/o BOM
349 len = 1;
350 if (cdr.byte_order ())
351 ACE_CDR::swap_2 (reinterpret_cast<const char *> (&x),
352 reinterpret_cast<char *> (buffer));
353 else
354 buffer[0] = static_cast<ACE_CDR::Short> (x);
357 unsigned char tcsize =
358 static_cast<unsigned char> (len * ACE_UTF16_CODEPOINT_SIZE);
360 if (this->write_1 (cdr, &tcsize))
361 return this->write_array(cdr, &buffer, tcsize, 1, 1);
362 else
363 return 0;
365 else if (static_cast<ACE_CDR::Short> (this->minor_version (cdr)) != 0)
367 // GIOP 1.1 simple support
368 ACE_UTF16_T sx = static_cast<ACE_UTF16_T> (x);
369 return this->write_2 (cdr, &sx);
371 else
372 { // wchar is not allowed with GIOP 1.0.
373 errno = EINVAL;
374 return 0;
378 ACE_CDR::Boolean
379 TAO_UTF16_BOM_Translator::write_wstring (ACE_OutputCDR & cdr,
380 ACE_CDR::ULong len,
381 const ACE_CDR::WChar *x)
383 // we'll accept a null pointer but only for an empty string
384 ACE_ASSERT ((x != 0 || len == 0) &&
385 len < (ACE_UINT32_MAX - 1) / ACE_UL_UTF16_CODEPOINT_SIZE);
386 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
387 && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1)
389 if (len == 0) // for zero length strings, only write a length of
390 // zero. The BOM is not needed in this case.
391 return this->write_4(cdr, &len);
393 if (this->forceBE_ && cdr.byte_order())
395 ACE_CDR::ULong l = (len+1) * ACE_UL_UTF16_CODEPOINT_SIZE;
396 if (this->write_4 (cdr, &l) &&
397 this->write_2 (cdr, &ACE_UNICODE_BOM_SWAPPED) &&
398 x != 0)
399 return this->write_swapped_wchar_array_i (cdr, x, len);
401 else
403 ACE_CDR::ULong l = (len+1) * ACE_UL_UTF16_CODEPOINT_SIZE;
404 if (this->write_4 (cdr, &l) &&
405 this->write_2 (cdr, &ACE_UNICODE_BOM_CORRECT) &&
406 x != 0)
407 return this->write_wchar_array_i (cdr, x, len);
410 else
412 // pre GIOP 1.2: include null terminator in length
413 ACE_CDR::ULong l = len + 1;
415 if (this->write_4 (cdr, &l))
417 if (x != 0)
419 return this->write_wchar_array_i (cdr, x, len + 1);
421 else
423 ACE_UTF16_T s = 0;
424 return this->write_2 (cdr,&s);
429 return 0;
432 ACE_CDR::Boolean
433 TAO_UTF16_BOM_Translator::write_wchar_array (ACE_OutputCDR & cdr,
434 const ACE_CDR::WChar *x,
435 ACE_CDR::ULong length)
437 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
438 && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1)
440 for (size_t i = 0; i < length; ++i)
441 if (this->write_wchar_i (cdr, x[i], false) == 0)
442 return 0;
444 return 1;
447 return this->write_wchar_array_i (cdr, x, length);
450 ACE_CDR::Boolean
451 TAO_UTF16_BOM_Translator::write_wchar_array_i (ACE_OutputCDR & cdr,
452 const ACE_CDR::WChar *x,
453 ACE_CDR::ULong length)
455 if (length == 0)
456 return 1;
457 char* buf;
458 static const size_t align = ACE_CDR::SHORT_ALIGN;
459 if (cdr.adjust (ACE_UTF16_CODEPOINT_SIZE * length, align, buf)
460 != 0)
462 return 0;
465 ACE_UTF16_T *sb = reinterpret_cast<ACE_UTF16_T *> (buf);
467 for (size_t i = 0; i < length; ++i)
469 sb[i] = static_cast<ACE_UTF16_T> (x[i]);
471 return 1;
474 ACE_CDR::Boolean
475 TAO_UTF16_BOM_Translator::write_swapped_wchar_array_i (ACE_OutputCDR & cdr,
476 const ACE_CDR::WChar *x,
477 ACE_CDR::ULong length)
479 if (length == 0)
480 return 1;
481 char* buf;
482 static const size_t align = ACE_CDR::SHORT_ALIGN;
483 if (cdr.adjust (ACE_UTF16_CODEPOINT_SIZE * length, align, buf)
484 != 0)
486 return 0;
489 ACE_UTF16_T *sb = reinterpret_cast<ACE_UTF16_T *> (buf);
491 for (size_t i = 0; i < length; ++i)
493 ACE_CDR::swap_2 (reinterpret_cast<const char*> (&x[i]),
494 reinterpret_cast<char *> (&sb[i]));
496 return 1;
499 TAO_END_VERSIONED_NAMESPACE_DECL