2 // ============================================================================
3 // Manages the transformation between native and transmitted UTF-16. It is
4 // required because transmitted UTF-16 may carry a byte order marker (BOM)
5 // that is not part of the data contents. If no BOM is present, then the
6 // serialized UTF-16 data is big-endian, regardless of the byte order of
7 // the containing encapsulation.
10 // Phil Mesnier <mesnier_p@ociweb.com>
12 // ============================================================================
14 #include "tao/Codeset/UTF16_BOM_Translator.h"
15 #include "ace/OS_Memory.h"
16 #include "tao/debug.h"
17 #include "ace/Log_Msg.h"
19 // ****************************************************************
// File-local helpers for UTF-16 handling: the code unit type, its size in
// two integer flavours, and the two values a byte order mark (BOM) can take
// when loaded as an unsigned 16-bit quantity.
//
// Code unit type used for UTF-16 data in this file (an ACE_CDR::UShort).
21 typedef ACE_CDR::UShort ACE_UTF16_T
;
// Size in bytes of one UTF-16 code unit, as a size_t.
22 static constexpr size_t ACE_UTF16_CODEPOINT_SIZE
= sizeof (ACE_UTF16_T
);
// Same size, converted to ACE_CDR::ULong for arithmetic on ULong lengths.
23 static constexpr ACE_CDR::ULong ACE_UL_UTF16_CODEPOINT_SIZE
= static_cast<ACE_CDR::ULong
>(ACE_UTF16_CODEPOINT_SIZE
);
// BOM value as seen when the data is already in the reader's byte order.
24 static constexpr unsigned short ACE_UNICODE_BOM_CORRECT
= 0xFEFFU
;
// BOM value as seen when the data is in the opposite byte order.
25 static constexpr unsigned short ACE_UNICODE_BOM_SWAPPED
= 0xFFFEU
;
27 TAO_BEGIN_VERSIONED_NAMESPACE_DECL
29 /////////////////////////////
30 // TAO_UTF16_BOM_Translator implementation
// Constructor. Takes a single flag, forceBE.
// When TAO_debug_level is greater than 1 it logs the current value of
// this->forceBE_ (printed as 1 or 0).
// NOTE(review): the statement that stores forceBE into forceBE_ is not
// visible in this listing (presumably a member initializer) -- confirm.
32 TAO_UTF16_BOM_Translator::TAO_UTF16_BOM_Translator (bool forceBE
)
35 if (TAO_debug_level
> 1)
36 TAOLIB_DEBUG((LM_DEBUG
,
37 ACE_TEXT ("TAO (%P|%t) - UTF16_BOM_Translator: ")
38 ACE_TEXT("forceBE %d\n"), this->forceBE_
? 1:0 ));
41 // = Documented in $ACE_ROOT/ace/CDR_Stream.h
// Reads a single wide character from cdr into x.
//
// When the stream version is exactly 1.2 the character is preceded by a
// one-octet length (read with read_1):
//   - len == 2: two bytes of payload and no BOM. The bytes are read raw
//     into sx; where ACE_LITTLE_ENDIAN is defined they are byte-swapped
//     into ux before being stored in x.
//   - otherwise len must be 4: a BOM followed by the payload, read raw
//     into buf. buf[0] must equal one of the two BOM constants; if it is
//     the swapped form, the payload buf[1] is byte-swapped before use,
//     else it is used as is.
// For other versions the value is read with read_2 and widened into x.
//
// NOTE(review): the return statements, the local declarations (len, sx,
// ux, buf) and the else branches are not visible in this listing, so the
// exact failure return values cannot be stated from here.
43 TAO_UTF16_BOM_Translator::read_wchar (ACE_InputCDR
&cdr
, ACE_CDR::WChar
&x
)
45 if (static_cast<ACE_CDR::Short
> (this->major_version (cdr
)) == 1 &&
46 static_cast<ACE_CDR::Short
> (this->minor_version (cdr
)) == 2)
// Length octet that prefixes the character data.
49 if (! this->read_1 (cdr
, &len
))
52 if (len
== 2) // no BOM present
// Raw read of 2 bytes (element size 1, alignment 1, count 2).
56 if (!this->read_array (cdr
,
57 reinterpret_cast<char *> (&sx
), 1,1,2))
60 #if defined (ACE_LITTLE_ENDIAN)
62 ACE_CDR::swap_2 (reinterpret_cast<const char*> (&sx
),
63 reinterpret_cast<char *> (&ux
));
64 x
= static_cast<ACE_CDR::WChar
> (ux
);
66 x
= static_cast<ACE_CDR::WChar
> (sx
);
67 #endif // ACE_LITTLE_ENDIAN
// Anything other than 4 bytes (BOM + payload) is rejected here.
72 if (len
!= 4 || !this->read_array (cdr
,
73 reinterpret_cast<char *> (buf
),
74 1,1,4)) // get BO & payload
76 // Check for byte order mark, if found, consume and honor it.
77 if (buf
[0] == ACE_UNICODE_BOM_CORRECT
||
78 buf
[0] == ACE_UNICODE_BOM_SWAPPED
)
80 // if we found it, but it came in in the wrong order
81 // invert the byte order flag for the duration of this method
// NOTE(review): the visible code swaps the payload bytes directly; no
// byte order flag is toggled in the lines shown here.
82 if (buf
[0] == ACE_UNICODE_BOM_SWAPPED
)
85 ACE_CDR::swap_2 (reinterpret_cast<const char*> (&buf
[1]),
86 reinterpret_cast<char *> (&ux
));
87 x
= static_cast<ACE_CDR::WChar
> (ux
);
90 x
= static_cast<ACE_CDR::WChar
> (buf
[1]);
93 // What do we do here? The length is > 2 but the first word
94 // is not a BOM. Just return an error I suppose
// Versions other than 1.2: plain 2-byte read through read_2.
99 if (this->read_2 (cdr
, &sx
))
101 x
= static_cast<ACE_CDR::WChar
> (sx
);
// Reads a wide string from cdr into a newly allocated ACE_CDR::WChar
// buffer referred to by x.
//
// A 4-byte length is read first. Only lengths in the range
// (0, cdr.length()] proceed to allocation. For version 1.x with minor > 1
// the length is divided by the code unit size (so it is a byte count on
// the wire), len + 1 elements are allocated, and the data is read with
// read_wchar_array_i, passing len by reference and 1 as the last argument.
// For other versions len elements are allocated and read with
// read_wchar_array.
//
// NOTE(review): the declaration of x, the allocation macro surrounding the
// "ACE_CDR::WChar [len + 1]" / "[len]" fragments, the null-terminator
// writes and all return statements are not visible in this listing.
108 TAO_UTF16_BOM_Translator::read_wstring (ACE_InputCDR
&cdr
,
112 if (!this->read_4 (cdr
, &len
))
115 // A check for the length being too great is done later in the
116 // call to read_char_array but we want to have it done before
117 // the memory is allocated.
118 if (len
> 0 && len
<= cdr
.length ())
120 if (static_cast<ACE_CDR::Short
> (this->major_version (cdr
)) == 1
121 && static_cast<ACE_CDR::Short
> (this->minor_version (cdr
)) > 1)
// Convert the transmitted length into a count of code units.
123 len
/= ACE_UTF16_CODEPOINT_SIZE
;
125 //allocating one extra for the null character needed by applications
127 ACE_CDR::WChar
[len
+ 1],
131 if (this->read_wchar_array_i (cdr
, x
, len
, 1))
133 // Since reading the array may have adjusted the length,
134 // we simply rewrite the null terminator
142 ACE_CDR::WChar
[len
],
144 if (this->read_wchar_array (cdr
, x
, len
))
151 // Convert any null strings to empty strings since empty
152 // strings can cause crashes. (See bug 58.)
163 #if !defined(ACE_LACKS_STD_WSTRING)
// Overload of read_wstring that fills a string object x in place; it is
// compiled only when ACE_LACKS_STD_WSTRING is not defined.
// NOTE(review): x is presumably a std::wstring&, judging by that guard and
// the use of x.resize() and &x[0] -- its declaration is not visible here.
//
// The flow mirrors the raw-buffer overload: read a 4-byte length, accept
// it only if 0 < len <= cdr.length(), and for version 1.x with minor > 1
// divide it by the code unit size and read through read_wchar_array_i
// (last argument 1). Otherwise read through read_wchar_array and then
// shrink x by one element to drop the terminating zero that was read from
// the stream.
// std::bad_alloc is caught on both paths; the guarded statements and the
// handler bodies are not visible in this listing.
165 TAO_UTF16_BOM_Translator::read_wstring (ACE_InputCDR
&cdr
,
169 if (!this->read_4 (cdr
, &len
))
172 // A check for the length being too great is done later in the
173 // call to read_char_array but we want to have it done before
174 // the memory is allocated.
175 if (len
> 0 && len
<= cdr
.length ())
177 if (static_cast<ACE_CDR::Short
> (this->major_version (cdr
)) == 1
178 && static_cast<ACE_CDR::Short
> (this->minor_version (cdr
)) > 1)
// Convert the transmitted length into a count of code units.
180 len
/= ACE_UTF16_CODEPOINT_SIZE
;
186 catch (const std::bad_alloc
&)
191 if (this->read_wchar_array_i (cdr
, &x
[0], len
, 1))
193 // Since reading the array may have adjusted the length,
205 catch (const std::bad_alloc
&)
210 if (this->read_wchar_array (cdr
, &x
[0], len
))
212 x
.resize (len
-1); // drop terminating zero wchar read from stream
// Internal helper that reads `length` UTF-16 code units from cdr into the
// wide-character array x, honouring an optional leading BOM.
//
// length is taken by reference.
// NOTE(review): callers' comments say it may be adjusted, but the statement
// that changes it is not visible in this listing.
//
// Steps visible here:
//   1. Reserve ACE_UTF16_CODEPOINT_SIZE * length bytes at SHORT_ALIGN via
//      cdr.adjust(); a result of 0 is treated as success.
//   2. Inspect the first code unit; if it is a BOM in either order, record
//      whether the data must be swapped (BOM in swapped form) and advance
//      buf past the BOM.
//   3. Copy each code unit into x, either directly from sb[i] or through
//      ACE_CDR::swap_2 from the raw bytes at buf[i*2]. The copy is arranged
//      differently when ACE_DISABLE_SWAP_ON_READ is defined.
//   4. If a BOM was found and adjust_len is false, advance the stream by
//      one more code unit.
//
// NOTE(review): the declarations of x, adjust_len, buf, has_bom, must_swap
// and sx, the contents of the ACE_LITTLE_ENDIAN section, and the return
// statements are not visible in this listing.
228 TAO_UTF16_BOM_Translator::read_wchar_array_i (ACE_InputCDR
& cdr
,
230 ACE_CDR::ULong
&length
,
236 static const size_t align
= ACE_CDR::SHORT_ALIGN
;
237 if (cdr
.adjust (ACE_UTF16_CODEPOINT_SIZE
* length
, align
, buf
) == 0)
239 // check for byte order mark. If found, honor it then discard it
240 ACE_UTF16_T
*sb
= reinterpret_cast<ACE_UTF16_T
*> (buf
);
241 if (*sb
== ACE_UNICODE_BOM_CORRECT
|| *sb
== ACE_UNICODE_BOM_SWAPPED
)
243 must_swap
= (*sb
== ACE_UNICODE_BOM_SWAPPED
);
248 #if defined (ACE_LITTLE_ENDIAN)
250 #endif // ACE_LITTLE_ENDIAN
// Step over the BOM so the copy loop starts at the first data byte.
255 buf
+= ACE_UTF16_CODEPOINT_SIZE
;
262 for (size_t i
= 0; i
< length
; ++i
)
263 #if defined (ACE_DISABLE_SWAP_ON_READ)
264 x
[i
] = static_cast<ACE_CDR::WChar
> (sb
[i
]);
268 x
[i
] = static_cast<ACE_CDR::WChar
> (sb
[i
]);
273 ACE_CDR::swap_2 (&buf
[i
*2], reinterpret_cast<char *> (&sx
));
274 x
[i
] = static_cast<ACE_CDR::WChar
> (sx
);
276 #endif /* ACE_DISABLE_SWAP_ON_READ */
// Advance the stream by one additional code unit when a BOM was present
// and the length was not adjusted for it.
278 if (has_bom
&& !adjust_len
)
280 cdr
.adjust (ACE_UTF16_CODEPOINT_SIZE
, align
, buf
);
// Reads `length` wide characters from cdr into the array x.
//
// For version 1.x with minor > 1 each element is read individually through
// read_wchar. For other versions the whole array is read in one call to
// read_wchar_array_i.
//
// NOTE(review): the declaration of x and the return statements of the
// per-element branch are not visible in this listing.
288 TAO_UTF16_BOM_Translator::read_wchar_array (ACE_InputCDR
& cdr
,
290 ACE_CDR::ULong length
)
295 if (static_cast<ACE_CDR::Short
> (this->major_version (cdr
)) == 1
296 && static_cast<ACE_CDR::Short
> (this->minor_version (cdr
)) > 1)
298 for (size_t i
= 0; i
< length
; ++i
)
299 if (!this->read_wchar (cdr
, x
[i
]))
305 return this->read_wchar_array_i (cdr
, x
, length
);
// Writes a single wide character x to cdr by delegating to write_wchar_i
// with true as the third argument, and returns that call's result.
309 TAO_UTF16_BOM_Translator::write_wchar (ACE_OutputCDR
&cdr
,
312 return this->write_wchar_i (cdr
, x
, true);
// Internal helper that writes one wide character x to cdr, optionally
// preceded by a BOM (controlled by allow_BOM).
//
// Version 1.x with minor > 1:
//   - The value is staged in a two-element UShort buffer.
//   - If allow_BOM is set and cdr.byte_order() is non-zero, a BOM is
//     emitted: either the swapped BOM with byte-swapped data, or the
//     correct BOM with the data stored unchanged.
//   - Otherwise no BOM is emitted; the data is byte-swapped when
//     cdr.byte_order() is non-zero and stored unchanged when it is zero.
//   - A one-octet size (len * code unit size) is written with write_1,
//     followed by that many raw bytes from the buffer.
// Other versions with a non-zero minor version: the value is narrowed to
// one UTF-16 code unit and written with write_2.
// The remaining branch is the GIOP 1.0 case, where wchar is not allowed.
//
// NOTE(review): the declarations of x, allow_BOM and len, the assignments
// to len, the condition that selects the swapped-BOM branch inside the
// ACE_LITTLE_ENDIAN section, and the body of the final branch are not
// visible in this listing.
316 TAO_UTF16_BOM_Translator::write_wchar_i (ACE_OutputCDR
&cdr
,
320 if (static_cast<ACE_CDR::Short
> (this->major_version (cdr
)) == 1
321 && static_cast<ACE_CDR::Short
> (this->minor_version (cdr
)) > 1)
// Staging area: [0] = BOM or data, [1] = data when a BOM is present.
324 ACE_CDR::UShort buffer
[2];
326 if( allow_BOM
&& cdr
.byte_order())
329 #if defined (ACE_LITTLE_ENDIAN)
332 // force both the byte order mark and the data to Big Endian order
333 buffer
[0] = ACE_UNICODE_BOM_SWAPPED
;
334 ACE_CDR::swap_2 (reinterpret_cast<const char *> (&x
),
335 reinterpret_cast<char *> (&buffer
[1]));
340 // store both the byte order mark and the data in native order
341 buffer
[0] = ACE_UNICODE_BOM_CORRECT
;
342 buffer
[1] = static_cast<ACE_CDR::Short
> (x
);
347 // not using a byte order mark
348 // force it to be big endian w/o BOM
350 if (cdr
.byte_order ())
351 ACE_CDR::swap_2 (reinterpret_cast<const char *> (&x
),
352 reinterpret_cast<char *> (buffer
));
354 buffer
[0] = static_cast<ACE_CDR::Short
> (x
);
// Byte count that is sent as the one-octet length prefix.
357 unsigned char tcsize
=
358 static_cast<unsigned char> (len
* ACE_UTF16_CODEPOINT_SIZE
);
360 if (this->write_1 (cdr
, &tcsize
))
361 return this->write_array(cdr
, &buffer
, tcsize
, 1, 1);
365 else if (static_cast<ACE_CDR::Short
> (this->minor_version (cdr
)) != 0)
367 // GIOP 1.1 simple support
368 ACE_UTF16_T sx
= static_cast<ACE_UTF16_T
> (x
);
369 return this->write_2 (cdr
, &sx
);
372 { // wchar is not allowed with GIOP 1.0.
// Writes the wide string x of len characters to cdr.
//
// Preconditions (enforced with ACE_ASSERT): x may be null only when len is
// 0, and len must be small enough that (len + 1) * code-unit-size stays
// below ACE_UINT32_MAX.
//
// Version 1.x with minor > 1:
//   - len == 0: only the 4-byte length (zero) is written, with no BOM.
//   - forceBE_ set and cdr.byte_order() non-zero: writes the byte length
//     of (len + 1) code units, then the swapped BOM, then the characters
//     through write_swapped_wchar_array_i.
//   - otherwise: writes the same byte length, then the correct BOM, then
//     the characters through write_wchar_array_i.
// Earlier versions: the length written is len + 1 (it includes the null
// terminator). With a non-null x, len + 1 elements are then written
// through write_wchar_array_i; a separate path ends in a single write_2
// of s.
//
// NOTE(review): the declaration of len, the third operand of the two
// "write_4 && write_2 &&" conditions, the declaration of s and the
// conditions that select the final two returns are not visible in this
// listing.
379 TAO_UTF16_BOM_Translator::write_wstring (ACE_OutputCDR
& cdr
,
381 const ACE_CDR::WChar
*x
)
383 // we'll accept a null pointer but only for an empty string
384 ACE_ASSERT ((x
!= 0 || len
== 0) &&
385 len
< (ACE_UINT32_MAX
- 1) / ACE_UL_UTF16_CODEPOINT_SIZE
);
386 if (static_cast<ACE_CDR::Short
> (this->major_version (cdr
)) == 1
387 && static_cast<ACE_CDR::Short
> (this->minor_version (cdr
)) > 1)
389 if (len
== 0) // for zero length strings, only write a length of
390 // zero. The BOM is not needed in this case.
391 return this->write_4(cdr
, &len
);
393 if (this->forceBE_
&& cdr
.byte_order())
// Byte length on the wire: the characters plus one code unit for the BOM.
395 ACE_CDR::ULong l
= (len
+1) * ACE_UL_UTF16_CODEPOINT_SIZE
;
396 if (this->write_4 (cdr
, &l
) &&
397 this->write_2 (cdr
, &ACE_UNICODE_BOM_SWAPPED
) &&
399 return this->write_swapped_wchar_array_i (cdr
, x
, len
);
// Byte length on the wire: the characters plus one code unit for the BOM.
403 ACE_CDR::ULong l
= (len
+1) * ACE_UL_UTF16_CODEPOINT_SIZE
;
404 if (this->write_4 (cdr
, &l
) &&
405 this->write_2 (cdr
, &ACE_UNICODE_BOM_CORRECT
) &&
407 return this->write_wchar_array_i (cdr
, x
, len
);
412 // pre GIOP 1.2: include null terminator in length
413 ACE_CDR::ULong l
= len
+ 1;
415 if (this->write_4 (cdr
, &l
))
419 return this->write_wchar_array_i (cdr
, x
, len
+ 1);
424 return this->write_2 (cdr
,&s
);
// Writes `length` wide characters from x to cdr.
//
// For version 1.x with minor > 1 each element is written individually
// through write_wchar_i with false as the third argument; a result of 0
// from any element is treated as failure. For other versions the whole
// array is written in one call to write_wchar_array_i.
//
// NOTE(review): the return statements of the per-element branch are not
// visible in this listing.
433 TAO_UTF16_BOM_Translator::write_wchar_array (ACE_OutputCDR
& cdr
,
434 const ACE_CDR::WChar
*x
,
435 ACE_CDR::ULong length
)
437 if (static_cast<ACE_CDR::Short
> (this->major_version (cdr
)) == 1
438 && static_cast<ACE_CDR::Short
> (this->minor_version (cdr
)) > 1)
440 for (size_t i
= 0; i
< length
; ++i
)
441 if (this->write_wchar_i (cdr
, x
[i
], false) == 0)
447 return this->write_wchar_array_i (cdr
, x
, length
);
// Internal helper that writes `length` wide characters from x to cdr as
// UTF-16 code units without changing their byte order.
//
// Space for ACE_UTF16_CODEPOINT_SIZE * length bytes is reserved at
// SHORT_ALIGN via cdr.adjust(), the reserved region is viewed as an array
// of ACE_UTF16_T, and each x[i] is narrowed into it with static_cast.
//
// NOTE(review): the declaration of buf, the comparison applied to the
// result of cdr.adjust() and the return statements are not visible in this
// listing.
451 TAO_UTF16_BOM_Translator::write_wchar_array_i (ACE_OutputCDR
& cdr
,
452 const ACE_CDR::WChar
*x
,
453 ACE_CDR::ULong length
)
458 static const size_t align
= ACE_CDR::SHORT_ALIGN
;
459 if (cdr
.adjust (ACE_UTF16_CODEPOINT_SIZE
* length
, align
, buf
)
465 ACE_UTF16_T
*sb
= reinterpret_cast<ACE_UTF16_T
*> (buf
);
467 for (size_t i
= 0; i
< length
; ++i
)
469 sb
[i
] = static_cast<ACE_UTF16_T
> (x
[i
]);
// Internal helper that writes `length` wide characters from x to cdr as
// byte-swapped UTF-16 code units.
//
// Space for ACE_UTF16_CODEPOINT_SIZE * length bytes is reserved at
// SHORT_ALIGN via cdr.adjust(), the reserved region is viewed as an array
// of ACE_UTF16_T, and each element is produced by ACE_CDR::swap_2 from the
// bytes starting at &x[i].
//
// NOTE(review): swap_2 is handed the address of an ACE_CDR::WChar. If
// WChar is wider than two bytes, presumably only its first two bytes in
// memory are used -- confirm this helper is only reached on hosts where
// those are the low-order bytes.
// NOTE(review): the declaration of buf, the comparison applied to the
// result of cdr.adjust() and the return statements are not visible in this
// listing.
475 TAO_UTF16_BOM_Translator::write_swapped_wchar_array_i (ACE_OutputCDR
& cdr
,
476 const ACE_CDR::WChar
*x
,
477 ACE_CDR::ULong length
)
482 static const size_t align
= ACE_CDR::SHORT_ALIGN
;
483 if (cdr
.adjust (ACE_UTF16_CODEPOINT_SIZE
* length
, align
, buf
)
489 ACE_UTF16_T
*sb
= reinterpret_cast<ACE_UTF16_T
*> (buf
);
491 for (size_t i
= 0; i
< length
; ++i
)
493 ACE_CDR::swap_2 (reinterpret_cast<const char*> (&x
[i
]),
494 reinterpret_cast<char *> (&sb
[i
]));
499 TAO_END_VERSIONED_NAMESPACE_DECL