3 //=============================================================================
5 * @file UTF8_Latin1_Translator.cpp
7 * Defines the methods required to convert UTF-8 based unicode strings
8 * to the Latin-1 codeset.
10 * @author Phil Mesnier <mesnier_p@ociweb.com>
12 //=============================================================================
14 #include "tao/Codeset/UTF8_Latin1_Translator.h"
15 #include "tao/debug.h"
16 #include "ace/OS_Memory.h"
18 TAO_BEGIN_VERSIONED_NAMESPACE_DECL
20 // = Documented in $ACE_ROOT/ace/CDR_Stream.h
22 TAO_UTF8_Latin1_Translator::read_char (ACE_InputCDR
&cdr
, ACE_CDR::Char
&x
)
24 // We cannot have a codepoint > 0xBF at this point, since we are expecting
25 // only one single char.
27 if (this->read_1 (cdr
, &ox
))
39 TAO_UTF8_Latin1_Translator::read_char_i (ACE_InputCDR
&cdr
, ACE_CDR::Char
&x
)
41 // This will read up to 2 octets and combine them into one char if possible
43 if (this->read_1 (cdr
, &upper
))
45 if ( upper
>= 0xC4) // Anything with a leading char > 110001xx converts
46 // to a codepoint value > 0x00FF, thus won't fit in
51 x
= static_cast<ACE_CDR::Char
>(upper
);
55 if (this->read_1 (cdr
, &lower
))
// Rebuild the Latin-1 value from a two-octet UTF-8 sequence (110000xx 10xxxxxx):
// the two low bits of the lead octet become bits 7-6 of the result and the six
// payload bits of the continuation octet become bits 5-0.  The previous masks
// (upper & 0xBF, lower & 0xC0) kept the marker bits of the continuation octet
// and dropped its payload, so every decoded character lost its low six bits.
// NOTE(review): assumes lead octets >= 0xC4 and < 0xC0 were already handled by
// the checks above; the continuation marker (10xxxxxx) is not validated here.
57 ACE_CDR::Octet final = static_cast<ACE_CDR::Octet> (((upper & 0x03) << 6) | (lower & 0x3F));
58 x
= static_cast<ACE_CDR::Char
>(final
);
66 TAO_UTF8_Latin1_Translator::read_string (ACE_InputCDR
&cdr
,
70 if (!cdr
.read_ulong (len
))
73 // A check for the length being too great is done later in the
74 // call to read_char_array but we want to have it done before
75 // the memory is allocated.
76 if (len
> 0 && len
<= cdr
.length())
81 // pos keeps track of the character position, it will never be
84 ACE_CDR::ULong incr
= 1;
85 for (ACE_CDR::ULong i
= 0; incr
> 0 && i
< len
; i
+= incr
)
87 incr
= this->read_char_i(cdr
,x
[pos
++]);
95 // Convert any null strings to empty strings since null
96 // strings can cause crashes. (See bug 58.)
108 TAO_UTF8_Latin1_Translator::read_string (ACE_InputCDR
&cdr
,
112 if (!cdr
.read_ulong (len
))
115 // A check for the length being too great is done later in the
116 // call to read_char_array but we want to have it done before
117 // the memory is allocated.
118 if (len
> 0 && len
<= cdr
.length())
120 // subtract the terminating '\0' from the length
126 catch (const std::bad_alloc
&)
131 // pos keeps track of the character position, it will never be
134 ACE_CDR::ULong incr
= 1;
135 for (ACE_CDR::ULong i
= 0; incr
> 0 && i
< len
; i
+= incr
)
137 incr
= this->read_char_i(cdr
,x
[pos
++]);
141 // read terminating '\0' from stream
143 incr
= this->read_char_i(cdr
, c
);
153 TAO_UTF8_Latin1_Translator::read_char_array (ACE_InputCDR
& cdr
,
155 ACE_CDR::ULong length
)
160 for (size_t i
= 0; i
< length
; ++i
)
161 if (!this->read_char(cdr
,x
[i
]))
168 TAO_UTF8_Latin1_Translator::write_char (ACE_OutputCDR
&cdr
,
171 ACE_CDR::Octet ox
= x
;
173 return this->write_1 (cdr
,&ox
);
175 { // character cannot be represented in a single octet
182 TAO_UTF8_Latin1_Translator::write_char_i (ACE_OutputCDR
&cdr
,
185 // @@@ Strictly speaking, we should test for 7F < x < C0 and do
186 // something else in that case, but for now we will just let it
189 ACE_CDR::Octet ox
= x
;
191 return this->write_1 (cdr
,&ox
);
193 { // character cannot be represented in a single octet
194 // Since the source will never be > 0xFF, we don't have to worry about
195 // using a third octet.
196 ACE_CDR::Octet upper
= 0xC0 + (ox
>> 6);
197 ACE_CDR::Octet lower
= 0x80 + (ox
& 0x3F);
198 if (this->write_1(cdr
, &upper
))
199 return this->write_1(cdr
, &lower
);
205 TAO_UTF8_Latin1_Translator::write_string (ACE_OutputCDR
& cdr
,
207 const ACE_CDR::Char
*x
)
209 // we'll accept a null pointer but only for an empty string
210 if (x
== 0 && len
!= 0)
213 ACE_CDR::ULong l
= len
;
214 // Compute the real buffer size by adding in multi-byte codepoints.
215 for (ACE_CDR::ULong i
= 0; i
< len
; i
++)
216 if (static_cast<ACE_CDR::Octet
>(x
[i
]) > 0xbf) l
++;
218 // Always add one for the nul
220 if (cdr
.write_ulong (l
))
222 for (ACE_CDR::ULong i
= 0; i
< len
; ++i
)
224 if (this->write_char_i (cdr
,x
[i
]) == 0)
227 ACE_CDR::Octet s
= 0;
228 return this->write_1 (cdr
, &s
);
234 TAO_UTF8_Latin1_Translator::write_char_array (ACE_OutputCDR
& cdr
,
235 const ACE_CDR::Char
*x
,
236 ACE_CDR::ULong length
)
241 for (size_t i
= 0; i
< length
; ++i
)
242 // We still have to write each char individually, as any translated
243 // value may fail to fit in a single octet.
244 if (this->write_char (cdr
, x
[i
]) == 0)
250 TAO_END_VERSIONED_NAMESPACE_DECL