Merge pull request #2317 from jwillemsen/jwi-deleteop
[ACE_TAO.git] / TAO / tao / Codeset / UTF8_Latin1_Translator.cpp
blob4ae488e3df1c6ac93724718e99aceff9d221ac5e
1 // -*- C++ -*-
3 //=============================================================================
4 /**
5 * @file UTF8_Latin1_Translator.cpp
7 * Defines the methods required to convert UTF-8 based unicode strings
8 * to the Latin-1 codeset.
10 * @author Phil Mesnier <mesnier_p@ociweb.com>
12 //=============================================================================
14 #include "tao/Codeset/UTF8_Latin1_Translator.h"
15 #include "tao/debug.h"
16 #include "ace/OS_Memory.h"
18 TAO_BEGIN_VERSIONED_NAMESPACE_DECL
20 // = Documented in $ACE_ROOT/ace/CDR_Stream.h
21 ACE_CDR::Boolean
22 TAO_UTF8_Latin1_Translator::read_char (ACE_InputCDR &cdr, ACE_CDR::Char &x)
24 // We cannot have a codepoint > 0xBF at this point, since we are expecting
25 // only one single char.
26 ACE_CDR::Octet ox;
27 if (this->read_1 (cdr, &ox))
29 if (ox < 0xC0)
31 x = ox;
32 return true;
35 return false;
38 ACE_CDR::ULong
39 TAO_UTF8_Latin1_Translator::read_char_i (ACE_InputCDR &cdr, ACE_CDR::Char &x)
41 // This will read up to 2 octets and combine them into one char if possible
42 ACE_CDR::Octet upper;
43 if (this->read_1 (cdr, &upper))
45 if ( upper >= 0xC4) // Anything with a leading char > 110001xx converts
46 // to a codepoint value > 0x00FF, thus won't fit in
47 // a single char.
48 return 0;
49 if ( upper < 0xC0 )
51 x = static_cast<ACE_CDR::Char>(upper);
52 return 1;
54 ACE_CDR::Octet lower;
55 if (this->read_1 (cdr, &lower))
57 ACE_CDR::Octet final = ((upper & 0xBF) << 6) + (lower & 0xC0);
58 x = static_cast<ACE_CDR::Char>(final);
59 return 2;
62 return 0;
65 ACE_CDR::Boolean
66 TAO_UTF8_Latin1_Translator::read_string (ACE_InputCDR &cdr,
67 ACE_CDR::Char *&x)
69 ACE_CDR::ULong len;
70 if (!cdr.read_ulong (len))
71 return 0;
73 // A check for the length being too great is done later in the
74 // call to read_char_array but we want to have it done before
75 // the memory is allocated.
76 if (len > 0 && len <= cdr.length())
78 ACE_NEW_RETURN (x,
79 ACE_CDR::Char [len],
80 0);
81 // pos keeps track of the character position, it will never be
82 // greater than len
83 size_t pos = 0;
84 ACE_CDR::ULong incr = 1;
85 for (ACE_CDR::ULong i = 0; incr > 0 && i < len; i += incr)
87 incr = this->read_char_i(cdr,x[pos++]);
89 if (incr > 0)
90 return 1;
91 delete [] x;
93 else if (len == 0)
95 // Convert any null strings to empty strings since empty
96 // strings can cause crashes. (See bug 58.)
97 ACE_NEW_RETURN (x,
98 ACE_CDR::Char[1],
99 0);
100 x[0] = '\x00';
101 return 1;
103 x = 0;
104 return 0;
107 ACE_CDR::Boolean
108 TAO_UTF8_Latin1_Translator::read_string (ACE_InputCDR &cdr,
109 std::string &x)
111 ACE_CDR::ULong len;
112 if (!cdr.read_ulong (len))
113 return false;
115 // A check for the length being too great is done later in the
116 // call to read_char_array but we want to have it done before
117 // the memory is allocated.
118 if (len > 0 && len <= cdr.length())
120 // detract terminating '\0' from length
121 len--;
124 x.resize (len);
126 catch (const std::bad_alloc&)
128 return false;
131 // pos keeps track of the character position, it will never be
132 // greater than len
133 size_t pos = 0;
134 ACE_CDR::ULong incr = 1;
135 for (ACE_CDR::ULong i = 0; incr > 0 && i < len; i += incr)
137 incr = this->read_char_i(cdr,x[pos++]);
139 if (incr > 0)
141 // read terminating '\0' from stream
142 ACE_CDR::Char c;
143 incr = this->read_char_i(cdr, c);
144 return (incr > 0);
148 x.clear ();
149 return false;
152 ACE_CDR::Boolean
153 TAO_UTF8_Latin1_Translator::read_char_array (ACE_InputCDR & cdr,
154 ACE_CDR::Char *x,
155 ACE_CDR::ULong length)
157 if (length == 0)
158 return 1;
160 for (size_t i = 0; i < length; ++i)
161 if (!this->read_char(cdr,x[i]))
162 return 0;
164 return 1;
167 ACE_CDR::Boolean
168 TAO_UTF8_Latin1_Translator::write_char (ACE_OutputCDR &cdr,
169 ACE_CDR::Char x)
171 ACE_CDR::Octet ox = x;
172 if (ox < 0xC0)
173 return this->write_1 (cdr,&ox);
174 else
175 { // character cannot be represented in a single octet
176 errno = EINVAL;
177 return 0;
181 ACE_CDR::Boolean
182 TAO_UTF8_Latin1_Translator::write_char_i (ACE_OutputCDR &cdr,
183 ACE_CDR::Char x)
185 // @@@ Strictly speaking, we should test for 7F < x < C0 and do
186 // something else in that case, but for now we will just let it
187 // pass.
189 ACE_CDR::Octet ox = x;
190 if (ox < 0xC0)
191 return this->write_1 (cdr,&ox);
192 else
193 { // character cannot be represented in a single octet
194 // Since the source will never be > 0xFF, we don't have to worry about
195 // using a third octet.
196 ACE_CDR::Octet upper = 0xC0 + (ox >> 6);
197 ACE_CDR::Octet lower = 0x80 + (ox & 0x3F);
198 if (this->write_1(cdr, &upper))
199 return this->write_1(cdr, &lower);
201 return 0;
204 ACE_CDR::Boolean
205 TAO_UTF8_Latin1_Translator::write_string (ACE_OutputCDR & cdr,
206 ACE_CDR::ULong len,
207 const ACE_CDR::Char *x)
209 // we'll accept a null pointer but only for an empty string
210 if (x == 0 && len != 0)
211 return 0;
213 ACE_CDR::ULong l = len;
214 // Compute the real buffer size by adding in multi-byte codepoints.
215 for (ACE_CDR::ULong i = 0; i < len; i++)
216 if (static_cast<ACE_CDR::Octet>(x[i]) > 0xbf) l++;
218 // Always add one for the nul
219 l++;
220 if (cdr.write_ulong (l))
222 for (ACE_CDR::ULong i = 0; i < len; ++i)
224 if (this->write_char_i (cdr,x[i]) == 0)
225 return 0;
227 ACE_CDR::Octet s = 0;
228 return this->write_1 (cdr, &s);
230 return 0;
233 ACE_CDR::Boolean
234 TAO_UTF8_Latin1_Translator::write_char_array (ACE_OutputCDR & cdr,
235 const ACE_CDR::Char *x,
236 ACE_CDR::ULong length)
238 if (length == 0)
239 return true;
241 for (size_t i = 0; i < length; ++i)
242 // We still have to write each char individually, as any translated
243 // value may fail to fit in a single octet.
244 if (this->write_char (cdr, x[i]) == 0)
245 return false;
247 return true;
250 TAO_END_VERSIONED_NAMESPACE_DECL